diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml new file mode 100644 index 0000000..7fbed63 --- /dev/null +++ b/.github/workflows/pkgdown.yml @@ -0,0 +1,48 @@ +on: + push: + branches: + - dev + - master + +name: Deploy-package-documentation + +jobs: + pkgdown: + runs-on: macos-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v1 + + - uses: r-lib/actions/setup-pandoc@v1 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Restore R package cache + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE) + install.packages("pkgdown", type = "binary") + shell: Rscript {0} + + - name: Install package + run: R CMD INSTALL . 
+ + - name: Deploy package documentation + run: | + git config --local user.email "actions@github.com" + git config --local user.name "GitHub Actions" + Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE, commit_message="deploy docs with a github action")' \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 6fd5380..0156f3b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BiocProject Title: Bioconductor Management with Portable Encapsulated Project (PEP) Objects -Version: 0.2.1 +Version: 0.3.0 Authors@R: c(person("Michal", "Stolarczyk", email = "mjs5kd@virginia.edu",role = c("aut", "cre")), person("Nathan", "Sheffield", email = "nathan@code.databio.org",role = c("aut"))) Description: A Bioconductor-oriented project management class. It wraps the @@ -9,8 +9,9 @@ Description: A Bioconductor-oriented project management class. It wraps the License: BSD_2_clause + file LICENSE Encoding: UTF-8 LazyData: true -Depends: S4Vectors, pepr, methods -Suggests: testthat, yaml, BiocFileCache, simpleCache, GenomicRanges, knitr, BiocStyle, rmarkdown +Depends: S4Vectors, pepr +Imports: methods, glue, RCurl, yaml, stats, pryr +Suggests: testthat, BiocFileCache, simpleCache, GenomicRanges, knitr, BiocStyle, rmarkdown biocViews: DataImport, DataRepresentation RoxygenNote: 7.1.1 URL: https://github.com/pepkit/BiocProject diff --git a/NAMESPACE b/NAMESPACE index 9dc4c23..8640caf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,15 @@ # Generated by roxygen2: do not edit by hand export(.insertPEP) +export(.pyToR) export(.setShowMethod) export(.unionList) export(BiocProject) +export(gatherPipelineInterfaces) +export(getOutputsBySample) +export(getProjectOutputs) +export(pipelineInterfacesBySample) +export(readSchema) exportMethods(config) exportMethods(getProject) exportMethods(is) @@ -11,3 +17,11 @@ exportMethods(sampleTable) import(S4Vectors) import(methods) import(pepr) +importFrom(RCurl,getURLContent) +importFrom(glue,glue) 
+importFrom(methods,new) +importFrom(pepr,checkSection) +importFrom(pepr,config) +importFrom(pryr,partial) +importFrom(stats,setNames) +importFrom(yaml,yaml.load_file) diff --git a/R/constants.R b/R/constants.R index 50025a1..556c2fc 100644 --- a/R/constants.R +++ b/R/constants.R @@ -7,4 +7,15 @@ BIOC_SECTION = "bioconductor" FUNCTION_ARGS = "funcArgs" FUNCTION_PATH = "readFunPath" -FUNCTION_NAME = "readFunName" \ No newline at end of file +FUNCTION_NAME = "readFunName" + +# other constants +PIP_TYPE_KEY = "pipeline_type" +PIP_NAME_KEY = "pipeline_name" +OUTPUT_SCHEMA_SECTION = "output_schema" +LOOPER_SECTION = "looper" +PIP_IFACE_NAME = "pipeline_interfaces" +PIP_IFACE_KEY = "pipeline_interfaces_key" +SCHEMA_SAMPLE_OUTS = c("properties", "samples", "items", "properties") +PATH_LIKE_KEYS = c("path", "thumbnail_path") +PIP_IFACE_SECTION = c(LOOPER_SECTION, PIP_IFACE_NAME) \ No newline at end of file diff --git a/R/functions.R b/R/functions.R index 40a4716..29146b7 100644 --- a/R/functions.R +++ b/R/functions.R @@ -63,6 +63,8 @@ #' the \code{bioconductor} section in the config file. #' @param autoLoad a logical indicating whether the data should be loaded #' automatically. See \code{Details} for more information. +#' @param projectLevel logical indicating whether a only project-level pifaces +#' should be considered. Otherwise, only sample-level ones are. #' #' @return an object of \code{\link[S4Vectors]{Annotated-class}} that is #' returned by the user provided function with @@ -81,8 +83,8 @@ #' @seealso \url{https://pepkit.github.io/} #' @import pepr #' @export BiocProject -BiocProject = function(file, amendments = NULL, autoLoad = TRUE, func = NULL, - funcArgs = NULL) { +BiocProject = function(file, amendments = NULL, autoLoad = TRUE, func = NULL, + projectLevel = FALSE, funcArgs = NULL) { p = pepr::Project(file=file, amendments = amendments) # prevent PEP (Project object) input. 
This prevents BiocProject object # failing when the user provides the Project object @@ -96,13 +98,15 @@ BiocProject = function(file, amendments = NULL, autoLoad = TRUE, func = NULL, } } args = append(list(p), funcArgs) - cfg = pepr::config(p) + cfg = .getBiocConfig(p, projectLevel) + if(is.null(cfg)) + cfg = pepr::config(p) if(pepr::.checkSection(cfg, c(BIOC_SECTION, FUNCTION_ARGS))){ - args = .unionList(config(p)[[BIOC_SECTION]][[FUNCTION_ARGS]],args) + args = .unionList(cfg[[BIOC_SECTION]][[FUNCTION_ARGS]], args) argsNames = names(args) project = args[[.findProjectInList(args)]] argsNames = append("",argsNames[-.findProjectInList(args)]) - args = append(list(p), args[[-.findProjectInList(args)]]) + args = append(list(p), args[-.findProjectInList(args)]) names(args) = argsNames } if (!is.null(func)) { diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R new file mode 100644 index 0000000..475b409 --- /dev/null +++ b/R/pipeline_interface.R @@ -0,0 +1,279 @@ + +#' Collect all pipeline interfaces +#' +#' Collects all relevant pipeline interfaces +#' for this \code{\link[pepr]{Project-class}} +#' +#' @param project \code{\link[pepr]{Project-class}} object +#' @param ... other arguments +#' +#' @return a list of pipeline interface file paths. +#' +#' @export +#' @examples +#' projectConfig = system.file('extdata', +#' 'example_peps-master', +#' 'example_piface', +#' 'project_config.yaml', +#' package = 'BiocProject') +#' p = Project(file = projectConfig) +#' gatherPipelineInterfaces(p) +#' gatherPipelineInterfaces(p, TRUE) +setGeneric("gatherPipelineInterfaces", + function(project, ...) standardGeneric("gatherPipelineInterfaces"), + signature = "project") + +#' @describeIn gatherPipelineInterfaces Collect all pipeline interfaces +#' @param projectLevel logical indicating whether only project-level pifaces +#' should be considered. Otherwise, only sample-level ones are. 
+#' @importFrom stats setNames +setMethod("gatherPipelineInterfaces", + c(project = "Project"), function(project, + projectLevel = FALSE) { + if (!projectLevel) { + return(.gatherSamplePipelineInterfaces(project)) + } else { + pik = PIP_IFACE_NAME + if (!is.null(config(project)[[LOOPER_SECTION]][[PIP_IFACE_KEY]])) + pik = config(project)[[LOOPER_SECTION]][[PIP_IFACE_KEY]] + if (!is.null(config(project)[[LOOPER_SECTION]][[pik]])) + return(setNames(vapply(unlist( + config(project)[[LOOPER_SECTION]][[pik]]), + function(x) { + pepr::.makeAbsPath(x, parent=dirname(project@file)) + }, character(1)), + NULL)) + warning("No project pipeline interfaces defined") + return(invisible(NULL)) + } + }) + + +setGeneric(".gatherSamplePipelineInterfaces", + function(project) standardGeneric(".gatherSamplePipelineInterfaces"), + signature = "project") + +#' @describeIn gatherPipelineInterfaces extracts pipeline outputs +#' for a given pipeline +#' @importFrom pryr partial +setMethod(".gatherSamplePipelineInterfaces", + c(project = "Project"), function(project) { + t = pepr::sampleTable(project) + .mkAbs = pryr::partial(pepr::.makeAbsPath, parent=dirname(project@file)) + if (PIP_IFACE_NAME %in% colnames(t)) + return(setNames(vapply(unique(unlist(t[, PIP_IFACE_NAME])), + .mkAbs, character(1)), NULL)) + return(invisible(NULL)) + }) + + +#' Get pipeline interfaces by sample +#' +#' Collects all relevant pipeline interfaces for this +#' \code{\link[pepr]{Project-class}} and provides a sample to interfaces mapping +#' +#' @param project \code{\link[pepr]{Project-class}} object +#' +#' @return a list of pipeline interface file paths keyed by sample names +#' +#' @export +#' @examples +#' projectConfig = system.file('extdata', +#' 'example_peps-master', +#' 'example_piface', +#' 'project_config.yaml', +#' package = 'BiocProject') +#' p = Project(file = projectConfig) +#' pipelineInterfacesBySample(p) +setGeneric("pipelineInterfacesBySample", + function(project) 
standardGeneric("pipelineInterfacesBySample"), + signature = "project") + +#' @describeIn pipelineInterfacesBySample Get pipeline interfaces by sample +setMethod("pipelineInterfacesBySample", + c(project = "Project"), function(project) { + t = pepr::sampleTable(project) + if (PIP_IFACE_NAME %in% + colnames(t)) { + .mkAbs = pryr::partial(pepr::.makeAbsPath, + parent=dirname(project@file)) + pifaces = t[, PIP_IFACE_NAME] + names(pifaces) = unlist(t[, + "sample_name"]) + return(lapply(pifaces, .mkAbs)) + } + return(invisible(NULL)) + }) + +#' Populates and returns output files for a given sample +#' +#' Returns the sample level pipeline outputs which are defined in the output +#' schema indicated by the pipeline interface indicated in the +#' \code{\link[pepr]{Project-class}} +#' +#' @param project \code{\link[pepr]{Project-class}} object +#' @param ... other arguments +#' +#' @return a list of output file paths. The order of the first level of the +#' list corresponds to the order of the pipeline interface files, second level +#' is a named list of file paths populated by the samples +#' +#' @export +#' @examples +#' projectConfig = system.file('extdata', +#' 'example_peps-master', +#' 'example_piface', +#' 'project_config.yaml', +#' package = 'BiocProject') +#' p = Project(file = projectConfig) +#' getOutputsBySample(p) +#' getOutputsBySample(p, 'sample1') +setGeneric("getOutputsBySample", + function(project, ...) 
standardGeneric("getOutputsBySample"), + signature = "project") + +#' @describeIn getOutputsBySample Populates and returns output files +#' for a given sample +#' @param sampleNames names of the samples +#' @importFrom yaml yaml.load_file +setMethod("getOutputsBySample", + c(project = "Project"), function(project, + sampleNames = NULL) { + pifacesBySample = pipelineInterfacesBySample(project = project) + defSampleNames = names(pifacesBySample) + if (!is.null(sampleNames)) + defSampleNames = intersect(sampleNames, defSampleNames) + if (length(defSampleNames) < 1) + stop("No samples matched by: ", + paste0(sampleNames, collapse = ",")) + ret = list() + for (sampleName in defSampleNames) { + sampleRet = list() + pifaceSources = pifacesBySample[[sampleName]] + for (pifaceSource in pifaceSources) { + piface = yaml::yaml.load_file(pifaceSource) + if (!.checkPifaceType(piface, "sample")) + return(invisible(NULL)) + if(!OUTPUT_SCHEMA_SECTION %in% names(piface)) next + schema = readSchema( + piface[[OUTPUT_SCHEMA_SECTION]], dirname(pifaceSource)) + sampleRet[[piface[[PIP_NAME_KEY]]]] = .populateSchemaPaths( + schema=schema, project=project, sampleName=sampleName) + } + ret[[sampleName]] = sampleRet + } + ret + }) + +#' Populates and returns outputs for a given \code{\link[pepr]{Project-class}} +#' +#' Returns the project level pipeline outputs which are defined in the output +#' schema indicated by the pipeline interface indicated in the +#' \code{\link[pepr]{Project-class}} +#' +#' @param project \code{\link[pepr]{Project-class}} object +#' +#' @return a list of output file paths. 
The order of the first level of the +#' list corresponds to the order of the pipeline interface files, second level +#' is a named list of file paths populated +#' by the \code{\link[pepr]{Project-class}} +#' +#' @export +#' @examples +#' projectConfig = system.file('extdata', +#' 'example_peps-master', +#' 'example_piface', +#' 'project_config.yaml', +#' package = 'BiocProject') +#' p = Project(file = projectConfig) +#' getProjectOutputs(p) +setGeneric("getProjectOutputs", + function(project) standardGeneric("getProjectOutputs"), + signature = "project") + +#' @describeIn getProjectOutputs Populates and returns output files for +#' a given \code{\link[pepr]{Project-class}} +setMethod("getProjectOutputs", + c(project = "Project"), function(project) { + pifaceSources = gatherPipelineInterfaces( + project, projectLevel=TRUE) + ret = list() + for (pifaceSource in pifaceSources) { + piface = yaml::yaml.load_file(pifaceSource) + if (!.checkPifaceType(piface, "project")) + return(invisible(NULL)) + if(!OUTPUT_SCHEMA_SECTION %in% names(piface)) next + schema = readSchema( + piface[[OUTPUT_SCHEMA_SECTION]], dirname(pifaceSource)) + ret[[piface[[PIP_NAME_KEY]]]] = .populateSchemaPaths( + schema, project, NULL, projectContext=TRUE) + } + ret + }) + +#' Populate values in output schema +#' +#' Populates schema values of type path and thumbnail path in the provided +#' output schema for the selected sample or project +#' +#' @param schema schema with value templates to populate +#' @param project \code{\link[pepr]{Project-class}} object +#' @param projectContext whether the values for path templates populating +#' should be sourced from the project metadata. Otherwise metadata for +#' a selected sample is used +#' @param sampleName name of the sample to populate the outputs for. 
Required +#' if \code{projectContext} set to \code{FALSE} +#' +#' @return a possibly nested list of length equal to the number of results defined in +#' the schema with populated outputs +.populateSchemaPaths <- function( + schema, project, projectContext=FALSE, sampleName=NULL) { + ret = list() + if(!projectContext && is.null(sampleName)) + stop("Must specify sample to populate schema path templates for in no + project context mode") + if(projectContext) sampleName = NULL + for(i in seq_along(schema)){ + if("value" %in% names(schema[[i]])) { + if(is(schema[[i]][["value"]], "list")) { + ret[[i]] = .populateRecursively( + schema[[i]][["value"]], project, sampleName, projectContext) + } else { + ret[[i]] = schema[[i]][["value"]] + } + } + } + ret = stats::setNames(ret[seq_along(schema)], names(schema)) # pad trailing results that define no 'value' with NULL so lengths match + return(ret) +} + + +#' Recursively populate paths in results of type object +#' +#' @param l list to populate paths in +#' @param project \code{\link[pepr]{Project-class}} object +#' @param sampleName name of the sample to populate the outputs for +#' @param projectContext a logical indicating whether project level attributes should be used +#' +#' @return list with populated paths +.populateRecursively <- function(l, project, sampleName, projectContext=FALSE) { + namesL = names(l) + if(projectContext) sampleName = NULL + for(i in seq_along(l)) { + if(is(l[[i]], "list")){ + l[[i]] = .populateRecursively( + l[[i]], project, sampleName, projectContext) + } else{ + if(namesL[i] %in% PATH_LIKE_KEYS) + l[[i]] = .populateString( + string=l[[i]], project=project, sampleName=sampleName, + projectContext=projectContext) + } + } + return(l) +} + + + + + diff --git a/R/utils.R b/R/utils.R index 9be2ff8..d541014 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,4 +1,42 @@ -# internal function used for wrapping the user-supplied function meessages +#' Determine whether the string is a valid URL +#' +#' @param str string to inspect +#' +#' @return logical indicating whether a string is a valid URL +.isValidUrl = 
function(str) { + ans = FALSE + if (grepl("www\\.|http:|https:", + str)) { + ans = RCurl::url.exists(str) + } + ans +} + +#' Read a YAML-formatted schema +#' +#' Remote or local schemas are supported +#' +#' @param path path to a local schema or URL pointing to a remote one +#' @param parent a path to parent folder to use +#' @return list read schema +#' @export +#' @importFrom RCurl getURLContent +#' @examples +#' readSchema('https://schema.databio.org/pep/2.0.0.yaml') +readSchema = function(path, parent = NULL) { + if (.isValidUrl(path)) + return(yaml::yaml.load(getURLContent(path))) + file = pepr::.makeAbsPath(path, + parent) + if (file.exists(file)) { + return(yaml::read_yaml(file)) + } + stop(paste0("Schema has to be either a valid URL or an existing path. ", + "Got: ", path)) +} + + +# internal function used for wrapping the user-supplied function messages # in a box .wrapFunMessages = function(messages, type) { n = options("width")[[1]] @@ -79,10 +117,7 @@ # its index If it is not # present, returns integer(0) .findProjectInList = function(l) { - which(as.logical(lapply(l, - function(x) { - is(x, "Project") - }))) + which(as.logical(lapply(l, function(x) { is(x, "Project") }))) } # internal function that wraps the external function execution @@ -115,6 +150,70 @@ return(res) } + +#' Get the preferred source of the bioconductor section +#' +#' @param p \code{\link[pepr]{Project-class}} object +#' @param projectLevel logical indicating whether only project-level pifaces +#' should be considered. Otherwise, only sample-level ones are. 
+#' +#' @return a list with the selected config +#' @importFrom pepr checkSection config +#' @importFrom methods new +.getBiocConfig = function(p, projectLevel = FALSE) { + if (checkSection(config(p), BIOC_SECTION)) { + # if the BIOC_SECTION section is found in the project + # config, override any other locations + message("The '", BIOC_SECTION, + "' key found in the Project config") + return(config(p)) + } + # check for BIOC_SECTION in pipeline interfaces + pifaceSource = gatherPipelineInterfaces(p, projectLevel=projectLevel) + if (length(pifaceSource) > 0) { + if (length(pifaceSource) > 1) + message(length(pifaceSource), " pipeline interface sources matched. ", + "Using the first one: ", pifaceSource[1]) + pifaceSource = pifaceSource[1] + } + + if (length(pifaceSource) > 0) { + piface = yaml::read_yaml(pifaceSource) + if (pepr::.checkSection(piface, BIOC_SECTION)) { + message("The '", BIOC_SECTION, "' key found in the pipeline interface") + return(.makeReadFunPathAbs(piface, parent=dirname(pifaceSource))) + } else { + warning("The '", BIOC_SECTION, + "' key is missing in Project config and pipeline interface") + return(invisible(NULL)) + } + } else { + warning("The '", BIOC_SECTION, + "' key is missing in Project config and pipeline interface") + return(invisible(NULL)) + } +} + +#' Make readFunPath absolute +#' +#' Uses the absolute pipeline interface path in the config to determine the +#' absolute path to the readFunPath file that consists of the data +#' processing function +#' +#' @param piface \code{\link[pepr]{Config-class}}/list with a pipeline interface +#' @param parent a path to parent folder to use +#' +#' @return piface \code{\link[pepr]{Config-class}} pipeline interface with +#' the readFunPath made absolute +.makeReadFunPathAbs = function(piface, parent) { + pth = piface[[BIOC_SECTION]][[FUNCTION_PATH]] + absReadFunPath = .makeAbsPath(pth, parent) + if (!.isAbsolute(absReadFunPath)) + stop("Failed to make the readFunPath absolute: ", absReadFunPath) + 
piface[[BIOC_SECTION]][[FUNCTION_PATH]] = absReadFunPath + piface +} + # Create an absolute path from a primary target and a parent candidate. # # @param perhapsRelative: Path to primary target directory. @@ -248,4 +347,116 @@ selectMethod("show", "Project")(pep) }, where = parent.frame()) +} + +#' Switch from python to R list accession syntax +#' +#' Python uses a dot to access attributes, while R uses \code{$}; this function +#' converts the python style into R so that we can use R code to populate +#' variables with R lists. From this: '\code{{sample.name}}' +#' to this: '\code{{sample$name}}' +#' @param str String to recode +#' @return string with the recoded accession syntax +#' @export +#' @examples +#' .pyToR('{sample.genome}/{sample.read_type}/test') +.pyToR = function(str) { + # This is the regex where the + # magic happens + pytor = function(str) gsub("(\\{[^\\.\\}]+)\\.", + "\\1$", str) + # This loop allows multi-layer + # accession + res = str + prev = "" + while (prev != res) { + prev = res + res = pytor(res) + } + return(res) +} + +#' Populate a variable-encoded string with sample/project variables +#' +#' Given a string and a project this function will go through samples and +#' populate the variables. Used to return real files for each sample from an +#' output variable in the pipeline interface +#' +#' @param string Variable-encoded string to populate +#' @param project \code{\link[pepr]{Project-class}} object with values +#' to draw from +#' @param sampleName string, name of the sample to use +#' @param projectContext logical indicating whether project context should be +#' applied for string formatting. Default: sample +#' +#' @return a named list of populated strings +#' @importFrom glue glue +.populateString = function(string, project, sampleName = NULL, projectContext = FALSE) { + # Apply this glue function on + # each row in the samples + # table, coerced to a list + # object to allow attribute + # accession. 
+ samplesSubset = subset(sampleTable(project), sample_name == sampleName) + if (!projectContext && NROW(samplesSubset) < 1) return(invisible(NULL)) + if (projectContext) { + populatedStrings = with( + config(project), as.character(glue(.pyToR(string)))) + } else { + populatedStrings = as.character(apply( + samplesSubset, 1, function(s) { + with(s, as.character(glue(.pyToR(string)))) + })) + } + if (!projectContext && length(populatedStrings) != + NROW(samplesSubset)) { + warning("Paths templates populating problem: number of paths (", + length(populatedStrings), + ") does not correspond to the number of samples (", + NROW(samplesSubset), + "). Path template '", + string, "' will not be populated") + return(invisible(NULL)) + } + return(populatedStrings) +} + + +#' Populate list of path templates +#' +#' @param project an object of \code{\link[pepr]{Config-class}} +#' @param templList list of strings, +#' like: 'aligned_{sample.genome}/{sample.sample_name}_sort.bam' +#' @param sampleName string, name of the protocol to select the samples +#' @param projectContext logical indicating whether project context +#' should be applied. 
Default: sample +#' +#' @return list of strings +.populateTemplates = function(project, templList, sampleName = NULL, + projectContext = FALSE) { + if (!projectContext && is.null(sampleName)) + stop("Must specify the sample to populate templates for") + expandedTemplList = lapply(templList, pepr::.expandPath) + x=lapply(expandedTemplList, .populateString, project, sampleName, projectContext) + return(x) +} + + +#' Validate type of the pipeline interface +#' +#' @param piface pipeline interface to inspect +#' @param type string, type of the pipeline interface, either "sample" or "project" +#' +#' @return a logical indicating whether the pipeline interface matches the specified type +.checkPifaceType <- function(piface, type) { + if(!.checkSection(piface, PIP_NAME_KEY)) + stop(PIP_NAME_KEY, " section missing in pipeline interface") + if (!pepr::.checkSection(piface, PIP_TYPE_KEY) || + piface[[PIP_TYPE_KEY]] != type) { + warning(sprintf( + "%s pipeline interface has to specify '%s' pipeline type in '%s'", + type, type, PIP_TYPE_KEY)) + return(FALSE) + } + return(TRUE) } \ No newline at end of file diff --git a/_pkgdown.yaml b/_pkgdown.yaml index e4f25fa..2a8a1d5 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -32,6 +32,7 @@ articles: - vignette2multipleArguments - vignette3simpleCache - vignette4remoteData + - vignette5piface - vignette6tximeta reference: diff --git a/inst/extdata/example_peps-master/.gitignore b/inst/extdata/example_peps-master/.gitignore new file mode 100755 index 0000000..aae0f17 --- /dev/null +++ b/inst/extdata/example_peps-master/.gitignore @@ -0,0 +1,95 @@ +# Specific ignores: + +.ipynb_checkpoints/ +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Gedit temporary files +*~ + +# Openoffice lock files +.~* + +# Rcaches +RCache/* + +# Compiled source +*.com +*.class +*.dll +*.exe +*.o +*.so + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ 
+build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Gedit temporary files +*~ + +# Openoffice lock files +.~* + + +# Rcaches +RCache/* + +# macOS +*.DS_Store \ No newline at end of file diff --git a/inst/extdata/example_peps-master/README.md b/inst/extdata/example_peps-master/README.md new file mode 100644 index 0000000..78520a6 --- /dev/null +++ b/inst/extdata/example_peps-master/README.md @@ -0,0 +1,25 @@ +# example_peps + +This repository contains examples of **PEPs** (Portable Encapsulated Projects). Visit the [PEP2.0.0 specification website](http://pep.databio.org) to learn more about the PEP standard and features. Explore the examples interactively with `Python` or `R`: + + +## Python + +Your basic python workflow uses the [`peppy`](http://github.com/pepkit/peppy) package and starts out like this: + +```python +import peppy +proj1 = peppy.Project("example_basic/project_config.yaml") +``` +More detailed Python vignettes are available as part of the [documentation for the `peppy` package](http://peppy.databio.org/en/latest/). + +## R + +Your basic `R` workflow uses the [`pepr`](http://github.com/pepkit/pepr) package and starts like this: + +```r +library('pepr') +p = pepr::Project("example_basic/project_config.yaml") +``` + +More detailed R vignettes are available as part of the [documentation for the `pepr` package](http://code.databio.org/pepr). 
diff --git a/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R b/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R index 722888a..a57d55f 100644 --- a/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R +++ b/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R @@ -1,5 +1,6 @@ -readBedFiles_resize = function(project, resize.width) { +readBedFiles_resize = function(project, resize.width, test.arg) { cwd = getwd() + print(paste0("this is the test arg: ", test.arg)) paths = pepr::sampleTable(project)$file_path sampleNames = pepr::sampleTable(project)$sample_name setwd(dirname(project@file)) diff --git a/inst/extdata/example_peps-master/example_piface/annotation_sheet.csv b/inst/extdata/example_peps-master/example_piface/annotation_sheet.csv new file mode 100644 index 0000000..fdccccf --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/annotation_sheet.csv @@ -0,0 +1,4 @@ +sample_name,protocol,data_source,SRR,Sample_geo_accession,read1,read2 +sample1,PROTO1,SRA,SRR5210416,GSM2471255,SRA_1,SRA_2 +sample2,PROTO1,SRA,SRR5210450,GSM2471300,SRA_1,SRA_2 +sample3,PROTO2,SRA,SRR5210398,GSM2471249,SRA_1,SRA_2 \ No newline at end of file diff --git a/inst/extdata/example_peps-master/example_piface/output_schema_project.yaml b/inst/extdata/example_peps-master/example_piface/output_schema_project.yaml new file mode 100644 index 0000000..e619944 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/output_schema_project.yaml @@ -0,0 +1,45 @@ +output_file_in_object: + type: object + value: { + "prop2": {"path": "{attribute1}_{attribute2}_test1.pdf", "thumbnail_path": "{attribute1}_{attribute2}_test1.png", "title": "test title"}, + "prop1": {"path": "{attribute1}_{attribute2}_test1.pdf", "title": "test title"} + } + properties: + prop1: + type: file + prop2: + type: image + description: "Object output" +number_of_things: + type: integer + value: 1 + 
description: "Number of things" +percentage_of_things: + type: number + description: "Percentage of things" +name_of_something: + type: string + value: "my name" + description: "Name of something" +switch_value: + type: boolean + value: True + description: "Is the switch on of off" +collection_of_things: + type: array + value: [1, 2, 3] + description: "This store collection of values" +output_object_missing_value: + type: object + description: "Object output" +output_file: + type: file + description: "This a path to the output file" +output_file1: + type: file + value: {"path": "{attribute1}_{attribute2}_output_file1.pdf", "title": "test title"} + description: "This a path to the output file" +output_image: + type: image + value: {"path": "test2.pdf", "thumbnail_path": "test2.png", "title": "test title"} + description: "This a path to the output image" diff --git a/inst/extdata/example_peps-master/example_piface/output_schema_sample.yaml b/inst/extdata/example_peps-master/example_piface/output_schema_sample.yaml new file mode 100644 index 0000000..0495a86 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/output_schema_sample.yaml @@ -0,0 +1,44 @@ +output_file_in_object: + type: object + value: { + "prop2": {"path": "{protocol}_{SRR}_test1.pdf", "thumbnail_path": "{protocol}_{SRR}_test1.png", "title": "test title"}, + "prop1": {"path": "{protocol}_{SRR}_test1.pdf", "title": "test title"} + } + properties: + prop1: + type: file + prop2: + type: image + description: "Object output" +number_of_things: + type: integer + value: 1 + description: "Number of things" +percentage_of_things: + type: number + description: "Percentage of things" +name_of_something: + type: string + description: "Name of something" +switch_value: + type: boolean + value: True + description: "Is the switch on of off" +collection_of_things: + type: array + value: [1, 2, 3] + description: "This store collection of values" +output_object_missing_value: + type: object + description: 
"Object output" +output_file: + type: file + description: "This a path to the output file" +output_file1: + type: file + value: {"path": "{protocol}_{SRR}_output_file1.pdf", "title": "test title"} + description: "This a path to the output file" +output_image: + type: image + value: {"path": "test2.pdf", "thumbnail_path": "test2.png", "title": "test title"} + description: "This a path to the output image" diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml new file mode 100644 index 0000000..61c9317 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml @@ -0,0 +1,11 @@ +pipeline_name: PIPELINE1 +pipeline_type: project +var_templates: + path: "{looper.piface_dir}/pipelines/proj_pipeline1.py" +output_schema: output_schema_project.yaml +command_template: > + {pipeline.var_templates.path} --project-name {project.name} + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml new file mode 100644 index 0000000..aca6181 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml @@ -0,0 +1,12 @@ +pipeline_name: PIPELINE1 +pipeline_type: sample +var_templates: + path: "{looper.piface_dir}/pipelines/pipeline1.py" +input_schema: https://schema.databio.org/pep/2.0.0.yaml +output_schema: output_schema_sample.yaml +command_template: > + {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml new file mode 100644 
index 0000000..b3f619c --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml @@ -0,0 +1,13 @@ +pipeline_name: OTHER_PIPELINE2 +pipeline_type: project +var_templates: + path: "{looper.piface_dir}/pipelines/proj_pipeline2.py" +output_schema: output_schema_project.yaml +command_template: > + {pipeline.var_templates.path} --project-name {project.name} +compute: + size_dependent_variables: resources-project.tsv + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml new file mode 100644 index 0000000..9da96ea --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml @@ -0,0 +1,13 @@ +pipeline_name: OTHER_PIPELINE2 +pipeline_type: sample +var_templates: + path: "{looper.piface_dir}/pipelines/other_pipeline2.py" +output_schema: output_schema_sample.yaml +command_template: > + {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} +compute: + size_dependent_variables: resources-sample.tsv + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/inst/extdata/example_peps-master/example_piface/project_config.yaml b/inst/extdata/example_peps-master/example_piface/project_config.yaml new file mode 100644 index 0000000..9ffc983 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/project_config.yaml @@ -0,0 +1,19 @@ +pep_version: "2.0.0" +name: test +attribute1: value1 +attribute2: value2 + +sample_table: annotation_sheet.csv +looper: + output_dir: ../output + pipeline_interfaces: ["pipeline_interface1_project.yaml", "pipeline_interface2_project.yaml"] + +sample_modifiers: + append: + attr: "val" + pipeline_interfaces: ["pipeline_interface1_sample.yaml", "pipeline_interface2_sample.yaml"] + derive: + attributes: [read1, read2] + 
sources: + SRA_1: "{SRR}_1.fastq.gz" + SRA_2: "{SRR}_2.fastq.gz" diff --git a/inst/extdata/example_peps-master/example_piface/readData.R b/inst/extdata/example_peps-master/example_piface/readData.R new file mode 100644 index 0000000..bd5bb4e --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/readData.R @@ -0,0 +1,12 @@ +readData = function(project, sampleName="sample1") { + lapply(getOutputsBySample(project, sampleName), function(x) { + lapply(x, function(x1){ + lapply(x1, function(x2){ + message("Reading: ", x2) + df[[x2]] = read.table(x2, stringsAsFactors=F) + colnames(df)[1:3] = c('chr', 'start', 'end') + }) + }) + }) + GenomicRanges::GRanges(df) +} diff --git a/inst/extdata/example_peps-master/example_piface/resources-project.tsv b/inst/extdata/example_peps-master/example_piface/resources-project.tsv new file mode 100644 index 0000000..4efd0f1 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/resources-project.tsv @@ -0,0 +1,6 @@ +max_file_size cores mem time +0.05 1 12000 00-01:00:00 +0.5 1 16000 00-01:00:00 +1 1 16000 00-01:00:00 +10 1 16000 00-01:00:00 +NaN 1 32000 00-02:00:00 diff --git a/inst/extdata/example_peps-master/example_piface/resources-sample.tsv b/inst/extdata/example_peps-master/example_piface/resources-sample.tsv new file mode 100644 index 0000000..20ec284 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/resources-sample.tsv @@ -0,0 +1,7 @@ +max_file_size cores mem time +0.001 1 8000 00-04:00:00 +0.05 2 12000 00-08:00:00 +0.5 4 16000 00-12:00:00 +1 8 16000 00-24:00:00 +10 16 32000 02-00:00:00 +NaN 32 32000 04-00:00:00 diff --git a/man/BiocProject.Rd b/man/BiocProject.Rd index e81199b..9ad11a2 100644 --- a/man/BiocProject.Rd +++ b/man/BiocProject.Rd @@ -9,6 +9,7 @@ BiocProject( amendments = NULL, autoLoad = TRUE, func = NULL, + projectLevel = FALSE, funcArgs = NULL ) } @@ -26,6 +27,9 @@ it must take the \code{\link[pepr]{Project-class}} as an argument. 
See \code{Details} for more information} +\item{projectLevel}{logical indicating whether a only project-level pifaces +should be considered. Otherwise, only sample-level ones are.} + \item{funcArgs}{a named list with arguments you want to pass to the \code{func}. The PEP will be passed automatically, diff --git a/man/dot-checkPifaceType.Rd b/man/dot-checkPifaceType.Rd new file mode 100644 index 0000000..7c9ce5c --- /dev/null +++ b/man/dot-checkPifaceType.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.checkPifaceType} +\alias{.checkPifaceType} +\title{Validate type of the pipeline interface} +\usage{ +.checkPifaceType(piface, type) +} +\arguments{ +\item{piface}{pipeline interface to inspect} + +\item{type}{string, type of the pipeline interface, either "sample" or "project"} +} +\value{ +a logical indicating whether the pipeline interface matches the specified type +} +\description{ +Validate type of the pipeline interface +} diff --git a/man/dot-getBiocConfig.Rd b/man/dot-getBiocConfig.Rd new file mode 100644 index 0000000..23da525 --- /dev/null +++ b/man/dot-getBiocConfig.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.getBiocConfig} +\alias{.getBiocConfig} +\title{Get the preferred source of the bioconductor section} +\usage{ +.getBiocConfig(p, projectLevel = FALSE) +} +\arguments{ +\item{p}{\code{\link[pepr]{Project-class}} object} + +\item{projectLevel}{logical indicating whether a only project-level pifaces +should be considered. 
Otherwise, only sample-level ones are.} +} +\value{ +a list with the selected config +} +\description{ +Get the preferred source of the bioconductor section +} diff --git a/man/dot-isValidUrl.Rd b/man/dot-isValidUrl.Rd new file mode 100644 index 0000000..5416df9 --- /dev/null +++ b/man/dot-isValidUrl.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.isValidUrl} +\alias{.isValidUrl} +\title{Determine whether the string is a valid URL} +\usage{ +.isValidUrl(str) +} +\arguments{ +\item{str}{string to inspect} +} +\value{ +logical indicating whether a string is a valid URL +} +\description{ +Determine whether the string is a valid URL +} diff --git a/man/dot-makeReadFunPathAbs.Rd b/man/dot-makeReadFunPathAbs.Rd new file mode 100644 index 0000000..3760c7a --- /dev/null +++ b/man/dot-makeReadFunPathAbs.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.makeReadFunPathAbs} +\alias{.makeReadFunPathAbs} +\title{Make readFunPath absolute} +\usage{ +.makeReadFunPathAbs(piface, parent) +} +\arguments{ +\item{piface}{\code{\link[pepr]{Config-class}}/list with a pipeline interface} + +\item{parent}{a path to parent folder to use} +} +\value{ +piface \code{\link[pepr]{Config-class}} pipeline interface with +the readFunPath made absolute +} +\description{ +Uses the absolute pipeline interface path in the config to determine the +absolute path to the readFunPath file that consists of the data +processing function +} diff --git a/man/dot-populateRecursively.Rd b/man/dot-populateRecursively.Rd new file mode 100644 index 0000000..1ff55a2 --- /dev/null +++ b/man/dot-populateRecursively.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{.populateRecursively} +\alias{.populateRecursively} +\title{Recursively populate paths in results of type object} +\usage{ +.populateRecursively(l, 
project, sampleName, projectContext = FALSE) +} +\arguments{ +\item{l}{list to populate paths in} + +\item{project}{\code{\link[pepr]{Project-class}} object} + +\item{sampleName}{name of the sample to populate the outputs for} + +\item{projectContext}{a logical indicating whether project level attributes should be used} +} +\value{ +list with populate paths +} +\description{ +Recursively populate paths in results of type object +} diff --git a/man/dot-populateSchemaPaths.Rd b/man/dot-populateSchemaPaths.Rd new file mode 100644 index 0000000..588878f --- /dev/null +++ b/man/dot-populateSchemaPaths.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{.populateSchemaPaths} +\alias{.populateSchemaPaths} +\title{Populate values in output schema} +\usage{ +.populateSchemaPaths( + schema, + project, + projectContext = FALSE, + sampleName = NULL +) +} +\arguments{ +\item{schema}{schema with value templates to populate} + +\item{project}{\code{\link[pepr]{Project-class}} object} + +\item{projectContext}{whether the values for path templates populating +should be sourced from the project metadata. Otherwise metadata for +a selected sample is used} + +\item{sampleName}{name of the sample to populate the outputs for. 
Required +if \code{projectContext} set to \code{FALSE}} +} +\value{ +a possibly nested list of length equal to the number of results defined in +the schema with populated outputs +} +\description{ +Populates schema values of type path and thumbnail path in the provided +output schema for the selected sample or project +} diff --git a/man/dot-populateString.Rd b/man/dot-populateString.Rd new file mode 100644 index 0000000..fd5a387 --- /dev/null +++ b/man/dot-populateString.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.populateString} +\alias{.populateString} +\title{Populate a variable-encoded string with sample/project variables} +\usage{ +.populateString(string, project, sampleName = NULL, projectContext = FALSE) +} +\arguments{ +\item{string}{Variable-encoded string to populate} + +\item{project}{\code{\link[pepr]{Project-class}} object with values +to draw from} + +\item{sampleName}{string, name of the sample to use} + +\item{projectContext}{logical indicating whether project context should be +applied for string formatting. Default: sample} +} +\value{ +a named list of populated strings +} +\description{ +Given a string and a project this function will go through samples and +populate the variables. 
Used to return real files for each sample from an +output variable in the pipeline interface +} diff --git a/man/dot-populateTemplates.Rd b/man/dot-populateTemplates.Rd new file mode 100644 index 0000000..aa51a03 --- /dev/null +++ b/man/dot-populateTemplates.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.populateTemplates} +\alias{.populateTemplates} +\title{Populate list of path templates} +\usage{ +.populateTemplates( + project, + templList, + sampleName = NULL, + projectContext = FALSE +) +} +\arguments{ +\item{project}{an object of \code{\link[pepr]{Config-class}}} + +\item{templList}{list of strings, +like: 'aligned_{sample.genome}/{sample.sample_name}_sort.bam'} + +\item{sampleName}{string, name of the protocol to select the samples} + +\item{projectContext}{logical indicating whether project context +should be applied. Default: sample} +} +\value{ +list of strings +} +\description{ +Populate list of path templates +} diff --git a/man/dot-pyToR.Rd b/man/dot-pyToR.Rd new file mode 100644 index 0000000..7c37f46 --- /dev/null +++ b/man/dot-pyToR.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.pyToR} +\alias{.pyToR} +\title{Switch from python to R list accession syntax} +\usage{ +.pyToR(str) +} +\arguments{ +\item{str}{String to recode} +} +\value{ +string with the recoded accession syntax +} +\description{ +Python uses a dot to access attributes, while R uses \code{$}; this function +converts the python style into R so that we can use R code to populate +variables with R lists. 
From this: '\code{{sample.name}}' +to this: '\code{{sample$name}}' +} +\examples{ +.pyToR('{sample.genome}/{sample.read_type}/test') +} diff --git a/man/gatherPipelineInterfaces.Rd b/man/gatherPipelineInterfaces.Rd new file mode 100644 index 0000000..8a0c0b3 --- /dev/null +++ b/man/gatherPipelineInterfaces.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{gatherPipelineInterfaces} +\alias{gatherPipelineInterfaces} +\alias{gatherPipelineInterfaces,Project-method} +\alias{.gatherSamplePipelineInterfaces,Project-method} +\title{Collect all pipeline interfaces} +\usage{ +gatherPipelineInterfaces(project, ...) + +\S4method{gatherPipelineInterfaces}{Project}(project, projectLevel = FALSE) + +\S4method{.gatherSamplePipelineInterfaces}{Project}(project) +} +\arguments{ +\item{project}{\code{\link[pepr]{Project-class}} object} + +\item{...}{other arguments} + +\item{projectLevel}{logical indicating whether a only project-level pifaces +should be considered. Otherwise, only sample-level ones are.} +} +\value{ +a list of pipeline interface file paths. 
+} +\description{ +Collects all relevant pipeline interfaces +for this \code{\link[pepr]{Project-class}} +} +\section{Methods (by class)}{ +\itemize{ +\item \code{Project}: Collect all pipeline interfaces + +\item \code{Project}: extracts pipeline outputs +for a given pipeline +}} + +\examples{ +projectConfig = system.file('extdata', +'example_peps-master', +'example_piface', +'project_config.yaml', +package = 'BiocProject') +p = Project(file = projectConfig) +gatherPipelineInterfaces(p) +gatherPipelineInterfaces(p, TRUE) +} diff --git a/man/getOutputsBySample.Rd b/man/getOutputsBySample.Rd new file mode 100644 index 0000000..81989bf --- /dev/null +++ b/man/getOutputsBySample.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{getOutputsBySample} +\alias{getOutputsBySample} +\alias{getOutputsBySample,Project-method} +\title{Populates and returns output files for a given sample} +\usage{ +getOutputsBySample(project, ...) + +\S4method{getOutputsBySample}{Project}(project, sampleNames = NULL) +} +\arguments{ +\item{project}{\code{\link[pepr]{Project-class}} object} + +\item{...}{other arguments} + +\item{sampleNames}{names of the samples} +} +\value{ +a list of output file paths. 
The order of the first level of the +list corresponds to the order of the pipeline interface files, second level +is a named list of file paths populated by the samples +} +\description{ +Returns the sample level pipeline outputs which are defined in the output +schema indicated by the pipeline interface indicated in the +\code{\link[pepr]{Project-class}} +} +\section{Methods (by class)}{ +\itemize{ +\item \code{Project}: Populates and returns output files +for a given sample +}} + +\examples{ +projectConfig = system.file('extdata', +'example_peps-master', +'example_piface', +'project_config.yaml', +package = 'BiocProject') +p = Project(file = projectConfig) +getOutputsBySample(p) +getOutputsBySample(p, 'sample1') +} diff --git a/man/getProjectOutputs.Rd b/man/getProjectOutputs.Rd new file mode 100644 index 0000000..f070bb5 --- /dev/null +++ b/man/getProjectOutputs.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{getProjectOutputs} +\alias{getProjectOutputs} +\alias{getProjectOutputs,Project-method} +\title{Populates and returns outputs for a given \code{\link[pepr]{Project-class}}} +\usage{ +getProjectOutputs(project) + +\S4method{getProjectOutputs}{Project}(project) +} +\arguments{ +\item{project}{\code{\link[pepr]{Project-class}} object} +} +\value{ +a list of output file paths. 
The order of the first level of the +list corresponds to the order of the pipeline interface files, second level +is a named list of file paths populated +by the \code{\link[pepr]{Project-class}} +} +\description{ +Returns the project level pipeline outputs which are defined in the output +schema indicated by the pipeline interface indicated in the +\code{\link[pepr]{Project-class}} +} +\section{Methods (by class)}{ +\itemize{ +\item \code{Project}: Populates and returns output files for +a given \code{\link[pepr]{Project-class}} +}} + +\examples{ +projectConfig = system.file('extdata', +'example_peps-master', +'example_piface', +'project_config.yaml', +package = 'BiocProject') +p = Project(file = projectConfig) +getProjectOutputs(p) +} diff --git a/man/pipelineInterfacesBySample.Rd b/man/pipelineInterfacesBySample.Rd new file mode 100644 index 0000000..6253e8d --- /dev/null +++ b/man/pipelineInterfacesBySample.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{pipelineInterfacesBySample} +\alias{pipelineInterfacesBySample} +\alias{pipelineInterfacesBySample,Project-method} +\title{Get pipeline interfaces by sample} +\usage{ +pipelineInterfacesBySample(project) + +\S4method{pipelineInterfacesBySample}{Project}(project) +} +\arguments{ +\item{project}{\code{\link[pepr]{Project-class}} object} +} +\value{ +a list of pipeline interface file paths keyed by sample names +} +\description{ +Collects all relevant pipeline interfaces for this +\code{\link[pepr]{Project-class}} and provides a sample to interfaces mapping +} +\section{Methods (by class)}{ +\itemize{ +\item \code{Project}: Get pipeline interfaces by sample +}} + +\examples{ +projectConfig = system.file('extdata', +'example_peps-master', +'example_piface', +'project_config.yaml', +package = 'BiocProject') +p = Project(file = projectConfig) +pipelineInterfacesBySample(p) +} diff --git a/man/readSchema.Rd b/man/readSchema.Rd new file 
mode 100644 index 0000000..a69b96c --- /dev/null +++ b/man/readSchema.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{readSchema} +\alias{readSchema} +\title{Read a YAML-formatted schema} +\usage{ +readSchema(path, parent = NULL) +} +\arguments{ +\item{path}{path to a local schema or URL pointing to a remote one} + +\item{parent}{a path to parent folder to use} +} +\value{ +list read schema +} +\description{ +Remote or local schemas are supported +} +\examples{ +readSchema('https://schema.databio.org/pep/2.0.0.yaml') +} diff --git a/tests/testthat/test_Annotated.R b/tests/testthat/test_Annotated.R new file mode 100644 index 0000000..083cab8 --- /dev/null +++ b/tests/testthat/test_Annotated.R @@ -0,0 +1,69 @@ +library(yaml) +# Prep data --------------------------------------------------------------- + +branch = "master" + +configFile = system.file( + "extdata", + paste0("example_peps-",branch), + "example_BiocProject", + "project_config.yaml", + package = "BiocProject" +) + +configFileArgs = system.file( + "extdata", + paste0("example_peps-",branch), + "example_BiocProject", + "project_config_resize.yaml", + package = "BiocProject" +) + +configFileMissingFun = system.file( + "test_projects", + "faulty_project", + "project_config_no_function.yaml", + package = "BiocProject" +) + +configFileNoSection = system.file( + "test_projects", + "faulty_project", + "project_config_no_section.yaml", + package = "BiocProject" +) + +bp = BiocProject(configFile) + +# Test -------------------------------------------------------------------- + +context("Test Annotated methods") + +test_that("samples returns a correct object", { + expect_is(sampleTable(bp),"data.table") +}) + +test_that("config returns a correct object", { + expect_is(config(bp),"Config") +}) + +test_that(".is.project returns a correct object", { + expect_is(.is.project(bp),"logical") +}) + +test_that(".is.project returns a value", { + 
expect_equal(.is.project(bp),TRUE) + expect_equal(.is.project(S4Vectors::List(a=1)), FALSE) +}) + +test_that("is method returns correct value when Annotated provided", { + expect_equal(is(bp,"Project"), TRUE) +}) + +test_that("getProject returns a correct object", { + expect_is(getProject(bp),"Project") +}) + +test_that("getProject returns a correct value", { + expect_equal(getProject(bp), pepr::Project(configFile)) +}) \ No newline at end of file diff --git a/tests/testthat/test_BiocProject.R b/tests/testthat/test_BiocProject.R new file mode 100644 index 0000000..0665854 --- /dev/null +++ b/tests/testthat/test_BiocProject.R @@ -0,0 +1,127 @@ +library(yaml) +# Prep data --------------------------------------------------------------- + +branch = "master" + +configFile = system.file( + "extdata", + paste0("example_peps-",branch), + "example_BiocProject", + "project_config.yaml", + package = "BiocProject" +) + +configFileArgs = system.file( + "extdata", + paste0("example_peps-",branch), + "example_BiocProject", + "project_config_resize.yaml", + package = "BiocProject" +) + +configFileMissingFun = system.file( + "test_projects", + "faulty_project", + "project_config_no_function.yaml", + package = "BiocProject" +) + +configFileNoSection = system.file( + "test_projects", + "faulty_project", + "project_config_no_section.yaml", + package = "BiocProject" +) + +configPiface = system.file( + "extdata", + paste0("example_peps-",branch), + "example_piface", + "project_config.yaml", + package = "BiocProject" +) + + +bp = BiocProject(configFile) + +a=function(arg) { + stop(arg) +} + +b=function(arg) { + warning(arg) +} + +c=function(arg) { + return(arg) +} + +testChar = "a" + +# Test -------------------------------------------------------------------- + +context("Test BiocProject function") + +test_that("BiocProject function return correct object", { + expect_is(BiocProject(configFile),"Annotated") +}) + +test_that("BiocProject function works with arguments", { + 
expect_is(BiocProject(configFileArgs),"Annotated") + expect_is(BiocProject(configFileArgs, funcArgs = list(resize.width=200)), + "Annotated") +}) + +test_that("BiocProject function returns Annotated when provided objects of + different class and thorows a warning", { + expect_warning(expect_is(BiocProject(configFile, func = function(x){ + return("test") + }),"Annotated")) + }) + +test_that("BiocProject function returns a Project object + when autoload is set to FALSE", { + expect_is(BiocProject(file=configFile,autoLoad = FALSE),"Project") + }) + +test_that("BiocProject function throws errors/warnings + when the arguments are inappropriate", { + expect_error(BiocProject(file=configFile,func = "2")) + expect_error(BiocProject(file = "test")) + expect_error(BiocProject(file = configFile,autoLoad = "test")) + }) + +test_that("BiocProject function catches errors in the user-provided + function returns the error message as Annotated", { + expect_is(BiocProject(file=configFile,func=function(x) { + stop("test") + }),"Annotated") + }) + +test_that("BiocProject function catches errors when the function specified + does not exist", { + expect_error(BiocProject(configFileMissingFun)) + }) + +test_that("BiocProject function throws a warning and returns a Project object + when no bioconductor section found", { + expect_warning(expect_is(BiocProject(configFileNoSection),"Project")) + }) + +test_that("BiocProject function reads the bioconductor section from the + pipeline interface if not found in the project config", { + expect_true(is(BiocProject(configPiface), "Project")) + }) + +test_that("BiocProject function returns a valid object when project pipeline outputs requested", { + expect_true(is(BiocProject(configPiface, projectLevel=TRUE), "Project")) +}) + +test_that("BiocProject disregards the Project object as a user-privided argument", { + expect_warning(expect_true(is(BiocProject(configFile, funcArgs = list(p=Project(configFile))), "Project"))) +}) + +test_that("BiocProject 
uses the function from the environment, if possible", { + source(config(Project(configFile))$bioconductor$readFunPath) + expect_true(is(BiocProject(configFile), "Project")) +}) \ No newline at end of file diff --git a/tests/testthat/test_piface.R b/tests/testthat/test_piface.R new file mode 100644 index 0000000..3e324e2 --- /dev/null +++ b/tests/testthat/test_piface.R @@ -0,0 +1,69 @@ +library(yaml) +# Prep data --------------------------------------------------------------- + +branch = "master" + +configFile = system.file( + "extdata", + paste0("example_peps-", branch), + "example_piface", + "project_config.yaml", + package = "BiocProject" +) + +configNoPifaces = system.file( + "extdata", + paste0("example_peps-", branch), + "example_BiocProject", + "project_config.yaml", + package = "BiocProject" +) + +p = pepr::Project(configFile) +pifaces = gatherPipelineInterfaces(p) +piface = pifaces[[1]] + +pNoPifaces = pepr::Project(configNoPifaces) + +samplesTable = sampleTable(p) + +# Test -------------------------------------------------------------------- + +context("Test gatherPipelineInterfaces function") + +test_that("gatherPipelineInterfaces function returns a character", { + expect_is(gatherPipelineInterfaces(p),"character") +}) + +test_that("gatherPipelineInterfaces function returns an object of + correct length", { + expect_equal(length(gatherPipelineInterfaces(p)), 2) + }) + +test_that("gatherPipelineInterfaces returns NULL when no piface section + not found", { + expect_null(gatherPipelineInterfaces(pNoPifaces)) + }) + +test_that("gatherPipelineInterfaces works for project with no pipeline interfaces defined", { + expect_warning(expect_null(gatherPipelineInterfaces(pNoPifaces, projectLevel=TRUE))) +}) + +context("Test output getters") + +test_that("getProjectOutputs function returns a list", { + expect_is(getProjectOutputs(p), "list") +}) + +test_that("getOutputsBySample function returns a list", { + expect_is(getOutputsBySample(p), "list") +}) + 
+test_that("getOutputsBySample function allows for specific sample selection", { + expect_false(length(getOutputsBySample(p)) == + length(getOutputsBySample(p, sampleNames="sample1"))) +}) + +test_that("getOutputsBySample errors when non-existent sample selected", { + expect_error(getOutputsBySample(p, sampleNames="bogusSample")) +}) \ No newline at end of file diff --git a/tests/testthat/test_all.R b/tests/testthat/test_utils.R similarity index 50% rename from tests/testthat/test_all.R rename to tests/testthat/test_utils.R index 6923546..4e129ac 100644 --- a/tests/testthat/test_all.R +++ b/tests/testthat/test_utils.R @@ -1,9 +1,11 @@ library(yaml) # Prep data --------------------------------------------------------------- +branch = "master" + configFile = system.file( "extdata", - "example_peps-master", + paste0("example_peps-",branch), "example_BiocProject", "project_config.yaml", package = "BiocProject" @@ -11,7 +13,7 @@ configFile = system.file( configFileArgs = system.file( "extdata", - "example_peps-master", + paste0("example_peps-",branch), "example_BiocProject", "project_config_resize.yaml", package = "BiocProject" @@ -31,7 +33,6 @@ configFileNoSection = system.file( package = "BiocProject" ) - bp = BiocProject(configFile) a=function(arg) { @@ -49,6 +50,7 @@ c=function(arg) { testChar = "a" # Test -------------------------------------------------------------------- + context("Test .unionList utility function") test_that(".unionList returns correct object type", { @@ -64,6 +66,7 @@ test_that(".unionList returns list of correct length", { test_that(".unionList throws errors", { expect_error(.unionList(list(a=1),2)) }) + context("Test .makeAbsPath utility function") test_that(".makeAbsPath returns correct object", { @@ -101,7 +104,6 @@ context("Test .callBiocFun untility function") test_that(".callBiocFun catches errors", { expect_error(expect_error(.callBiocFun(a,list(testChar)))) - expect_equal(.callBiocFun(a,list(testChar)),S4Vectors::List(testChar)) 
expect_warning(.callBiocFun(b,list(testChar))) }) @@ -129,90 +131,4 @@ test_that(".insertPEP returns correct object",{ test_that(".insertPEP throws errors",{ expect_error(.insertPEP(S4Vectors::List(),"test")) -}) - -context("Test BiocProject function") - -test_that("BiocProject function return correct object", { - expect_is(BiocProject(configFile),"Annotated") -}) - -test_that("BiocProject function works with arguments", { - expect_is(BiocProject(configFileArgs),"Annotated") - expect_is(BiocProject(configFileArgs, funcArgs = list(resize.width=200)), "Annotated") -}) - -# test_that("BiocProject function overrides the arguments specified in the config -# file with ones that have the same names in the funcArgs list", { -# expect_failure(expect_identical( -# BiocProject(configFileArgs), -# BiocProject(configFileArgs, funcArgs = list(resize.width = 200)) -# )) -# }) - -test_that("BiocProject function returns Annotated when provided objects of - different class and thorows a warning", { - expect_warning(expect_is(BiocProject(configFile, func = function(x){ - return("test") - }),"Annotated")) - }) - -test_that("BiocProject function returns a Project object - when autoload is set to FALSE", { - expect_is(BiocProject(file=configFile,autoLoad = FALSE),"Project") - }) - -test_that("BiocProject function throws errors/warnings - when the arguments are inappropriate", { - expect_error(BiocProject(file=configFile,func = "2")) - expect_error(BiocProject(file = "test")) - expect_error(BiocProject(file = configFile,autoLoad = "test")) - }) - -test_that("BiocProject function catches errors in the user-provided - function returns the error message as Annotated", { - expect_is(BiocProject(file=configFile,func=function(x) { - stop("test") - }),"Annotated") - }) - -test_that("BiocProject function catches errors when the function specified - does not exist", { - expect_error(BiocProject(configFileMissingFun)) - }) - -test_that("BiocProject function throws a warning and returns a Project 
object - when no bioconductor section found",{ - expect_warning(expect_is(BiocProject(configFileNoSection),"Project")) - }) - -context("Test Annotated methods") - -test_that("samples returns a correct object", { - expect_is(sampleTable(bp),"data.table") -}) - -test_that("config returns a correct object", { - expect_is(config(bp),"Config") -}) - -test_that(".is.project returns a correct object", { - expect_is(.is.project(bp),"logical") -}) - -test_that(".is.project returns a value", { - expect_equal(.is.project(bp),TRUE) - expect_equal(.is.project(S4Vectors::List(a=1)), FALSE) -}) - -test_that("is method returns correct value when Annotated provided", { - expect_equal(is(bp,"Project"), TRUE) -}) - -test_that("getProject returns a correct object", { - expect_is(getProject(bp),"Project") -}) - -test_that("getProject returns a correct value", { - expect_equal(getProject(bp), pepr::Project(configFile)) }) \ No newline at end of file diff --git a/update_examples.sh b/update_examples.sh index 3f7f85b..20ff490 100755 --- a/update_examples.sh +++ b/update_examples.sh @@ -1,5 +1,16 @@ -wget https://github.com/pepkit/example_peps/archive/master.zip -unzip master.zip -rm -rf inst/extdata/example_peps-master -mv example_peps-master inst/extdata -rm master.zip \ No newline at end of file +#!/bin/bash + +if [ $# -ne 1 ]; then + echo "usage: $0 branch" >&2 + exit 1 +fi + +branch=$1 + +wget https://github.com/pepkit/example_peps/archive/${branch}.zip +unzip ${branch}.zip +rm -rf inst/extdata/example_peps-${branch} +mv example_peps-${branch} inst/extdata +rm ${branch}.zip +cd inst/extdata/example_peps-${branch}/ || exit 1 +find . -type d -maxdepth 1 ! -name 'example_BiocProjec*' ! 
-name 'example_piface' -exec rm -r {} \; \ No newline at end of file diff --git a/vignettes/vignette5piface.Rmd b/vignettes/vignette5piface.Rmd new file mode 100644 index 0000000..ef0f493 --- /dev/null +++ b/vignettes/vignette5piface.Rmd @@ -0,0 +1,128 @@ +--- +title: "Using a pipeline interface in your project" +author: "Michał Stolarczyk" +date: "`r Sys.Date()`" +output: BiocStyle::html_document +vignette: > + %\VignetteIndexEntry{Using a pipeline interface in your project} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +# Introduction + +A pipeline interface tells the pipeline submission engine (such as [`looper`](http://looper.databio.org/en/latest/)) how to interact with your +project and pipelines. In brief, it is just a `yaml` file with three required +sections: + +* `pipeline_name` - A string identifying the pipeline, +* `pipeline_type` - A string indicating a pipeline type: "sample" (for run) +or "project" (for runp), +* `command_template` - A [Jinja2](https://jinja.palletsprojects.com/en/2.11.x/) template used to construct a pipeline command to run. + +Follow the pipeline interface [specification](http://looper.databio.org/en/latest/pipeline-interface-specification/) +to learn more about all the features that `looper` provides via that file. + +# Relevant features + +The only two sections of the pipeline interface relevant from the BiocProject perspective are: + +1. `bioconductor` -- used to specify the data processing function name and/or path +2. `output_schema` -- used to specify a path to a pipeline output schema that describes the outputs of the pipeline + + +Let's consider the examples below that illustrate the pipeline interface-related +functionality of the `BiocProject` package. 
+ +## Specify `bioconductor` section in the pipeline interface + +The first advantage of the pipeline interface concept is the possibility to declare +the data processing function in the pipeline interface itself. Since the data +processing function is pipeline specific rather than project specific, it is +much more sensible to place the `bioconductor` section in the pipeline +interface file -- the code that will be used to preprocess the output of the pipeline depends on the pipeline (defined by the pipeline interface), not the data (defined by the PEP). + +```{r echo=F,message=FALSE, collapse=TRUE, comment=" "} +library(BiocProject) +branch = "master" +configFile = system.file( + "extdata", + paste0("example_peps-", branch), + "example_piface", + "project_config.yaml", + package = "BiocProject" +) +p=pepr::Project(file = configFile) +.printNestedList(yaml::read_yaml(pipelineInterfacesBySample(p)[[1]][1])) +``` + +## Get output file paths + +Pipeline outputs can be defined in a schema. As shown in the example above, +the pipeline interface specifies a path to a schema in a top-level `output_schema` +section. +Example of a schema defining pipeline outputs: +```{r echo=F,message=FALSE, collapse=TRUE, comment=" "} +pifaceSource = pipelineInterfacesBySample(p)[[1]][1] +piface = yaml::read_yaml(pifaceSource) +schemaPath = file.path(dirname(pifaceSource), piface$output_schema) +.printNestedList(yaml::read_yaml(schemaPath)) +``` + +The output schema has to follow the [pipestat schema specification](http://pipestat.databio.org/en/latest/pipestat_specification/#pipestat-schema). Additionally, if the schema lists any path-like outputs, i.e. type `file` or `image`, it is beneficial to add a `values` section that includes path templates to populate. + +Check out the [looper documentation on output schema](http://looper.databio.org/en/latest/pipeline-interface-specification/#output_schema) to learn more.
+ +### Sample-level + +The pipeline interface system divides pipelines (and their outputs) into project- +and sample-level. + +In order to list the outputs for a sample, or all the samples, use +the `getOutputsBySample` method. If you indicate a specific sample name, only outputs for this sample will be returned. +```{r} +getOutputsBySample(p, sampleNames=c("sample1")) +``` + +### Project-level + +In order to list project-level outputs, use the `getProjectOutputs` method: + +```{r} +getProjectOutputs(p) +``` + +# Use case + +This functionality provides a convenient way to process the files produced by +the pipeline, when used in the data processing function indicated in the +`bioconductor` section of the pipeline interface file. See the example function +below that demonstrates the application of the `getOutputsBySample` function. + +```{r echo=FALSE, eval=TRUE, comment=""} +branch = "master" +processFunction = system.file( + "extdata", + paste0("example_peps-", branch), + "example_piface", + "readData.R", + package = "BiocProject" +) +source(processFunction) +piface = yaml::read_yaml(pipelineInterfacesBySample(p)[[1]][1]) +get(piface$bioconductor$readFunName) +``` +Such a link between the project and the outputs +(declared in the pipeline interface) makes it possible to read and process +the pipeline results with just a line of code: + +```r +bp = BiocProject(configFile) +``` \ No newline at end of file diff --git a/vignettes/vignette6tximeta.Rmd b/vignettes/vignette6tximeta.Rmd index 8d6726e..51a7890 100644 --- a/vignettes/vignette6tximeta.Rmd +++ b/vignettes/vignette6tximeta.Rmd @@ -20,7 +20,7 @@ This vignette demonstrates how to integrate BiocProject with the [tximeta Biocon Tximeta is a package that imports transcript quantification files from the [salmon](https://salmon.readthedocs.io/en/latest/salmon.html) transcript quantifier. When importing, tximeta automatically annotates the data with the transcriptome used.
How it works is that `salmon` records a unique identifier of the transcriptome it uses during quantification; then, tximeta reads this identifier and looks up metadata about those sequences using a local database of known transcriptome identifiers. For more details, refer to the [tximeta GitHub repository](https://github.com/mikelove/tximeta) or [publication in PLoS Computational Biology](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1007664). -The `tximeta::tximeta` function takes as input a `data.frame` (`coldata`) object that, for Salmon results, points to a quantification results directory for each sample. The `tximeta` function reads the `*.sa` files and returns a single `SummarizedExperiment` object with the Salmon-generated metadata in the object `metadata` slot. +The `tximeta::tximeta` function takes as input a `data.frame` (`coldata`) object that, for Salmon results, points to a quantification results directory for each sample. The `tximeta` function reads the `*.sf` files and returns a single `SummarizedExperiment` object with the various elements of metadata in the object's `rowRanges` and `metadata` slots. In addition, the `SummarizedExperiment` has pointers to local cached databases for subsequent annotation tasks. Since `SummarizedExperiment` inherits from the Bioconductor `Annotated` class, it fits perfectly into `BiocProject` output object class requirements. @@ -38,7 +38,7 @@ is(SummarizedExperiment(), "Annotated") ## Advantages of using BiocProject with tximeta -If we add BiocProject in to the tximeta workflow, then sample metadata from the PEP project specification can be easily plugged in! 
For example, if a researcher used a PEP to run Salmon to quantify reads across multiple samples with PEP-compatible workflow management engine/job scatterer like [Snakemake](https://snakemake.github.io/), [CWL](https://www.commonwl.org/), or [looper](https://looper.databio.org/), the same PEP would be ready to use with tximeta as long as the samples had `files` attribute defined. This could be done either via a `files` column in the sample table, or by using one of the sample modifiers provided by the PEP framework. The advantages of calling `tximport` within `BiocProject` include: +If we add BiocProject into the tximeta workflow, then sample metadata from the PEP project specification can be easily plugged in! For example, if a researcher used a PEP to run Salmon to quantify reads across multiple samples with a PEP-compatible workflow management engine/job scatterer like [Snakemake](https://snakemake.github.io/), [CWL](https://www.commonwl.org/), or [looper](https://looper.databio.org/), the same PEP would be ready to use with tximeta as long as the samples had the `files` attribute defined. This could be done either via a `files` column in the sample table, or by using one of the sample modifiers provided by the PEP framework. The advantages of calling `tximeta` within `BiocProject` include: - project portability, inherent to projects following PEP specification - single source of metadata from start of the analysis to finish -- all the PEP-defined metadata will be propagated to the output object of the `tximeta` function automatically. It will be accessible from within your R session using the [pepr](http://code.databio.org/pepr/) API, or with `@PEP` in the `metadata` slot of the `SummarizedExperiment` object, just as any other metadata attached to the result by `tximeta` function. @@ -85,7 +85,7 @@ The `Biocproject` + `tximeta` workflow requires a PEP.
The example we just downl readFunPath: readTximeta.R ``` -As you can see, this PEP configuration file uses a `$TXIMPORTDATA` environment variable to specify a file path. This is just an optional way to make this PEP work in any computing environment without being changed, so you can share your sample metadata more easily. For this vignette, we need to set the variable to the output directory where our downloaded results are stored: +As you can see, this PEP configuration file uses a `$TXIMPORTDATA` environment variable to specify a file path. This is just an optional way to make this PEP work in any computing environment without being changed, so you can share your sample metadata more easily. (`tximportData` is a Bioconductor data package with various transcript quantification output, for unit testing of Bioconductor software packages.) For this vignette, we need to set the variable to the output directory where our downloaded results are stored: ```r @@ -306,4 +306,4 @@ config(bp) # Conclusion -If you format your project metadata according to the PEP specification, it will be ready to use with tximeta and the resulting object will include project-wide metadata and expose [pepr](http://code.databio.org/pepr/) API for any PEP-compatible R packages for downstream analysis. +If you format your project metadata according to the PEP specification, it will be ready to use with tximeta and the resulting object will include project-wide metadata and expose [pepr](http://code.databio.org/pepr/) API for any PEP-compatible R packages for downstream analysis. \ No newline at end of file