From 016eef8c6b54bd146f47e7e70680b6a722abb4ef Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 22 Oct 2020 11:28:47 -0400 Subject: [PATCH 01/24] add Project method for piface path determination --- NAMESPACE | 5 ++ R/constants.R | 12 +++- R/pipeline_interface.R | 67 +++++++++++++++++++ R/utils.R | 40 ++++++++++- .../example_piface/annotation_sheet.csv | 4 ++ .../example_piface/output_schema.yaml | 27 ++++++++ .../pipeline_interface1_project.yaml | 10 +++ .../pipeline_interface1_sample.yaml | 11 +++ .../pipeline_interface2_project.yaml | 12 ++++ .../pipeline_interface2_sample.yaml | 12 ++++ .../example_piface/project_config.yaml | 17 +++++ .../example_piface/readData.R | 10 +++ .../example_piface/resources-project.tsv | 6 ++ .../example_piface/resources-sample.tsv | 7 ++ man/dot-isValidUrl.Rd | 17 +++++ man/gatherPipelineInterfaces.Rd | 47 +++++++++++++ man/readSchema.Rd | 22 ++++++ 17 files changed, 324 insertions(+), 2 deletions(-) create mode 100644 R/pipeline_interface.R create mode 100644 inst/extdata/example_peps-master/example_piface/annotation_sheet.csv create mode 100644 inst/extdata/example_peps-master/example_piface/output_schema.yaml create mode 100644 inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml create mode 100644 inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml create mode 100644 inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml create mode 100644 inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml create mode 100644 inst/extdata/example_peps-master/example_piface/project_config.yaml create mode 100644 inst/extdata/example_peps-master/example_piface/readData.R create mode 100644 inst/extdata/example_peps-master/example_piface/resources-project.tsv create mode 100644 inst/extdata/example_peps-master/example_piface/resources-sample.tsv create mode 100644 man/dot-isValidUrl.Rd create mode 100644 
man/gatherPipelineInterfaces.Rd create mode 100644 man/readSchema.Rd diff --git a/NAMESPACE b/NAMESPACE index 9dc4c23..768fa7b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,8 @@ export(.insertPEP) export(.setShowMethod) export(.unionList) export(BiocProject) +export(gatherPipelineInterfaces) +export(readSchema) exportMethods(config) exportMethods(getProject) exportMethods(is) @@ -11,3 +13,6 @@ exportMethods(sampleTable) import(S4Vectors) import(methods) import(pepr) +importFrom(RCurl,getURLContent) +importFrom(pryr,partial) +importFrom(stats,setNames) diff --git a/R/constants.R b/R/constants.R index 50025a1..c4341af 100644 --- a/R/constants.R +++ b/R/constants.R @@ -7,4 +7,14 @@ BIOC_SECTION = "bioconductor" FUNCTION_ARGS = "funcArgs" FUNCTION_PATH = "readFunPath" -FUNCTION_NAME = "readFunName" \ No newline at end of file +FUNCTION_NAME = "readFunName" + +# other constants +PIP_TYPE_KEY = "pipeline_type" +PIP_NAME_KEY = "pipeline_name" +OUTPUT_SCHEMA_SECTION = "output_schema" +LOOPER_SECTION = "looper" +PIP_IFACE_NAME = "pipeline_interfaces" +PIP_IFACE_KEY = "pipeline_interfaces_key" +SCHEMA_SAMPLE_OUTS = c("properties", "samples", "items", "properties") +PIP_IFACE_SECTION = c(LOOPER_SECTION, PIP_IFACE_NAME) \ No newline at end of file diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R new file mode 100644 index 0000000..8104463 --- /dev/null +++ b/R/pipeline_interface.R @@ -0,0 +1,67 @@ + +#' Collect all pipeline interfaces +#' +#' Collects all relevant pipeline interfaces +#' for this \code{\link[pepr]{Project-class}} +#' +#' @param project \code{\link[pepr]{Project-class}} object +#' @param ... other arguments +#' +#' @return a list of pipeline interface file paths. 
+#' +#' @export +#' @examples +#' projectConfig = system.file('extdata', +#' 'example_peps-master', +#' 'example_piface', +#' 'project_config.yaml', +#' package = 'BiocProject') +#' p = Project(file = projectConfig) +#' gatherPipelineInterfaces(p) +#' gatherPipelineInterfaces(p, TRUE) +setGeneric("gatherPipelineInterfaces", + function(project, ...) standardGeneric("gatherPipelineInterfaces"), + signature = "project") + +#' @describeIn gatherPipelineInterfaces Collect all pipeline interfaces +#' @param projectLevel logical indicating whether a only project-level pifaces +#' should be considered. Otherwise, only sample-level ones are. +#' @importFrom stats setNames +setMethod("gatherPipelineInterfaces", + c(project = "Project"), function(project, + projectLevel = FALSE) { + if (!projectLevel) { + return(.gatherSamplePipelineInterfaces(project)) + } else { + pik = PIP_IFACE_NAME + if (!is.null(config(project)[[LOOPER_SECTION]][[PIP_IFACE_KEY]])) + pik = config(project)[[LOOPER_SECTION]][[PIP_IFACE_KEY]] + if (!is.null(config(project)[[LOOPER_SECTION]][[pik]])) + return(setNames(vapply(unlist( + config(project)[[LOOPER_SECTION]][[pik]]), + function(x) { + pepr::.makeAbsPath(x, parent=dirname(project@file)) + }, character(1)), + NULL)) + warning("No project pipeline interfaces defined") + return(invisible(NULL)) + } + }) + + +setGeneric(".gatherSamplePipelineInterfaces", + function(project) standardGeneric(".gatherSamplePipelineInterfaces"), + signature = "project") + +#' @describeIn gatherPipelineInterfaces extracts pipeline outputs +#' for a given pipeline +#' @importFrom pryr partial +setMethod(".gatherSamplePipelineInterfaces", + c(project = "Project"), function(project) { + t = pepr::sampleTable(project) + .mkAbs = pryr::partial(pepr::.makeAbsPath, parent=dirname(project@file)) + if (PIP_IFACE_NAME %in% colnames(t)) + return(setNames(vapply(unique(unlist(t[, PIP_IFACE_NAME])), + .mkAbs, character(1)), NULL)) + return(invisible(NULL)) + }) diff --git a/R/utils.R 
b/R/utils.R index 9be2ff8..e7eea85 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,4 +1,42 @@ -# internal function used for wrapping the user-supplied function meessages +#' Determine whether the string is a valid URL +#' +#' @param str string to inspect +#' +#' @return logical indicating whether a string is a valid URL +.isValidUrl = function(str) { + ans = FALSE + if (grepl("www.|http:|https:", + str)) { + ans = RCurl::url.exists(str) + } + ans +} + +#' Read a YAML-formatted schema +#' +#' Remote or local schemas are supported +#' +#' @param path path to a local schema or URL pointing to a remote one +#' @param parent a path to parent folder to use +#' @return list read schema +#' @export +#' @importFrom RCurl getURLContent +#' @examples +#' readSchema('https://schema.databio.org/pep/2.0.0.yaml') +readSchema = function(path, parent = NULL) { + if (.isValidUrl(path)) + return(yaml::yaml.load(getURLContent(path))) + file = pepr::.makeAbsPath(path, + parent) + if (file.exists(file)) { + return(yaml::read_yaml(file)) + } + stop(paste0("Schema has to be either a valid URL or an existing path. 
", + "Got: ", path)) +} + + +# internal function used for wrapping the user-supplied function messages # in a box .wrapFunMessages = function(messages, type) { n = options("width")[[1]] diff --git a/inst/extdata/example_peps-master/example_piface/annotation_sheet.csv b/inst/extdata/example_peps-master/example_piface/annotation_sheet.csv new file mode 100644 index 0000000..fdccccf --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/annotation_sheet.csv @@ -0,0 +1,4 @@ +sample_name,protocol,data_source,SRR,Sample_geo_accession,read1,read2 +sample1,PROTO1,SRA,SRR5210416,GSM2471255,SRA_1,SRA_2 +sample2,PROTO1,SRA,SRR5210450,GSM2471300,SRA_1,SRA_2 +sample3,PROTO2,SRA,SRR5210398,GSM2471249,SRA_1,SRA_2 \ No newline at end of file diff --git a/inst/extdata/example_peps-master/example_piface/output_schema.yaml b/inst/extdata/example_peps-master/example_piface/output_schema.yaml new file mode 100644 index 0000000..8f3bde0 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/output_schema.yaml @@ -0,0 +1,27 @@ +description: Sample objects produced by test pipeline. 
+properties: + samples: + type: array + items: + type: object + properties: + test_property: + type: string + description: "Test sample property" + path: "~/sample/{sample_name}_file.txt" + test_property1: + type: string + description: "Test sample property" + path: "~/sample/{sample_name}_file1.txt" + test_property: + type: image + title: "Test title" + description: "Test project property" + thumbnail_path: "~/test_{name}.png" + path: "~/test_{name}.pdf" + test_property1: + type: image + title: "Test title1" + description: "Test project property1" + thumbnail_path: "~/test_{name}.png" + path: "~/test_{name}1.pdf" diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml new file mode 100644 index 0000000..cd9903c --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml @@ -0,0 +1,10 @@ +pipeline_name: PIPELINE1 +pipeline_type: project +path: pipelines/col_pipeline1.py +output_schema: output_schema.yaml +command_template: > + {pipeline.path} --project-name {project.name} + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml new file mode 100644 index 0000000..e0d766d --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml @@ -0,0 +1,11 @@ +pipeline_name: PIPELINE1 +pipeline_type: sample +path: pipelines/pipeline1.py +input_schema: https://schema.databio.org/pep/2.0.0.yaml +output_schema: output_schema.yaml +command_template: > + {pipeline.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml 
b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml new file mode 100644 index 0000000..b77a621 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml @@ -0,0 +1,12 @@ +pipeline_name: OTHER_PIPELINE2 +pipeline_type: project +path: pipelines/col_pipeline2.py +output_schema: output_schema.yaml +command_template: > + {pipeline.path} --project-name {project.name} +compute: + size_dependent_variables: resources-project.tsv + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml new file mode 100644 index 0000000..93186f6 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml @@ -0,0 +1,12 @@ +pipeline_name: OTHER_PIPELINE2 +pipeline_type: sample +path: pipelines/other_pipeline2.py +output_schema: output_schema.yaml +command_template: > + {pipeline.path} --sample-name {sample.sample_name} --req-attr {sample.attr} +compute: + size_dependent_variables: resources-sample.tsv + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/inst/extdata/example_peps-master/example_piface/project_config.yaml b/inst/extdata/example_peps-master/example_piface/project_config.yaml new file mode 100644 index 0000000..a47098a --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/project_config.yaml @@ -0,0 +1,17 @@ +pep_version: "2.0.0" +name: test + +sample_table: annotation_sheet.csv +looper: + output_dir: ../output + pipeline_interfaces: ["pipeline_interface1_project.yaml", "pipeline_interface2_project.yaml"] + +sample_modifiers: + append: + attr: "val" + pipeline_interfaces: ["pipeline_interface1_sample.yaml", "pipeline_interface2_sample.yaml"] + derive: + attributes: [read1, read2] + sources: + SRA_1: "{SRR}_1.fastq.gz" + SRA_2: "{SRR}_2.fastq.gz" 
diff --git a/inst/extdata/example_peps-master/example_piface/readData.R b/inst/extdata/example_peps-master/example_piface/readData.R new file mode 100644 index 0000000..89557a1 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/readData.R @@ -0,0 +1,10 @@ +readData = function(project, sampleName="sample1") { + lapply(getOutputsBySample(project, sampleName), function(x) { + lapply(x, function(x1){ + message("Reading: ", basename(x1)) + df = read.table(x1, stringsAsFactors=F) + colnames(df)[1:3] = c('chr', 'start', 'end') + GenomicRanges::GRanges(df) + }) + }) +} diff --git a/inst/extdata/example_peps-master/example_piface/resources-project.tsv b/inst/extdata/example_peps-master/example_piface/resources-project.tsv new file mode 100644 index 0000000..4efd0f1 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/resources-project.tsv @@ -0,0 +1,6 @@ +max_file_size cores mem time +0.05 1 12000 00-01:00:00 +0.5 1 16000 00-01:00:00 +1 1 16000 00-01:00:00 +10 1 16000 00-01:00:00 +NaN 1 32000 00-02:00:00 diff --git a/inst/extdata/example_peps-master/example_piface/resources-sample.tsv b/inst/extdata/example_peps-master/example_piface/resources-sample.tsv new file mode 100644 index 0000000..20ec284 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/resources-sample.tsv @@ -0,0 +1,7 @@ +max_file_size cores mem time +0.001 1 8000 00-04:00:00 +0.05 2 12000 00-08:00:00 +0.5 4 16000 00-12:00:00 +1 8 16000 00-24:00:00 +10 16 32000 02-00:00:00 +NaN 32 32000 04-00:00:00 diff --git a/man/dot-isValidUrl.Rd b/man/dot-isValidUrl.Rd new file mode 100644 index 0000000..5416df9 --- /dev/null +++ b/man/dot-isValidUrl.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.isValidUrl} +\alias{.isValidUrl} +\title{Determine whether the string is a valid URL} +\usage{ +.isValidUrl(str) +} +\arguments{ +\item{str}{string to inspect} +} +\value{ +logical indicating whether a string is a 
valid URL +} +\description{ +Determine whether the string is a valid URL +} diff --git a/man/gatherPipelineInterfaces.Rd b/man/gatherPipelineInterfaces.Rd new file mode 100644 index 0000000..8a0c0b3 --- /dev/null +++ b/man/gatherPipelineInterfaces.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{gatherPipelineInterfaces} +\alias{gatherPipelineInterfaces} +\alias{gatherPipelineInterfaces,Project-method} +\alias{.gatherSamplePipelineInterfaces,Project-method} +\title{Collect all pipeline interfaces} +\usage{ +gatherPipelineInterfaces(project, ...) + +\S4method{gatherPipelineInterfaces}{Project}(project, projectLevel = FALSE) + +\S4method{.gatherSamplePipelineInterfaces}{Project}(project) +} +\arguments{ +\item{project}{\code{\link[pepr]{Project-class}} object} + +\item{...}{other arguments} + +\item{projectLevel}{logical indicating whether a only project-level pifaces +should be considered. Otherwise, only sample-level ones are.} +} +\value{ +a list of pipeline interface file paths. 
+} +\description{ +Collects all relevant pipeline interfaces +for this \code{\link[pepr]{Project-class}} +} +\section{Methods (by class)}{ +\itemize{ +\item \code{Project}: Collect all pipeline interfaces + +\item \code{Project}: extracts pipeline outputs +for a given pipeline +}} + +\examples{ +projectConfig = system.file('extdata', +'example_peps-master', +'example_piface', +'project_config.yaml', +package = 'BiocProject') +p = Project(file = projectConfig) +gatherPipelineInterfaces(p) +gatherPipelineInterfaces(p, TRUE) +} diff --git a/man/readSchema.Rd b/man/readSchema.Rd new file mode 100644 index 0000000..a69b96c --- /dev/null +++ b/man/readSchema.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{readSchema} +\alias{readSchema} +\title{Read a YAML-formatted schema} +\usage{ +readSchema(path, parent = NULL) +} +\arguments{ +\item{path}{path to a local schema or URL pointing to a remote one} + +\item{parent}{a path to parent folder to use} +} +\value{ +list read schema +} +\description{ +Remote or local schemas are supported +} +\examples{ +readSchema('https://schema.databio.org/pep/2.0.0.yaml') +} From e99bfad9203e071649e8a269ab7f502234248a62 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 22 Oct 2020 13:04:48 -0400 Subject: [PATCH 02/24] add pipeline outputs retrieval --- NAMESPACE | 6 + R/pipeline_interface.R | 188 ++++++++++++++++++++++++++++++ R/utils.R | 113 ++++++++++++++++++ man/dot-checkPifaceType.Rd | 19 +++ man/dot-getOutputs.Rd | 23 ++++ man/dot-populateString.Rd | 27 +++++ man/dot-populateTemplates.Rd | 30 +++++ man/dot-pyToR.Rd | 23 ++++ man/getOutputsBySample.Rd | 43 +++++++ man/getProjectOutputs.Rd | 40 +++++++ man/pipelineInterfacesBySample.Rd | 35 ++++++ 11 files changed, 547 insertions(+) create mode 100644 man/dot-checkPifaceType.Rd create mode 100644 man/dot-getOutputs.Rd create mode 100644 man/dot-populateString.Rd create mode 100644 man/dot-populateTemplates.Rd 
create mode 100644 man/dot-pyToR.Rd create mode 100644 man/getOutputsBySample.Rd create mode 100644 man/getProjectOutputs.Rd create mode 100644 man/pipelineInterfacesBySample.Rd diff --git a/NAMESPACE b/NAMESPACE index 768fa7b..7930ee3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,14 @@ # Generated by roxygen2: do not edit by hand export(.insertPEP) +export(.pyToR) export(.setShowMethod) export(.unionList) export(BiocProject) export(gatherPipelineInterfaces) +export(getOutputsBySample) +export(getProjectOutputs) +export(pipelineInterfacesBySample) export(readSchema) exportMethods(config) exportMethods(getProject) @@ -14,5 +18,7 @@ import(S4Vectors) import(methods) import(pepr) importFrom(RCurl,getURLContent) +importFrom(glue,glue) importFrom(pryr,partial) importFrom(stats,setNames) +importFrom(yaml,yaml.load_file) diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R index 8104463..ec9a20b 100644 --- a/R/pipeline_interface.R +++ b/R/pipeline_interface.R @@ -65,3 +65,191 @@ setMethod(".gatherSamplePipelineInterfaces", .mkAbs, character(1)), NULL)) return(invisible(NULL)) }) + + +#' Get pipeline interfaces by sample +#' +#' Collects all relevant pipeline interfaces for this +#' \code{\link[pepr]{Project-class}} and provides a sample to interfaces mapping +#' +#' @param project \code{\link[pepr]{Project-class}} object +#' +#' @return a list of pipeline interface file paths keyed by sample names +#' +#' @export +#' @examples +#' projectConfig = system.file('extdata', +#' 'example_peps-master', +#' 'example_piface', +#' 'project_config.yaml', +#' package = 'BiocProject') +#' p = Project(file = projectConfig) +#' pipelineInterfacesBySample(p) +setGeneric("pipelineInterfacesBySample", + function(project) standardGeneric("pipelineInterfacesBySample"), + signature = "project") + +#' @describeIn pipelineInterfacesBySample Get pipeline interfaces by sample +setMethod("pipelineInterfacesBySample", + c(project = "Project"), function(project) { + t = 
pepr::sampleTable(project) + if (PIP_IFACE_NAME %in% + colnames(t)) { + .mkAbs = pryr::partial(pepr::.makeAbsPath, + parent=dirname(project@file)) + pifaces = t[, PIP_IFACE_NAME] + names(pifaces) = unlist(t[, + "sample_name"]) + return(lapply(pifaces, .mkAbs)) + } + return(invisible(NULL)) + }) + + +#' Get outputs from pipeline defined in an output schema +#' +#' Extracts the output file templates defined for a given pipeline +#' +#' @param pipeline an object of \code{\link[pepr]{Config-class}} +#' @param parent a path to parent folder to use +#' @param projectContext logical indicating whether a only project-level +#' pifaces should be considered. Otherwise, only sample-level ones are. +#' +#' @return named list of output path templates, +#' like: \code{'aligned_{sample.genome}/{sample.sample_name}_sort.bam'} +.getOutputs = function(pipeline, parent, projectContext = FALSE) { + if (!OUTPUT_SCHEMA_SECTION %in% + names(pipeline)) + return(invisible(NULL)) + outputSchema = readSchema(pipeline[[OUTPUT_SCHEMA_SECTION]], parent) + sect = "properties" + if (!projectContext) + sect = SCHEMA_SAMPLE_OUTS + if (!pepr::.checkSection(outputSchema, sect)) { + pipName = ifelse(is.null(pipeline[[PIP_NAME_KEY]]), + "provided", pipeline[[PIP_NAME_KEY]]) + warning("There is no '", + paste(sect, collapse = ":"), + "' section in the ", + pipName, " pipeline output schema.") + return(invisible(NULL)) + } + outputs = outputSchema[[sect]] + if ("samples" %in% names(outputs)) + outputs[["samples"]] = NULL + x = lapply(outputs, function(x) { + return(x[["path"]]) + }) + if (is.null(unlist(lapply(x, is.null)))) + return(invisible(NULL)) + return(x) +} + +#' Populates and returns output files for a given sample +#' +#' Returns the pipeline outputs which are defined in the pipeline interface +#' indicated in the \code{\link[pepr]{Project-class}} +#' +#' @param project \code{\link[pepr]{Project-class}} object +#' @param ... other arguemnts +#' +#' @return a list of output file paths. 
The order of the first level of the +#' list corresponds to the order of the pipeline interface files, second level +#' is a named list of file paths populated by the samples +#' +#' @export +#' @examples +#' projectConfig = system.file('extdata', +#' 'example_peps-master', +#' 'example_piface', +#' 'project_config.yaml', +#' package = 'BiocProject') +#' p = Project(file = projectConfig) +#' getOutputsBySample(p) +#' getOutputsBySample(p, 'sample1') +setGeneric("getOutputsBySample", + function(project, ...) standardGeneric("getOutputsBySample"), + signature = "project") + +#' @describeIn getOutputsBySample Populates and returns output files +#' for a given sample +#' @param sampleNames names of the samples +#' @importFrom yaml yaml.load_file +setMethod("getOutputsBySample", + c(project = "Project"), function(project, + sampleNames = NULL) { + pifacesBySample = pipelineInterfacesBySample(project = project) + defSampleNames = names(pifacesBySample) + if (!is.null(sampleNames)) + defSampleNames = intersect(sampleNames, + defSampleNames) + if (length(defSampleNames) < + 1) + stop("No samples matched by: ", + paste0(sampleNames, + collapse = ",")) + ret = list() + for (sampleName in defSampleNames) { + sampleRet = list() + pifaceSources = pifacesBySample[[sampleName]] + for (pifaceSource in pifaceSources) { + piface = yaml::yaml.load_file(pifaceSource) + if (!.checkPifaceType(piface, + "sample")) + return(invisible(NULL)) + outputs = .getOutputs(piface, + parent = dirname(pifaceSource)) + sampleRet[[pifaceSource]] = .populateTemplates( + project, outputs, sampleName) + } + ret[[sampleName]] = sampleRet + } + ret + }) + +#' Populates and returns output files for a +#' given \code{\link[pepr]{Project-class}} +#' +#' Returns the pipeline outputs which are defined in the pipeline interface +#' indicated in the \code{\link[pepr]{Project-class}} +#' +#' @param project \code{\link[pepr]{Project-class}} object +#' +#' @return a list of output file paths. 
The order of the first level of the +#' list corresponds to the order of the pipeline interface files, second level +#' is a named list of file paths populated +#' by the \code{\link[pepr]{Project-class}} +#' +#' @export +#' @examples +#' projectConfig = system.file('extdata', +#' 'example_peps-master', +#' 'example_piface', +#' 'project_config.yaml', +#' package = 'BiocProject') +#' p = Project(file = projectConfig) +#' getProjectOutputs(p) +setGeneric("getProjectOutputs", + function(project) standardGeneric("getProjectOutputs"), + signature = "project") + +#' @describeIn getProjectOutputs Populates and returns output files for +#' a given \code{\link[pepr]{Project-class}} +setMethod("getProjectOutputs", + c(project = "Project"), function(project) { + pifaceSources = gatherPipelineInterfaces(project, + projectLevel = TRUE) + ret = list() + for (pifaceSource in pifaceSources) { + piface = yaml::yaml.load_file(pifaceSource) + if (!.checkPifaceType(piface, + "project")) + return(invisible(NULL)) + outputs = .getOutputs(piface, + parent = dirname(pifaceSource), + projectContext = TRUE) + ret[[pifaceSource]] = .populateTemplates(project, outputs, + projectContext = TRUE) + } + ret + }) diff --git a/R/utils.R b/R/utils.R index e7eea85..74da2f9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -286,4 +286,117 @@ readSchema = function(path, parent = NULL) { selectMethod("show", "Project")(pep) }, where = parent.frame()) +} + +#' Switch from python to R list accession syntax +#' +#' Python uses a dot to access attributes, while R uses \code{$}; this function +#' converts the python style into R so that we can use R code to populate +#' variables with R lists. 
From this: '\code{{sample.name}}' +#' to this: '\code{{sample$name}}' +#' @param str String to recode +#' @return string with the recoded accession syntax +#' @export +#' @examples +#' .pyToR('{sample.genome}/{sample.read_type}/test') +.pyToR = function(str) { + # This is the regex where the + # magic happens + pytor = function(str) gsub("(\\{[^\\.\\}]+)\\.", + "\\1$", str) + # This loop allows multi-layer + # accession + res = str + prev = "" + while (prev != res) { + prev = res + res = pytor(res) + } + return(res) +} + +#' Populate a variable-encoded string with sample/project variables +#' +#' Given a string and a project this function will go through samples and +#' populate the variables. Used to return real files for each sample from an +#' output variable in the pipeline interface +#' +#' @param string Variable-encoded string to populate +#' @param project \code{\link[pepr]{Project-class}} object with values +#' to draw from +#' @param sampleName string, name of the sample to use +#' @param projectContext logical indicating whether project context should be +#' applied for string formatting. Default: sample +#' +#' @return a named list of populated strings +#' @importFrom glue glue +.populateString = function(string, project, sampleName = NULL, projectContext = FALSE) { + # Apply this glue function on + # each row in the samples + # table, coerced to a list + # object to allow attribute + # accession. 
+ samplesSubset = subset(sampleTable(project), + sample_name == sampleName) + if (!projectContext && NROW(samplesSubset) < + 1) + return(invisible(NULL)) + if (projectContext) { + populatedStrings = with(config(project), + glue(.pyToR(string))) + } else { + populatedStrings = as.list(apply(samplesSubset, 1, function(s) { + return(with(s, glue(.pyToR(string)))) + })) + } + if (!projectContext && length(populatedStrings) != + NROW(samplesSubset)) { + warning("Paths templates populating problem: number of paths (", + length(populatedStrings), + ") does not correspond to the number of samples (", + NROW(samplesSubset), + "). Path template '", + string, "' will not be populated") + return(invisible(NULL)) + } + names(populatedStrings) = unlist(samplesSubset$sample_name) + return(populatedStrings) +} + + +#' Populate list of path templates +#' +#' @param project an object of \code{\link[pepr]{Config-class}} +#' @param templList list of strings, +#' like: 'aligned_{sample.genome}/{sample.sample_name}_sort.bam' +#' @param sampleName string, name of the protocol to select the samples +#' @param projectContext logical indicating whether project context +#' should be applied. 
Default: sample +#' +#' @return list of strings +.populateTemplates = function(project, + templList, sampleName = NULL, + projectContext = FALSE) { + if (!projectContext && is.null(sampleName)) + stop("Must specify the sample to populate templates for") + expandedTemplList = lapply(templList, pepr::.expandPath) + lapply(expandedTemplList, .populateString, project, sampleName, projectContext) +} + + +#' Validate type of the pipeline interface +#' +#' @param piface pipeline interface to inspect +#' @param type string, type of the pipeline interface, either "sample" or "project" +#' +#' @return a logical indicating whether the pipeline interface matches the specified type +.checkPifaceType <- function(piface, type) { + if (!pepr::.checkSection(piface, PIP_TYPE_KEY) || + piface[[PIP_TYPE_KEY]] != type) { + warning(sprintf( + "%s pipeline interface has to specify '%s' pipeline type in '%s'", + type, type, PIP_TYPE_KEY)) + return(FALSE) + } + return(TRUE) } \ No newline at end of file diff --git a/man/dot-checkPifaceType.Rd b/man/dot-checkPifaceType.Rd new file mode 100644 index 0000000..7c9ce5c --- /dev/null +++ b/man/dot-checkPifaceType.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.checkPifaceType} +\alias{.checkPifaceType} +\title{Validate type of the pipeline interface} +\usage{ +.checkPifaceType(piface, type) +} +\arguments{ +\item{piface}{pipeline interface to inspect} + +\item{type}{string, type of the pipeline interface, either "sample" or "project"} +} +\value{ +a logical indicating whether the pipeline interface matches the specified type +} +\description{ +Validate type of the pipeline interface +} diff --git a/man/dot-getOutputs.Rd b/man/dot-getOutputs.Rd new file mode 100644 index 0000000..1671d7c --- /dev/null +++ b/man/dot-getOutputs.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{.getOutputs} 
+\alias{.getOutputs} +\title{Get outputs from pipeline defined in an output schema} +\usage{ +.getOutputs(pipeline, parent, projectContext = FALSE) +} +\arguments{ +\item{pipeline}{an object of \code{\link[pepr]{Config-class}}} + +\item{parent}{a path to parent folder to use} + +\item{projectContext}{logical indicating whether a only project-level +pifaces should be considered. Otherwise, only sample-level ones are.} +} +\value{ +named list of output path templates, +like: \code{'aligned_{sample.genome}/{sample.sample_name}_sort.bam'} +} +\description{ +Extracts the output file templates defined for a given pipeline +} diff --git a/man/dot-populateString.Rd b/man/dot-populateString.Rd new file mode 100644 index 0000000..fd5a387 --- /dev/null +++ b/man/dot-populateString.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.populateString} +\alias{.populateString} +\title{Populate a variable-encoded string with sample/project variables} +\usage{ +.populateString(string, project, sampleName = NULL, projectContext = FALSE) +} +\arguments{ +\item{string}{Variable-encoded string to populate} + +\item{project}{\code{\link[pepr]{Project-class}} object with values +to draw from} + +\item{sampleName}{string, name of the sample to use} + +\item{projectContext}{logical indicating whether project context should be +applied for string formatting. Default: sample} +} +\value{ +a named list of populated strings +} +\description{ +Given a string and a project this function will go through samples and +populate the variables. 
Used to return real files for each sample from an +output variable in the pipeline interface +} diff --git a/man/dot-populateTemplates.Rd b/man/dot-populateTemplates.Rd new file mode 100644 index 0000000..aa51a03 --- /dev/null +++ b/man/dot-populateTemplates.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.populateTemplates} +\alias{.populateTemplates} +\title{Populate list of path templates} +\usage{ +.populateTemplates( + project, + templList, + sampleName = NULL, + projectContext = FALSE +) +} +\arguments{ +\item{project}{an object of \code{\link[pepr]{Config-class}}} + +\item{templList}{list of strings, +like: 'aligned_{sample.genome}/{sample.sample_name}_sort.bam'} + +\item{sampleName}{string, name of the protocol to select the samples} + +\item{projectContext}{logical indicating whether project context +should be applied. Default: sample} +} +\value{ +list of strings +} +\description{ +Populate list of path templates +} diff --git a/man/dot-pyToR.Rd b/man/dot-pyToR.Rd new file mode 100644 index 0000000..7c37f46 --- /dev/null +++ b/man/dot-pyToR.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.pyToR} +\alias{.pyToR} +\title{Switch from python to R list accession syntax} +\usage{ +.pyToR(str) +} +\arguments{ +\item{str}{String to recode} +} +\value{ +string with the recoded accession syntax +} +\description{ +Python uses a dot to access attributes, while R uses \code{$}; this function +converts the python style into R so that we can use R code to populate +variables with R lists. 
From this: '\code{{sample.name}}' +to this: '\code{{sample$name}}' +} +\examples{ +.pyToR('{sample.genome}/{sample.read_type}/test') +} diff --git a/man/getOutputsBySample.Rd b/man/getOutputsBySample.Rd new file mode 100644 index 0000000..ee62f24 --- /dev/null +++ b/man/getOutputsBySample.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{getOutputsBySample} +\alias{getOutputsBySample} +\alias{getOutputsBySample,Project-method} +\title{Populates and returns output files for a given sample} +\usage{ +getOutputsBySample(project, ...) + +\S4method{getOutputsBySample}{Project}(project, sampleNames = NULL) +} +\arguments{ +\item{project}{\code{\link[pepr]{Project-class}} object} + +\item{...}{other arguments} + +\item{sampleNames}{names of the samples} +} +\value{ +a list of output file paths. The order of the first level of the +list corresponds to the order of the pipeline interface files, second level +is a named list of file paths populated by the samples +} +\description{ +Returns the pipeline outputs which are defined in the pipeline interface +indicated in the \code{\link[pepr]{Project-class}} +} +\section{Methods (by class)}{ +\itemize{ +\item \code{Project}: Populates and returns output files +for a given sample +}} + +\examples{ +projectConfig = system.file('extdata', +'example_peps-master', +'example_piface', +'project_config.yaml', +package = 'BiocProject') +p = Project(file = projectConfig) +getOutputsBySample(p) +getOutputsBySample(p, 'sample1') +} diff --git a/man/getProjectOutputs.Rd b/man/getProjectOutputs.Rd new file mode 100644 index 0000000..25c9623 --- /dev/null +++ b/man/getProjectOutputs.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{getProjectOutputs} +\alias{getProjectOutputs} +\alias{getProjectOutputs,Project-method} +\title{Populates and returns output files for a + given 
\code{\link[pepr]{Project-class}}} +\usage{ +getProjectOutputs(project) + +\S4method{getProjectOutputs}{Project}(project) +} +\arguments{ +\item{project}{\code{\link[pepr]{Project-class}} object} +} +\value{ +a list of output file paths. The order of the first level of the +list corresponds to the order of the pipeline interface files, second level +is a named list of file paths populated +by the \code{\link[pepr]{Project-class}} +} +\description{ +Returns the pipeline outputs which are defined in the pipeline interface +indicated in the \code{\link[pepr]{Project-class}} +} +\section{Methods (by class)}{ +\itemize{ +\item \code{Project}: Populates and returns output files for +a given \code{\link[pepr]{Project-class}} +}} + +\examples{ +projectConfig = system.file('extdata', +'example_peps-master', +'example_piface', +'project_config.yaml', +package = 'BiocProject') +p = Project(file = projectConfig) +getProjectOutputs(p) +} diff --git a/man/pipelineInterfacesBySample.Rd b/man/pipelineInterfacesBySample.Rd new file mode 100644 index 0000000..6253e8d --- /dev/null +++ b/man/pipelineInterfacesBySample.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{pipelineInterfacesBySample} +\alias{pipelineInterfacesBySample} +\alias{pipelineInterfacesBySample,Project-method} +\title{Get pipeline interfaces by sample} +\usage{ +pipelineInterfacesBySample(project) + +\S4method{pipelineInterfacesBySample}{Project}(project) +} +\arguments{ +\item{project}{\code{\link[pepr]{Project-class}} object} +} +\value{ +a list of pipeline interface file paths keyed by sample names +} +\description{ +Collects all relevant pipeline interfaces for this +\code{\link[pepr]{Project-class}} and provides a sample to interfaces mapping +} +\section{Methods (by class)}{ +\itemize{ +\item \code{Project}: Get pipeline interfaces by sample +}} + +\examples{ +projectConfig = system.file('extdata', +'example_peps-master', 
+'example_piface', +'project_config.yaml', +package = 'BiocProject') +p = Project(file = projectConfig) +pipelineInterfacesBySample(p) +} From 2385a27b685f0103623b8d0edb33e9fe239bf874 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 22 Oct 2020 14:15:00 -0400 Subject: [PATCH 03/24] bump version --- DESCRIPTION | 2 +- vignettes/vignette6tximeta.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dfed912..6b4fa6e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BiocProject Title: Bioconductor Management with Portable Encapsulated Project (PEP) Objects -Version: 0.2.1 +Version: 0.3.0 Authors@R: c(person("Michal", "Stolarczyk", email = "mjs5kd@virginia.edu",role = c("aut", "cre")), person("Nathan", "Sheffield", email = "nathan@code.databio.org",role = c("aut"))) Description: A Bioconductor-oriented project management class. It wraps the diff --git a/vignettes/vignette6tximeta.Rmd b/vignettes/vignette6tximeta.Rmd index 912a85f..ee2db9d 100644 --- a/vignettes/vignette6tximeta.Rmd +++ b/vignettes/vignette6tximeta.Rmd @@ -1,7 +1,7 @@ --- title: "Using BiocProject with tximeta" author: "Michał Stolarczyk" -date: "2020-10-22" +date: "`r Sys.Date()`" output: BiocStyle::html_document vignette: > %\VignetteIndexEntry{Using BiocProject with tximeta"} From f9494a2466155820f0f9460f83e4c39cd6b9dc9a Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 22 Oct 2020 16:06:04 -0400 Subject: [PATCH 04/24] add piface vignette --- vignettes/vignette5piface.Rmd | 118 ++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 vignettes/vignette5piface.Rmd diff --git a/vignettes/vignette5piface.Rmd b/vignettes/vignette5piface.Rmd new file mode 100644 index 0000000..1199d2d --- /dev/null +++ b/vignettes/vignette5piface.Rmd @@ -0,0 +1,118 @@ +--- +title: "Using a pipeline interface in your project" +author: "Michał Stolarczyk" +date: "`r Sys.Date()`" +output: 
BiocStyle::html_document +vignette: > + %\VignetteIndexEntry{Using a pipeline interface in your project} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +# Introduction + +Pipeline interface tells the pipeline submission engine (such as [`looper`](http://looper.databio.org/en/latest/)) how to interact with your +project and pipelines. In brief, it is just a `yaml` file with three required +sections: + +* `pipeline_name` - A string identifying the pipeline, +* `pipeline_type` - A string indicating a pipeline type: "sample" (for run) +or "project" (for runp), +* `command_template`- A Jinja2 template used to construct a pipeline command to run. + +Follow the pipeline interface [specification](http://looper.databio.org/en/latest/pipeline-interface-specification/) +to learn more about all the features of that `looper` provides via that file. + +# Main features + +Let's consider the examples below that illustrate the pipeline interface-related +functionality of `BiocProject` package. + +## `bioconductor` section in the pipeline interface + +The first advantage of pipeline interafce concept is the possibility to declare +the data processing function in the pipeline interface itself. Since the data +processing function is pipeline specific rather than project specific, it is +much more convenient to place the `bioconductor` section in the pipeline +interface file. + +```{r echo=F,message=FALSE, collapse=TRUE, comment=" "} +library(BiocProject) +branch = "master" +configFile = system.file( + "extdata", + paste0("example_peps-", branch), + "example_piface", + "project_config.yaml", + package = "BiocProject" +) +p=pepr::Project(configFile) +.printNestedList(yaml::read_yaml(pipelineInterfacesBySample(p)[[1]][1])) +``` + +## Get output file paths + +Pipeline outputs can be defined in a schema. 
As shown in the example above, +pipeline interface specifies a path to a schema in a top-level `output_schema` +section. +Example of a schema defining pipeline outputs: +```{r, echo=F, message=F} +pifaceSource = pipelineInterfacesBySample(p)[[1]][1] +piface = yaml::read_yaml(pifaceSource) +schemaPath = file.path(dirname(pifaceSource), piface$output_schema) +.printNestedList(yaml::read_yaml(schemaPath)) +``` + +### Sample-level + +Pipeline interface system divides pipelines (and their outputs) into project- +and sample-level. + +In order to list the outputs for a sample, or all the samples use +`getOutputsBySample` method. If you indicate a specific sample name, only outputs for this sample will be returned. +```{r} +getOutputsBySample(p, sampleNames=c("sample1")) +``` + +### Project-level + +In order to list project-level outputs, use `getProjectOutputs` method: + +```{r} +getProjectOutputs(p) +``` + +# Use case + +This functionality provides a convenient way to process the files produced by +the pipeline, when used in the data processing function indicated in the +`bioconductor` section of the pipeline interface file. See the example function +below that demonstrates the application of the `getOutputsBySample` function. 
+ +```{r echo=FALSE, eval=TRUE, comment=""} +branch = "master" +processFunction = system.file( + "extdata", + paste0("example_peps-", branch), + "example_piface", + "readData.R", + package = "BiocProject" +) +source(processFunction) +piface = yaml::read_yaml(pipelineInterfacesBySample(p)[[1]][1]) +get(piface$bioconductor$readFunName) +``` +Such a link between the project and the outputs +(declared in the pipeline interface) makes it possible to read and process +the pipeline results with just a line of code: + +``` +bp = BiocProject(configFile) +``` \ No newline at end of file From abcc608fd34cae05726122335d14bb85854f923c Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 22 Oct 2020 16:29:07 -0400 Subject: [PATCH 05/24] add dependancies --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6b4fa6e..af79813 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,7 @@ Description: A Bioconductor-oriented project management class. It wraps the License: BSD_2_clause + file LICENSE Encoding: UTF-8 LazyData: true -Depends: S4Vectors, pepr, methods +Depends: S4Vectors, pepr, methods, glue, pryr, yaml, RCurl Suggests: testthat, yaml, BiocFileCache, simpleCache, GenomicRanges, knitr, BiocStyle, rmarkdown biocViews: DataImport, DataRepresentation RoxygenNote: 7.1.0 From 3e857e00dee6f28dc74fe75cf1ea8aa9961139b9 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 22 Oct 2020 18:23:21 -0400 Subject: [PATCH 06/24] udpate piface --- DESCRIPTION | 4 ++-- _pkgdown.yaml | 1 + vignettes/vignette5piface.Rmd | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index af79813..9b4d41b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,8 +9,8 @@ Description: A Bioconductor-oriented project management class. 
It wraps the License: BSD_2_clause + file LICENSE Encoding: UTF-8 LazyData: true -Depends: S4Vectors, pepr, methods, glue, pryr, yaml, RCurl -Suggests: testthat, yaml, BiocFileCache, simpleCache, GenomicRanges, knitr, BiocStyle, rmarkdown +Depends: S4Vectors, pepr, methods +Suggests: testthat, yaml, BiocFileCache, simpleCache, GenomicRanges, knitr, BiocStyle, rmarkdown, glue, pryr, RCurl biocViews: DataImport, DataRepresentation RoxygenNote: 7.1.0 URL: https://github.com/pepkit/BiocProject diff --git a/_pkgdown.yaml b/_pkgdown.yaml index e4f25fa..2a8a1d5 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -32,6 +32,7 @@ articles: - vignette2multipleArguments - vignette3simpleCache - vignette4remoteData + - vignette5piface - vignette6tximeta reference: diff --git a/vignettes/vignette5piface.Rmd b/vignettes/vignette5piface.Rmd index 1199d2d..f5e4e6b 100644 --- a/vignettes/vignette5piface.Rmd +++ b/vignettes/vignette5piface.Rmd @@ -28,7 +28,7 @@ or "project" (for runp), * `command_template`- A Jinja2 template used to construct a pipeline command to run. Follow the pipeline interface [specification](http://looper.databio.org/en/latest/pipeline-interface-specification/) -to learn more about all the features of that `looper` provides via that file. +to learn more about all the features that `looper` provides via that file. # Main features @@ -37,7 +37,7 @@ functionality of `BiocProject` package. ## `bioconductor` section in the pipeline interface -The first advantage of pipeline interafce concept is the possibility to declare +The first advantage of pipeline interface concept is the possibility to declare the data processing function in the pipeline interface itself. 
Since the data processing function is pipeline specific rather than project specific, it is much more convenient to place the `bioconductor` section in the pipeline From 6313dabd93947355d9049c2cdf5c4d00c4eb58b5 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 23 Oct 2020 10:45:42 -0400 Subject: [PATCH 07/24] enhance update_examples bash script --- DESCRIPTION | 5 +- inst/extdata/example_peps-master/.gitignore | 95 +++++++++++++++++++ inst/extdata/example_peps-master/README.md | 25 +++++ .../pipeline_interface1_project.yaml | 5 +- .../pipeline_interface1_sample.yaml | 5 +- .../pipeline_interface2_project.yaml | 5 +- .../pipeline_interface2_sample.yaml | 5 +- update_examples.sh | 21 +++- 8 files changed, 151 insertions(+), 15 deletions(-) create mode 100755 inst/extdata/example_peps-master/.gitignore create mode 100644 inst/extdata/example_peps-master/README.md diff --git a/DESCRIPTION b/DESCRIPTION index 9b4d41b..8b3ffa7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,8 +9,9 @@ Description: A Bioconductor-oriented project management class. 
It wraps the License: BSD_2_clause + file LICENSE Encoding: UTF-8 LazyData: true -Depends: S4Vectors, pepr, methods -Suggests: testthat, yaml, BiocFileCache, simpleCache, GenomicRanges, knitr, BiocStyle, rmarkdown, glue, pryr, RCurl +Depends: S4Vectors, pepr +Imports: methods, glue, RCurl, yaml, stats, pryr +Suggests: testthat, BiocFileCache, simpleCache, GenomicRanges, knitr, BiocStyle, rmarkdown biocViews: DataImport, DataRepresentation RoxygenNote: 7.1.0 URL: https://github.com/pepkit/BiocProject diff --git a/inst/extdata/example_peps-master/.gitignore b/inst/extdata/example_peps-master/.gitignore new file mode 100755 index 0000000..aae0f17 --- /dev/null +++ b/inst/extdata/example_peps-master/.gitignore @@ -0,0 +1,95 @@ +# Specific ignores: + +.ipynb_checkpoints/ +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Gedit temporary files +*~ + +# Openoffice lock files +.~* + +# Rcaches +RCache/* + +# Compiled source +*.com +*.class +*.dll +*.exe +*.o +*.so + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Gedit temporary files +*~ + +# Openoffice lock files +.~* + + +# Rcaches +RCache/* + +# macOS +*.DS_Store \ No newline at end of file diff --git a/inst/extdata/example_peps-master/README.md b/inst/extdata/example_peps-master/README.md new file mode 100644 index 0000000..78520a6 --- /dev/null +++ b/inst/extdata/example_peps-master/README.md @@ -0,0 +1,25 @@ +# example_peps + +This repository contains examples of **PEPs** (Portable Encapsulated Projects). Visit the [PEP2.0.0 specification webiste](http://pep.databio.org) to learn more about the PEP standard and features. Explore the examples interactively with `Python` or `R`: + + +## Python + +Your basic python workflow uses the [`peppy`](http://github.com/pepkit/peppy) package and starts out like this: + +```python +import peppy +proj1 = peppy.Project("example_basic/project_config.yaml") +``` +More detailed Python vignettes are available as part of the [documentation for the `peppy` package](http://peppy.databio.org/en/latest/). + +## R + +Your basic `R` workflow uses the [`pepr`](http://github.com/pepkit/pepr) package and starts like this: + +```r +library('pepr') +p = pepr::Project("example_basic/project_config.yaml") +``` + +More detailed R vignettes are available as part of the [documentation for the `pepr` package](http://code.databio.org/pepr). 
diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml index cd9903c..01bfacd 100644 --- a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml @@ -1,9 +1,10 @@ pipeline_name: PIPELINE1 pipeline_type: project -path: pipelines/col_pipeline1.py +var_templates: + path: "{looper.piface_dir}/pipelines/proj_pipeline1.py" output_schema: output_schema.yaml command_template: > - {pipeline.path} --project-name {project.name} + {pipeline.var_templates.path} --project-name {project.name} bioconductor: readFunName: readData diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml index e0d766d..a6a7bbe 100644 --- a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml @@ -1,10 +1,11 @@ pipeline_name: PIPELINE1 pipeline_type: sample -path: pipelines/pipeline1.py +var_templates: + path: "{looper.piface_dir}/pipelines/pipeline1.py" input_schema: https://schema.databio.org/pep/2.0.0.yaml output_schema: output_schema.yaml command_template: > - {pipeline.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} bioconductor: readFunName: readData diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml index b77a621..2f10ce4 100644 --- a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml @@ -1,9 
+1,10 @@ pipeline_name: OTHER_PIPELINE2 pipeline_type: project -path: pipelines/col_pipeline2.py +var_templates: + path: "{looper.piface_dir}/pipelines/proj_pipeline2.py" output_schema: output_schema.yaml command_template: > - {pipeline.path} --project-name {project.name} + {pipeline.var_templates.path} --project-name {project.name} compute: size_dependent_variables: resources-project.tsv diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml index 93186f6..178d1d0 100644 --- a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml @@ -1,9 +1,10 @@ pipeline_name: OTHER_PIPELINE2 pipeline_type: sample -path: pipelines/other_pipeline2.py +var_templates: + path: "{looper.piface_dir}/pipelines/other_pipeline2.py" output_schema: output_schema.yaml command_template: > - {pipeline.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} compute: size_dependent_variables: resources-sample.tsv diff --git a/update_examples.sh b/update_examples.sh index 3f7f85b..20ff490 100755 --- a/update_examples.sh +++ b/update_examples.sh @@ -1,5 +1,16 @@ -wget https://github.com/pepkit/example_peps/archive/master.zip -unzip master.zip -rm -rf inst/extdata/example_peps-master -mv example_peps-master inst/extdata -rm master.zip \ No newline at end of file +#!/bin/bash + +if [ $# -ne 1 ]; then + echo $0: usage: update_test_data.sh branch + exit 1 +fi + +branch=$1 + +wget https://github.com/pepkit/example_peps/archive/${branch}.zip +unzip ${branch}.zip +rm -rf inst/extdata/example_peps-${branch} +mv example_peps-${branch} inst/extdata +rm ${branch}.zip +cd inst/extdata/example_peps-${branch}/ +find . -type d -maxdepth 1 ! -name 'example_BiocProjec*' ! 
-name 'example_piface' -exec rm -r {} \; \ No newline at end of file From ab981307923ca5658c34d7818e421f7ec9720be9 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 23 Oct 2020 11:52:54 -0400 Subject: [PATCH 08/24] update piface vignette --- vignettes/vignette5piface.Rmd | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/vignettes/vignette5piface.Rmd b/vignettes/vignette5piface.Rmd index f5e4e6b..95f9ac8 100644 --- a/vignettes/vignette5piface.Rmd +++ b/vignettes/vignette5piface.Rmd @@ -30,18 +30,24 @@ or "project" (for runp), Follow the pipeline interface [specification](http://looper.databio.org/en/latest/pipeline-interface-specification/) to learn more about all the features that `looper` provides via that file. -# Main features +# Relevant features + +The only two sections of the pipeline interface relevant from the BiocProject perspective are: + +1. `bioconductor` -- used to specify the data processing function name and/or path +2. `output_schema` -- used to specify a path to a pipeline output schema that describes the outputs of the pipeline + Let's consider the examples below that illustrate the pipeline interface-related functionality of `BiocProject` package. -## `bioconductor` section in the pipeline interface +## Specify `bioconductor` section in the pipeline interface The first advantage of pipeline interface concept is the possibility to declare the data processing function in the pipeline interface itself. Since the data processing function is pipeline specific rather than project specific, it is -much more convenient to place the `bioconductor` section in the pipeline -interface file. +much more sensible to place the `bioconductor` section in the pipeline +interface file -- the code that will be used to preprocess the output of the pipeline depends on the pipeline (defined by pipeline interface), not the data (defined by the PEP). 
```{r echo=F,message=FALSE, collapse=TRUE, comment=" "} library(BiocProject) @@ -63,13 +69,15 @@ Pipeline outputs can be defined in a schema. As shown in the example above, pipeline interface specifies a path to a schema in a top-level `output_schema` section. Example of a schema defining pipeline outputs: -```{r, echo=F, message=F} +```{r echo=F,message=FALSE, collapse=TRUE, comment=" "} pifaceSource = pipelineInterfacesBySample(p)[[1]][1] piface = yaml::read_yaml(pifaceSource) schemaPath = file.path(dirname(pifaceSource), piface$output_schema) .printNestedList(yaml::read_yaml(schemaPath)) ``` +Check out the [looper documentation on output schema](http://looper.databio.org/en/latest/pipeline-interface-specification/#output_schema) to learn more. + ### Sample-level Pipeline interface system divides pipelines (and their outputs) into project- @@ -113,6 +121,6 @@ Such a link between the project and the outputs (declared in the pipeline interface) makes it possible to read and process the pipeline results with just a line of code: -``` +```r bp = BiocProject(configFile) ``` \ No newline at end of file From 1b5424e5008520065ccfcde32af11257d21797b3 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 23 Oct 2020 16:38:01 -0400 Subject: [PATCH 09/24] add bioc section sourcing from piface --- R/functions.R | 10 ++- R/pipeline_interface.R | 22 +++-- R/utils.R | 85 ++++++++++++++++--- .../example_piface/readData.R | 10 ++- 4 files changed, 95 insertions(+), 32 deletions(-) diff --git a/R/functions.R b/R/functions.R index 40a4716..8c2f588 100644 --- a/R/functions.R +++ b/R/functions.R @@ -63,6 +63,8 @@ #' the \code{bioconductor} section in the config file. #' @param autoLoad a logical indicating whether the data should be loaded #' automatically. See \code{Details} for more information. +#' @param projectLevel logical indicating whether only project-level pifaces +#' should be considered. Otherwise, only sample-level ones are. 
#' #' @return an object of \code{\link[S4Vectors]{Annotated-class}} that is #' returned by the user provided function with @@ -81,8 +83,8 @@ #' @seealso \url{https://pepkit.github.io/} #' @import pepr #' @export BiocProject -BiocProject = function(file, amendments = NULL, autoLoad = TRUE, func = NULL, - funcArgs = NULL) { +BiocProject = function(file, amendments = NULL, autoLoad = TRUE, func = NULL, + projectLevel = FALSE, funcArgs = NULL) { p = pepr::Project(file=file, amendments = amendments) # prevent PEP (Project object) input. This prevents BiocProject object # failing when the user provides the Project object @@ -96,7 +98,9 @@ BiocProject = function(file, amendments = NULL, autoLoad = TRUE, func = NULL, } } args = append(list(p), funcArgs) - cfg = pepr::config(p) + cfg = .getBiocConfig(p, projectLevel) + if(is.null(cfg)) + cfg = pepr::config(p) if(pepr::.checkSection(cfg, c(BIOC_SECTION, FUNCTION_ARGS))){ args = .unionList(config(p)[[BIOC_SECTION]][[FUNCTION_ARGS]],args) argsNames = names(args) diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R index ec9a20b..04b3e11 100644 --- a/R/pipeline_interface.R +++ b/R/pipeline_interface.R @@ -181,26 +181,24 @@ setMethod("getOutputsBySample", pifacesBySample = pipelineInterfacesBySample(project = project) defSampleNames = names(pifacesBySample) if (!is.null(sampleNames)) - defSampleNames = intersect(sampleNames, - defSampleNames) - if (length(defSampleNames) < - 1) + defSampleNames = intersect(sampleNames, defSampleNames) + if (length(defSampleNames) < 1) stop("No samples matched by: ", - paste0(sampleNames, - collapse = ",")) + paste0(sampleNames, collapse = ",")) ret = list() for (sampleName in defSampleNames) { sampleRet = list() pifaceSources = pifacesBySample[[sampleName]] for (pifaceSource in pifaceSources) { piface = yaml::yaml.load_file(pifaceSource) - if (!.checkPifaceType(piface, - "sample")) + if(!.checkSection(piface, "pipeline_name")) + stop("'pipeline_name' section missing in pipeline 
interface: ", + pifaceSource) + if (!.checkPifaceType(piface, "sample")) return(invisible(NULL)) - outputs = .getOutputs(piface, - parent = dirname(pifaceSource)) - sampleRet[[pifaceSource]] = .populateTemplates( - project, outputs, sampleName) + outputs = .getOutputs(piface, parent = dirname(pifaceSource)) + sampleRet[[piface[["pipeline_name"]]]] = + .populateTemplates(project, outputs, sampleName) } ret[[sampleName]] = sampleRet } diff --git a/R/utils.R b/R/utils.R index 74da2f9..e05cf5d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -153,6 +153,70 @@ readSchema = function(path, parent = NULL) { return(res) } + +#' Get the preferred source of the bioconductor section +#' +#' @param p \code{\link[pepr]{Project-class}} object +#' @param projectLevel logical indicating whether only project-level pifaces +#' should be considered. Otherwise, only sample-level ones are. +#' +#' @return a list with the selected config +#' @importFrom pepr checkSection config +#' @importFrom methods new +.getBiocConfig = function(p, projectLevel = FALSE) { + if (checkSection(config(p), BIOC_SECTION)) { + # if the BIOC_SECTION section is found in the project + # config, override any other locations + message("The '", BIOC_SECTION, + "' key found in the Project config") + return(config(p)) + } + # check for BIOC_SECTION in pipeline interfaces + pifaceSource = gatherPipelineInterfaces(p, projectLevel=projectLevel) + if (length(pifaceSource) > 0) { + if (length(pifaceSource) > 1) + message(length(pifaceSource), " pipeline interface sources matched. 
", + "Using the first one: ", pifaceSource) + pifaceSource = pifaceSource[1] + } + + if (!is.null(pifaceSource)) { + piface = yaml::read_yaml(pifaceSource) + if (pepr::.checkSection(piface, BIOC_SECTION)) { + message("The '", BIOC_SECTION, "' key found in the pipeline interface") + return(.makeReadFunPathAbs(piface, parent=dirname(pifaceSource))) + } else { + warning("The '", BIOC_SECTION, + "' key is missing in Project config and pipeline interface") + return(invisible(NULL)) + } + } else { + warning("The '", BIOC_SECTION, + "' key is missing in Project config and pipeline interface") + return(invisible(NULL)) + } +} + +#' Make readFunPath absolute +#' +#' Uses the absolute pipeline interface path in the config to determine the +#' absolute path to the readFunPath file that consists of the data +#' processing function +#' +#' @param piface \code{\link[pepr]{Config-class}}/list with a pipeline interface +#' @param parent a path to parent folder to use +#' +#' @return piface \code{\link[pepr]{Config-class}} pipeline interface with +#' the readFunPath made absolute +.makeReadFunPathAbs = function(piface, parent) { + pth = piface[[BIOC_SECTION]][[FUNCTION_PATH]] + absReadFunPath = .makeAbsPath(pth, parent) + if (!.isAbsolute(absReadFunPath)) + stop("Failed to make the readFunPath absolute: ", absReadFunPath) + piface[[BIOC_SECTION]][[FUNCTION_PATH]] = absReadFunPath + piface +} + # Create an absolute path from a primary target and a parent candidate. # # @param perhapsRelative: Path to primary target directory. @@ -336,18 +400,14 @@ readSchema = function(path, parent = NULL) { # table, coerced to a list # object to allow attribute # accession. 
- samplesSubset = subset(sampleTable(project), - sample_name == sampleName) - if (!projectContext && NROW(samplesSubset) < - 1) + samplesSubset = subset(sampleTable(project), sample_name == sampleName) + if (!projectContext && NROW(samplesSubset) < 1) return(invisible(NULL)) if (projectContext) { - populatedStrings = with(config(project), - glue(.pyToR(string))) + populatedStrings = with(config(project), glue(.pyToR(string))) } else { - populatedStrings = as.list(apply(samplesSubset, 1, function(s) { - return(with(s, glue(.pyToR(string)))) - })) + populatedStrings = as.character(apply( + samplesSubset, 1, function(s) { with(s, glue(.pyToR(string)))})) } if (!projectContext && length(populatedStrings) != NROW(samplesSubset)) { @@ -359,7 +419,6 @@ readSchema = function(path, parent = NULL) { string, "' will not be populated") return(invisible(NULL)) } - names(populatedStrings) = unlist(samplesSubset$sample_name) return(populatedStrings) } @@ -374,13 +433,13 @@ readSchema = function(path, parent = NULL) { #' should be applied. 
Default: sample #' #' @return list of strings -.populateTemplates = function(project, - templList, sampleName = NULL, +.populateTemplates = function(project, templList, sampleName = NULL, projectContext = FALSE) { if (!projectContext && is.null(sampleName)) stop("Must specify the sample to populate templates for") expandedTemplList = lapply(templList, pepr::.expandPath) - lapply(expandedTemplList, .populateString, project, sampleName, projectContext) + x=lapply(expandedTemplList, .populateString, project, sampleName, projectContext) + return(x) } diff --git a/inst/extdata/example_peps-master/example_piface/readData.R b/inst/extdata/example_peps-master/example_piface/readData.R index 89557a1..bd5bb4e 100644 --- a/inst/extdata/example_peps-master/example_piface/readData.R +++ b/inst/extdata/example_peps-master/example_piface/readData.R @@ -1,10 +1,12 @@ readData = function(project, sampleName="sample1") { lapply(getOutputsBySample(project, sampleName), function(x) { lapply(x, function(x1){ - message("Reading: ", basename(x1)) - df = read.table(x1, stringsAsFactors=F) - colnames(df)[1:3] = c('chr', 'start', 'end') - GenomicRanges::GRanges(df) + lapply(x1, function(x2){ + message("Reading: ", x2) + df[[x2]] = read.table(x2, stringsAsFactors=F) + colnames(df)[1:3] = c('chr', 'start', 'end') + }) }) }) + GenomicRanges::GRanges(df) } From 440dfec37a02e9a16cf9c45b4ff87d8fa5c08738 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 23 Oct 2020 17:09:49 -0400 Subject: [PATCH 10/24] abstract piface name verification away, use name no path in output lists --- R/pipeline_interface.R | 18 +++++++----------- R/utils.R | 2 ++ 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R index 04b3e11..1f617da 100644 --- a/R/pipeline_interface.R +++ b/R/pipeline_interface.R @@ -191,13 +191,10 @@ setMethod("getOutputsBySample", pifaceSources = pifacesBySample[[sampleName]] for (pifaceSource in pifaceSources) { piface = 
yaml::yaml.load_file(pifaceSource) - if(!.checkSection(piface, "pipeline_name")) - stop("'pipeline_name' section missing in pipeline interface: ", - pifaceSource) if (!.checkPifaceType(piface, "sample")) return(invisible(NULL)) outputs = .getOutputs(piface, parent = dirname(pifaceSource)) - sampleRet[[piface[["pipeline_name"]]]] = + sampleRet[[piface[[PIP_NAME_KEY]]]] = .populateTemplates(project, outputs, sampleName) } ret[[sampleName]] = sampleRet @@ -235,19 +232,18 @@ setGeneric("getProjectOutputs", #' a given \code{\link[pepr]{Project-class}} setMethod("getProjectOutputs", c(project = "Project"), function(project) { - pifaceSources = gatherPipelineInterfaces(project, - projectLevel = TRUE) + pifaceSources = gatherPipelineInterfaces( + project, projectLevel=TRUE) ret = list() for (pifaceSource in pifaceSources) { piface = yaml::yaml.load_file(pifaceSource) - if (!.checkPifaceType(piface, - "project")) + if (!.checkPifaceType(piface, "project")) return(invisible(NULL)) outputs = .getOutputs(piface, parent = dirname(pifaceSource), - projectContext = TRUE) - ret[[pifaceSource]] = .populateTemplates(project, outputs, - projectContext = TRUE) + projectContext=TRUE) + ret[[piface[[PIP_NAME_KEY]]]] = + .populateTemplates(project, outputs, projectContext=TRUE) } ret }) diff --git a/R/utils.R b/R/utils.R index e05cf5d..08ac002 100644 --- a/R/utils.R +++ b/R/utils.R @@ -450,6 +450,8 @@ readSchema = function(path, parent = NULL) { #' #' @return a logical indicating whether the pipeline interface matches the specified type .checkPifaceType <- function(piface, type) { + if(!.checkSection(piface, PIP_NAME_KEY)) + stop(PIP_NAME_KEY, " section missing in pipeline interface") if (!pepr::.checkSection(piface, PIP_TYPE_KEY) || piface[[PIP_TYPE_KEY]] != type) { warning(sprintf( From 7f3d2fb72b4b8aabe8cae5afbccf33df0fbfd882 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 23 Oct 2020 17:24:55 -0400 Subject: [PATCH 11/24] docs --- NAMESPACE | 3 +++ man/BiocProject.Rd | 4 
++++ man/dot-getBiocConfig.Rd | 20 ++++++++++++++++++++ man/dot-makeReadFunPathAbs.Rd | 22 ++++++++++++++++++++++ 4 files changed, 49 insertions(+) create mode 100644 man/dot-getBiocConfig.Rd create mode 100644 man/dot-makeReadFunPathAbs.Rd diff --git a/NAMESPACE b/NAMESPACE index 7930ee3..8640caf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,9 @@ import(methods) import(pepr) importFrom(RCurl,getURLContent) importFrom(glue,glue) +importFrom(methods,new) +importFrom(pepr,checkSection) +importFrom(pepr,config) importFrom(pryr,partial) importFrom(stats,setNames) importFrom(yaml,yaml.load_file) diff --git a/man/BiocProject.Rd b/man/BiocProject.Rd index e81199b..9ad11a2 100644 --- a/man/BiocProject.Rd +++ b/man/BiocProject.Rd @@ -9,6 +9,7 @@ BiocProject( amendments = NULL, autoLoad = TRUE, func = NULL, + projectLevel = FALSE, funcArgs = NULL ) } @@ -26,6 +27,9 @@ it must take the \code{\link[pepr]{Project-class}} as an argument. See \code{Details} for more information} +\item{projectLevel}{logical indicating whether a only project-level pifaces +should be considered. Otherwise, only sample-level ones are.} + \item{funcArgs}{a named list with arguments you want to pass to the \code{func}. The PEP will be passed automatically, diff --git a/man/dot-getBiocConfig.Rd b/man/dot-getBiocConfig.Rd new file mode 100644 index 0000000..23da525 --- /dev/null +++ b/man/dot-getBiocConfig.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.getBiocConfig} +\alias{.getBiocConfig} +\title{Get the preferred source of the bioconductor section} +\usage{ +.getBiocConfig(p, projectLevel = FALSE) +} +\arguments{ +\item{p}{\code{\link[pepr]{Project-class}} object} + +\item{projectLevel}{logical indicating whether a only project-level pifaces +should be considered. 
Otherwise, only sample-level ones are.} +} +\value{ +a list with the selected config +} +\description{ +Get the preferred source of the bioconductor section +} diff --git a/man/dot-makeReadFunPathAbs.Rd b/man/dot-makeReadFunPathAbs.Rd new file mode 100644 index 0000000..3760c7a --- /dev/null +++ b/man/dot-makeReadFunPathAbs.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.makeReadFunPathAbs} +\alias{.makeReadFunPathAbs} +\title{Make readFunPath absolute} +\usage{ +.makeReadFunPathAbs(piface, parent) +} +\arguments{ +\item{piface}{\code{\link[pepr]{Config-class}}/list with a pipeline interface} + +\item{parent}{a path to parent folder to use} +} +\value{ +piface \code{\link[pepr]{Config-class}} pipeline interface with +the readFunPath made absolute +} +\description{ +Uses the absolute pipeline interface path in the config to determine the +absolute path to the readFunPath file that consists of the data +processing function +} From 5c0b264aca9d5219cc44813bb443da864a9daf03 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Sun, 25 Oct 2020 12:55:40 -0400 Subject: [PATCH 12/24] add tests --- tests/testthat/test_Annotated.R | 69 +++++++++++ tests/testthat/test_BiocProject.R | 127 ++++++++++++++++++++ tests/testthat/test_piface.R | 69 +++++++++++ tests/testthat/{test_all.R => test_utils.R} | 96 +-------------- 4 files changed, 271 insertions(+), 90 deletions(-) create mode 100644 tests/testthat/test_Annotated.R create mode 100644 tests/testthat/test_BiocProject.R create mode 100644 tests/testthat/test_piface.R rename tests/testthat/{test_all.R => test_utils.R} (50%) diff --git a/tests/testthat/test_Annotated.R b/tests/testthat/test_Annotated.R new file mode 100644 index 0000000..083cab8 --- /dev/null +++ b/tests/testthat/test_Annotated.R @@ -0,0 +1,69 @@ +library(yaml) +# Prep data --------------------------------------------------------------- + +branch = "master" + +configFile = system.file( + 
"extdata", + paste0("example_peps-",branch), + "example_BiocProject", + "project_config.yaml", + package = "BiocProject" +) + +configFileArgs = system.file( + "extdata", + paste0("example_peps-",branch), + "example_BiocProject", + "project_config_resize.yaml", + package = "BiocProject" +) + +configFileMissingFun = system.file( + "test_projects", + "faulty_project", + "project_config_no_function.yaml", + package = "BiocProject" +) + +configFileNoSection = system.file( + "test_projects", + "faulty_project", + "project_config_no_section.yaml", + package = "BiocProject" +) + +bp = BiocProject(configFile) + +# Test -------------------------------------------------------------------- + +context("Test Annotated methods") + +test_that("samples returns a correct object", { + expect_is(sampleTable(bp),"data.table") +}) + +test_that("config returns a correct object", { + expect_is(config(bp),"Config") +}) + +test_that(".is.project returns a correct object", { + expect_is(.is.project(bp),"logical") +}) + +test_that(".is.project returns a value", { + expect_equal(.is.project(bp),TRUE) + expect_equal(.is.project(S4Vectors::List(a=1)), FALSE) +}) + +test_that("is method returns correct value when Annotated provided", { + expect_equal(is(bp,"Project"), TRUE) +}) + +test_that("getProject returns a correct object", { + expect_is(getProject(bp),"Project") +}) + +test_that("getProject returns a correct value", { + expect_equal(getProject(bp), pepr::Project(configFile)) +}) \ No newline at end of file diff --git a/tests/testthat/test_BiocProject.R b/tests/testthat/test_BiocProject.R new file mode 100644 index 0000000..0665854 --- /dev/null +++ b/tests/testthat/test_BiocProject.R @@ -0,0 +1,127 @@ +library(yaml) +# Prep data --------------------------------------------------------------- + +branch = "master" + +configFile = system.file( + "extdata", + paste0("example_peps-",branch), + "example_BiocProject", + "project_config.yaml", + package = "BiocProject" +) + +configFileArgs = 
system.file( + "extdata", + paste0("example_peps-",branch), + "example_BiocProject", + "project_config_resize.yaml", + package = "BiocProject" +) + +configFileMissingFun = system.file( + "test_projects", + "faulty_project", + "project_config_no_function.yaml", + package = "BiocProject" +) + +configFileNoSection = system.file( + "test_projects", + "faulty_project", + "project_config_no_section.yaml", + package = "BiocProject" +) + +configPiface = system.file( + "extdata", + paste0("example_peps-",branch), + "example_piface", + "project_config.yaml", + package = "BiocProject" +) + + +bp = BiocProject(configFile) + +a=function(arg) { + stop(arg) +} + +b=function(arg) { + warning(arg) +} + +c=function(arg) { + return(arg) +} + +testChar = "a" + +# Test -------------------------------------------------------------------- + +context("Test BiocProject function") + +test_that("BiocProject function return correct object", { + expect_is(BiocProject(configFile),"Annotated") +}) + +test_that("BiocProject function works with arguments", { + expect_is(BiocProject(configFileArgs),"Annotated") + expect_is(BiocProject(configFileArgs, funcArgs = list(resize.width=200)), + "Annotated") +}) + +test_that("BiocProject function returns Annotated when provided objects of + different class and thorows a warning", { + expect_warning(expect_is(BiocProject(configFile, func = function(x){ + return("test") + }),"Annotated")) + }) + +test_that("BiocProject function returns a Project object + when autoload is set to FALSE", { + expect_is(BiocProject(file=configFile,autoLoad = FALSE),"Project") + }) + +test_that("BiocProject function throws errors/warnings + when the arguments are inappropriate", { + expect_error(BiocProject(file=configFile,func = "2")) + expect_error(BiocProject(file = "test")) + expect_error(BiocProject(file = configFile,autoLoad = "test")) + }) + +test_that("BiocProject function catches errors in the user-provided + function returns the error message as Annotated", { + 
expect_is(BiocProject(file=configFile,func=function(x) { + stop("test") + }),"Annotated") + }) + +test_that("BiocProject function catches errors when the function specified + does not exist", { + expect_error(BiocProject(configFileMissingFun)) + }) + +test_that("BiocProject function throws a warning and returns a Project object + when no bioconductor section found", { + expect_warning(expect_is(BiocProject(configFileNoSection),"Project")) + }) + +test_that("BiocProject function reads the bioconductor section from the + pipeline interface if not found in the project config", { + expect_true(is(BiocProject(configPiface), "Project")) + }) + +test_that("BiocProject function returns a valid object when project pipeline outputs requested", { + expect_true(is(BiocProject(configPiface, projectLevel=TRUE), "Project")) +}) + +test_that("BiocProject disregards the Project object as a user-privided argument", { + expect_warning(expect_true(is(BiocProject(configFile, funcArgs = list(p=Project(configFile))), "Project"))) +}) + +test_that("BiocProject uses the function from the environment, if possible", { + source(config(Project(configFile))$bioconductor$readFunPath) + expect_true(is(BiocProject(configFile), "Project")) +}) \ No newline at end of file diff --git a/tests/testthat/test_piface.R b/tests/testthat/test_piface.R new file mode 100644 index 0000000..3e324e2 --- /dev/null +++ b/tests/testthat/test_piface.R @@ -0,0 +1,69 @@ +library(yaml) +# Prep data --------------------------------------------------------------- + +branch = "master" + +configFile = system.file( + "extdata", + paste0("example_peps-", branch), + "example_piface", + "project_config.yaml", + package = "BiocProject" +) + +configNoPifaces = system.file( + "extdata", + paste0("example_peps-", branch), + "example_BiocProject", + "project_config.yaml", + package = "BiocProject" +) + +p = pepr::Project(configFile) +pifaces = gatherPipelineInterfaces(p) +piface = pifaces[[1]] + +pNoPifaces = 
pepr::Project(configNoPifaces) + +samplesTable = sampleTable(p) + +# Test -------------------------------------------------------------------- + +context("Test gatherPipelineInterfaces function") + +test_that("gatherPipelineInterfaces function returns a character", { + expect_is(gatherPipelineInterfaces(p),"character") +}) + +test_that("gatherPipelineInterfaces function returns an object of + correct length", { + expect_equal(length(gatherPipelineInterfaces(p)), 2) + }) + +test_that("gatherPipelineInterfaces returns NULL when no piface section + not found", { + expect_null(gatherPipelineInterfaces(pNoPifaces)) + }) + +test_that("gatherPipelineInterfaces works for project with no pipeline interfaces defined", { + expect_warning(expect_null(gatherPipelineInterfaces(pNoPifaces, projectLevel=TRUE))) +}) + +context("Test output getters") + +test_that("getProjectOutputs function returns a list", { + expect_is(getProjectOutputs(p), "list") +}) + +test_that("getOutputsBySample function returns a list", { + expect_is(getOutputsBySample(p), "list") +}) + +test_that("getOutputsBySample function allows for specific sample selection", { + expect_false(length(getOutputsBySample(p)) == + length(getOutputsBySample(p, sampleNames="sample1"))) +}) + +test_that("getOutputsBySample errors when non-existent sample selected", { + expect_error(getOutputsBySample(p, samleNames="bogusSample")) +}) \ No newline at end of file diff --git a/tests/testthat/test_all.R b/tests/testthat/test_utils.R similarity index 50% rename from tests/testthat/test_all.R rename to tests/testthat/test_utils.R index 6923546..4e129ac 100644 --- a/tests/testthat/test_all.R +++ b/tests/testthat/test_utils.R @@ -1,9 +1,11 @@ library(yaml) # Prep data --------------------------------------------------------------- +branch = "master" + configFile = system.file( "extdata", - "example_peps-master", + paste0("example_peps-",branch), "example_BiocProject", "project_config.yaml", package = "BiocProject" @@ -11,7 +13,7 @@ 
configFile = system.file( configFileArgs = system.file( "extdata", - "example_peps-master", + paste0("example_peps-",branch), "example_BiocProject", "project_config_resize.yaml", package = "BiocProject" @@ -31,7 +33,6 @@ configFileNoSection = system.file( package = "BiocProject" ) - bp = BiocProject(configFile) a=function(arg) { @@ -49,6 +50,7 @@ c=function(arg) { testChar = "a" # Test -------------------------------------------------------------------- + context("Test .unionList utility function") test_that(".unionList returns correct object type", { @@ -64,6 +66,7 @@ test_that(".unionList returns list of correct length", { test_that(".unionList throws errors", { expect_error(.unionList(list(a=1),2)) }) + context("Test .makeAbsPath utility function") test_that(".makeAbsPath returns correct object", { @@ -101,7 +104,6 @@ context("Test .callBiocFun untility function") test_that(".callBiocFun catches errors", { expect_error(expect_error(.callBiocFun(a,list(testChar)))) - expect_equal(.callBiocFun(a,list(testChar)),S4Vectors::List(testChar)) expect_warning(.callBiocFun(b,list(testChar))) }) @@ -129,90 +131,4 @@ test_that(".insertPEP returns correct object",{ test_that(".insertPEP throws errors",{ expect_error(.insertPEP(S4Vectors::List(),"test")) -}) - -context("Test BiocProject function") - -test_that("BiocProject function return correct object", { - expect_is(BiocProject(configFile),"Annotated") -}) - -test_that("BiocProject function works with arguments", { - expect_is(BiocProject(configFileArgs),"Annotated") - expect_is(BiocProject(configFileArgs, funcArgs = list(resize.width=200)), "Annotated") -}) - -# test_that("BiocProject function overrides the arguments specified in the config -# file with ones that have the same names in the funcArgs list", { -# expect_failure(expect_identical( -# BiocProject(configFileArgs), -# BiocProject(configFileArgs, funcArgs = list(resize.width = 200)) -# )) -# }) - -test_that("BiocProject function returns Annotated when provided 
objects of - different class and thorows a warning", { - expect_warning(expect_is(BiocProject(configFile, func = function(x){ - return("test") - }),"Annotated")) - }) - -test_that("BiocProject function returns a Project object - when autoload is set to FALSE", { - expect_is(BiocProject(file=configFile,autoLoad = FALSE),"Project") - }) - -test_that("BiocProject function throws errors/warnings - when the arguments are inappropriate", { - expect_error(BiocProject(file=configFile,func = "2")) - expect_error(BiocProject(file = "test")) - expect_error(BiocProject(file = configFile,autoLoad = "test")) - }) - -test_that("BiocProject function catches errors in the user-provided - function returns the error message as Annotated", { - expect_is(BiocProject(file=configFile,func=function(x) { - stop("test") - }),"Annotated") - }) - -test_that("BiocProject function catches errors when the function specified - does not exist", { - expect_error(BiocProject(configFileMissingFun)) - }) - -test_that("BiocProject function throws a warning and returns a Project object - when no bioconductor section found",{ - expect_warning(expect_is(BiocProject(configFileNoSection),"Project")) - }) - -context("Test Annotated methods") - -test_that("samples returns a correct object", { - expect_is(sampleTable(bp),"data.table") -}) - -test_that("config returns a correct object", { - expect_is(config(bp),"Config") -}) - -test_that(".is.project returns a correct object", { - expect_is(.is.project(bp),"logical") -}) - -test_that(".is.project returns a value", { - expect_equal(.is.project(bp),TRUE) - expect_equal(.is.project(S4Vectors::List(a=1)), FALSE) -}) - -test_that("is method returns correct value when Annotated provided", { - expect_equal(is(bp,"Project"), TRUE) -}) - -test_that("getProject returns a correct object", { - expect_is(getProject(bp),"Project") -}) - -test_that("getProject returns a correct value", { - expect_equal(getProject(bp), pepr::Project(configFile)) }) \ No newline at end of file 
From ce7c11c1c89474234aca2217a6a018f10b512b05 Mon Sep 17 00:00:00 2001 From: Mike Love Date: Tue, 3 Nov 2020 10:29:09 -0500 Subject: [PATCH 13/24] small changes to tximeta vignette --- vignettes/vignette6tximeta.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/vignette6tximeta.Rmd b/vignettes/vignette6tximeta.Rmd index 8d6726e..0b2a32f 100644 --- a/vignettes/vignette6tximeta.Rmd +++ b/vignettes/vignette6tximeta.Rmd @@ -20,7 +20,7 @@ This vignette demonstrates how to integrate BiocProject with the [tximeta Biocon Tximeta is a package that imports transcript quantification files from the [salmon](https://salmon.readthedocs.io/en/latest/salmon.html) transcript quantifier. When importing, tximeta automatically annotates the data with the transcriptome used. How it works is that `salmon` records a unique identifier of the transcriptome it uses during quantification; then, tximeta reads this identifier and looks up metadata about those sequences using a local database of known transcriptome identifiers. For more details, refer to the [tximeta GitHub repository](https://github.com/mikelove/tximeta) or [publication in PLoS Computational Biology](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1007664). -The `tximeta::tximeta` function takes as input a `data.frame` (`coldata`) object that, for Salmon results, points to a quantification results directory for each sample. The `tximeta` function reads the `*.sa` files and returns a single `SummarizedExperiment` object with the Salmon-generated metadata in the object `metadata` slot. +The `tximeta::tximeta` function takes as input a `data.frame` (`coldata`) object that, for Salmon results, points to a quantification results directory for each sample. The `tximeta` function reads the `*.sf` files and returns a single `SummarizedExperiment` object with the various elements of metadata in the object's `rowRanges` and `metadata` slots. 
In addition, the `SummarizedExperiment` has pointers to local cached databases for subsequent annotation tasks. Since `SummarizedExperiment` inherits from the Bioconductor `Annotated` class, it fits perfectly into `BiocProject` output object class requirements. @@ -38,7 +38,7 @@ is(SummarizedExperiment(), "Annotated") ## Advantages of using BiocProject with tximeta -If we add BiocProject in to the tximeta workflow, then sample metadata from the PEP project specification can be easily plugged in! For example, if a researcher used a PEP to run Salmon to quantify reads across multiple samples with PEP-compatible workflow management engine/job scatterer like [Snakemake](https://snakemake.github.io/), [CWL](https://www.commonwl.org/), or [looper](https://looper.databio.org/), the same PEP would be ready to use with tximeta as long as the samples had `files` attribute defined. This could be done either via a `files` column in the sample table, or by using one of the sample modifiers provided by the PEP framework. The advantages of calling `tximport` within `BiocProject` include: +If we add BiocProject in to the tximeta workflow, then sample metadata from the PEP project specification can be easily plugged in! For example, if a researcher used a PEP to run Salmon to quantify reads across multiple samples with PEP-compatible workflow management engine/job scatterer like [Snakemake](https://snakemake.github.io/), [CWL](https://www.commonwl.org/), or [looper](https://looper.databio.org/), the same PEP would be ready to use with tximeta as long as the samples had the `files` attribute defined. This could be done either via a `files` column in the sample table, or by using one of the sample modifiers provided by the PEP framework. 
The advantages of calling `tximeta` within `BiocProject` include: - project portability, inherent to projects following PEP specification - single source of metadata from start of the analysis to finish -- all the PEP-defined metadata will be propagated to the output object of the `tximeta` function automatically. It will be accessible from within your R session using the [pepr](http://code.databio.org/pepr/) API, or with `@PEP` in the `metadata` slot of the `SummarizedExperiment` object, just as any other metadata attached to the result by `tximeta` function. @@ -85,7 +85,7 @@ The `Biocproject` + `tximeta` workflow requires a PEP. The example we just downl readFunPath: readTximeta.R ``` -As you can see, this PEP configuration file uses a `$TXIMPORTDATA` environment variable to specify a file path. This is just an optional way to make this PEP work in any computing environment without being changed, so you can share your sample metadata more easily. For this vignette, we need to set the variable to the output directory where our downloaded results are stored: +As you can see, this PEP configuration file uses a `$TXIMPORTDATA` environment variable to specify a file path. This is just an optional way to make this PEP work in any computing environment without being changed, so you can share your sample metadata more easily. (`tximportData` is a Bioconductor data package with various transcript quantification output, for unit testing of Bioconductor software packages.) 
For this vignette, we need to set the variable to the output directory where our downloaded results are stored: ```r From afc62265241e573cd836b31102839f54681eb0a5 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 5 Nov 2020 08:21:01 -0500 Subject: [PATCH 14/24] fix multi funcArgs issue --- R/functions.R | 4 ++-- R/utils.R | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/R/functions.R b/R/functions.R index 8c2f588..29146b7 100644 --- a/R/functions.R +++ b/R/functions.R @@ -102,11 +102,11 @@ BiocProject = function(file, amendments = NULL, autoLoad = TRUE, func = NULL, if(is.null(cfg)) cfg = pepr::config(p) if(pepr::.checkSection(cfg, c(BIOC_SECTION, FUNCTION_ARGS))){ - args = .unionList(config(p)[[BIOC_SECTION]][[FUNCTION_ARGS]],args) + args = .unionList(config(p)[[BIOC_SECTION]][[FUNCTION_ARGS]], args) argsNames = names(args) project = args[[.findProjectInList(args)]] argsNames = append("",argsNames[-.findProjectInList(args)]) - args = append(list(p), args[[-.findProjectInList(args)]]) + args = append(list(p), args[-.findProjectInList(args)]) names(args) = argsNames } if (!is.null(func)) { diff --git a/R/utils.R b/R/utils.R index 08ac002..979be59 100644 --- a/R/utils.R +++ b/R/utils.R @@ -117,10 +117,7 @@ readSchema = function(path, parent = NULL) { # its index If it is not # present, returns integer(0) .findProjectInList = function(l) { - which(as.logical(lapply(l, - function(x) { - is(x, "Project") - }))) + which(as.logical(lapply(l, function(x) { is(x, "Project") }))) } # internal function that wraps the external function execution From 3ba63f479980d2808729787d9f4492dccec26f5c Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 8 Jan 2021 09:49:28 -0500 Subject: [PATCH 15/24] add path populating functions; follow new schema format --- R/pipeline_interface.R | 54 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R index 1f617da..1f47b1b 
100644 --- a/R/pipeline_interface.R +++ b/R/pipeline_interface.R @@ -247,3 +247,57 @@ setMethod("getProjectOutputs", } ret }) + +#' Populate values in output schema +#' +#' Populates schema values of type path and thumbnail path in the provided +#' output schema for each sample in the project +#' +#' @param schemaPath path to the schema to populate +#' @param p \code{\link[pepr]{Project-class}} object +#' +#' @return a nested list of length equal to the number of results defined in +#' the schema with populated outputs for each sample within every element +populateSchemaPaths <- function(schemaPath, p) { + ret = list() + schema = yaml::yaml.load_file(schemaPath) + for(i in seq_along(schema)){ + ret[[i]] = list() + for(sn in unlist(p@samples$sample_name)) { + if("value" %in% names(schema[[i]])) { + if(is(schema[[i]][["value"]], "list")) { + ret[[i]][[sn]] = iterateRecursively(schema[[i]][["value"]], p, sn) + } else { + ret[[i]][[sn]] = schema[[i]][["value"]] + } + } + } + } + return(ret) +} + + +#' Recursively populate paths in results of type object +#' +#' @param m mapping to populate paths in +#' @param p \code{\link[pepr]{Project-class}} object +#' @param sn name of the sample +#' +#' @return list woith populate paths +populateRecursively <- function(m, p, sn) { + namesM = names(m) + for(i in seq_along(m)) { + if(is(m[[i]], "list")){ + m[[i]] = populateRecursively(m[[i]], p, sn) + } else{ + if(namesM[i] == "path" || namesM[i] == "thumbnail_path") + m[[i]] = .populateString(string=m[[i]], project=p, sampleName=sn) + } + } + return(m) +} + + + + + From 3a43f43558343829b1ee22c3739071b18ede193c Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 8 Jan 2021 12:05:19 -0500 Subject: [PATCH 16/24] enable project context in path templates populating --- R/pipeline_interface.R | 45 ++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R index 1f47b1b..a0cf1e3 100644 
--- a/R/pipeline_interface.R +++ b/R/pipeline_interface.R @@ -253,22 +253,38 @@ setMethod("getProjectOutputs", #' Populates schema values of type path and thumbnail path in the provided #' output schema for each sample in the project #' -#' @param schemaPath path to the schema to populate -#' @param p \code{\link[pepr]{Project-class}} object +#' @param schema schema with value templates to populate +#' @param project \code{\link[pepr]{Project-class}} object +#' @param projectContext whether the values for path templates populating +#' should be sourced from the project metadata. Otherwise metadata for +#' each sample is used #' #' @return a nested list of length equal to the number of results defined in -#' the schema with populated outputs for each sample within every element -populateSchemaPaths <- function(schemaPath, p) { +#' the schema with populated outputs for each sample within every element, +#' if projectContext=FALSE. Otherwise a one-level list is returned of length +#' equal to the number of results defined in the schema with populated outputs +populateSchemaPaths <- function(schema, project, projectContext=FALSE) { ret = list() - schema = yaml::yaml.load_file(schemaPath) for(i in seq_along(schema)){ - ret[[i]] = list() - for(sn in unlist(p@samples$sample_name)) { + if(projectContext){ if("value" %in% names(schema[[i]])) { if(is(schema[[i]][["value"]], "list")) { - ret[[i]][[sn]] = iterateRecursively(schema[[i]][["value"]], p, sn) + ret[[i]] = populateRecursively( + schema[[i]][["value"]], project, NULL, TRUE) } else { - ret[[i]][[sn]] = schema[[i]][["value"]] + ret[[i]] = schema[[i]][["value"]] + } + } + } else { + ret[[i]] = list() + for(sn in unlist(project@samples$sample_name)) { + if("value" %in% names(schema[[i]])) { + if(is(schema[[i]][["value"]], "list")) { + ret[[i]][[sn]] = populateRecursively( + schema[[i]][["value"]], project, sn, FALSE) + } else { + ret[[i]][[sn]] = schema[[i]][["value"]] + } } } } @@ -283,15 +299,18 @@ populateSchemaPaths 
<- function(schemaPath, p) { #' @param p \code{\link[pepr]{Project-class}} object #' @param sn name of the sample #' -#' @return list woith populate paths -populateRecursively <- function(m, p, sn) { +#' @return list with populate paths +populateRecursively <- function(m, p, sn, projectContext=FALSE) { namesM = names(m) + if(projectContext) sn = NULL for(i in seq_along(m)) { if(is(m[[i]], "list")){ - m[[i]] = populateRecursively(m[[i]], p, sn) + m[[i]] = populateRecursively(m[[i]], p, sn, projectContext) } else{ if(namesM[i] == "path" || namesM[i] == "thumbnail_path") - m[[i]] = .populateString(string=m[[i]], project=p, sampleName=sn) + m[[i]] = .populateString( + string=m[[i]], project=p, sampleName=sn, + projectContext=projectContext) } } return(m) From 89b4463b5eb305a30e9a7f1160a4a9575be88e72 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 8 Jan 2021 15:13:24 -0500 Subject: [PATCH 17/24] update user facing schema methods --- R/pipeline_interface.R | 130 +++++++++++++---------------------------- 1 file changed, 42 insertions(+), 88 deletions(-) diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R index a0cf1e3..138cfd5 100644 --- a/R/pipeline_interface.R +++ b/R/pipeline_interface.R @@ -105,46 +105,6 @@ setMethod("pipelineInterfacesBySample", return(invisible(NULL)) }) - -#' Get outputs from pipeline defined in an output schema -#' -#' Extracts the output file templates defined for a given pipeline -#' -#' @param pipeline an object of \code{\link[pepr]{Config-class}} -#' @param parent a path to parent folder to use -#' @param projectContext logical indicating whether a only project-level -#' pifaces should be considered. Otherwise, only sample-level ones are. 
-#' -#' @return named list of output path templates, -#' like: \code{'aligned_{sample.genome}/{sample.sample_name}_sort.bam'} -.getOutputs = function(pipeline, parent, projectContext = FALSE) { - if (!OUTPUT_SCHEMA_SECTION %in% - names(pipeline)) - return(invisible(NULL)) - outputSchema = readSchema(pipeline[[OUTPUT_SCHEMA_SECTION]], parent) - sect = "properties" - if (!projectContext) - sect = SCHEMA_SAMPLE_OUTS - if (!pepr::.checkSection(outputSchema, sect)) { - pipName = ifelse(is.null(pipeline[[PIP_NAME_KEY]]), - "provided", pipeline[[PIP_NAME_KEY]]) - warning("There is no '", - paste(sect, collapse = ":"), - "' section in the ", - pipName, " pipeline output schema.") - return(invisible(NULL)) - } - outputs = outputSchema[[sect]] - if ("samples" %in% names(outputs)) - outputs[["samples"]] = NULL - x = lapply(outputs, function(x) { - return(x[["path"]]) - }) - if (is.null(unlist(lapply(x, is.null)))) - return(invisible(NULL)) - return(x) -} - #' Populates and returns output files for a given sample #' #' Returns the pipeline outputs which are defined in the pipeline interface @@ -193,9 +153,11 @@ setMethod("getOutputsBySample", piface = yaml::yaml.load_file(pifaceSource) if (!.checkPifaceType(piface, "sample")) return(invisible(NULL)) - outputs = .getOutputs(piface, parent = dirname(pifaceSource)) - sampleRet[[piface[[PIP_NAME_KEY]]]] = - .populateTemplates(project, outputs, sampleName) + if(!OUTPUT_SCHEMA_SECTION %in% names(piface)) next + schema = readSchema( + piface[[OUTPUT_SCHEMA_SECTION]], dirname(pifaceSource)) + sampleRet[[piface[[PIP_NAME_KEY]]]] = .populateSchemaPaths( + schema=schema, project=project, sampleName=sampleName) } ret[[sampleName]] = sampleRet } @@ -239,11 +201,11 @@ setMethod("getProjectOutputs", piface = yaml::yaml.load_file(pifaceSource) if (!.checkPifaceType(piface, "project")) return(invisible(NULL)) - outputs = .getOutputs(piface, - parent = dirname(pifaceSource), - projectContext=TRUE) - ret[[piface[[PIP_NAME_KEY]]]] = - 
.populateTemplates(project, outputs, projectContext=TRUE) + if(!OUTPUT_SCHEMA_SECTION %in% names(piface)) next + schema = readSchema( + piface[[OUTPUT_SCHEMA_SECTION]], dirname(pifaceSource)) + ret[[piface[[PIP_NAME_KEY]]]] = .populateSchemaPaths( + schema, project, NULL, projectContext = TRUE) } ret }) @@ -251,43 +213,34 @@ setMethod("getProjectOutputs", #' Populate values in output schema #' #' Populates schema values of type path and thumbnail path in the provided -#' output schema for each sample in the project +#' output schema for rthe selected sample or project #' #' @param schema schema with value templates to populate #' @param project \code{\link[pepr]{Project-class}} object #' @param projectContext whether the values for path templates populating #' should be sourced from the project metadata. Otherwise metadata for #' each sample is used +#' @param sampleName name of the sample to populate the outputs for. Required +#' if projectContext=FALSE #' -#' @return a nested list of length equal to the number of results defined in -#' the schema with populated outputs for each sample within every element, -#' if projectContext=FALSE. 
Otherwise a one-level list is returned of length -#' equal to the number of results defined in the schema with populated outputs -populateSchemaPaths <- function(schema, project, projectContext=FALSE) { +#' @return a possibly nested list of length equal to the number of results defined in +#' the schema with populated outputs +.populateSchemaPaths <- function( + schema, project, projectContext=FALSE, sampleName=NULL) { ret = list() + if(!projectContext && is.null(sampleName)) + stop("Must specify sample to populate schema path templates for in no + project context mode") + if(projectContext) sampleName = NULL for(i in seq_along(schema)){ - if(projectContext){ - if("value" %in% names(schema[[i]])) { - if(is(schema[[i]][["value"]], "list")) { - ret[[i]] = populateRecursively( - schema[[i]][["value"]], project, NULL, TRUE) - } else { - ret[[i]] = schema[[i]][["value"]] - } - } - } else { - ret[[i]] = list() - for(sn in unlist(project@samples$sample_name)) { - if("value" %in% names(schema[[i]])) { - if(is(schema[[i]][["value"]], "list")) { - ret[[i]][[sn]] = populateRecursively( - schema[[i]][["value"]], project, sn, FALSE) - } else { - ret[[i]][[sn]] = schema[[i]][["value"]] - } - } + if("value" %in% names(schema[[i]])) { + if(is(schema[[i]][["value"]], "list")) { + ret[[i]] = .populateRecursively( + schema[[i]][["value"]], project, sampleName, projectContext) + } else { + ret[[i]] = schema[[i]][["value"]] } - } + } } return(ret) } @@ -295,25 +248,26 @@ populateSchemaPaths <- function(schema, project, projectContext=FALSE) { #' Recursively populate paths in results of type object #' -#' @param m mapping to populate paths in -#' @param p \code{\link[pepr]{Project-class}} object -#' @param sn name of the sample +#' @param l list to populate paths in +#' @param project \code{\link[pepr]{Project-class}} object +#' @param sampleName name of the sample #' #' @return list with populate paths -populateRecursively <- function(m, p, sn, projectContext=FALSE) { - namesM = 
names(m) - if(projectContext) sn = NULL - for(i in seq_along(m)) { - if(is(m[[i]], "list")){ - m[[i]] = populateRecursively(m[[i]], p, sn, projectContext) +.populateRecursively <- function(l, project, sampleName, projectContext=FALSE) { + namesL = names(l) + if(projectContext) sampleName = NULL + for(i in seq_along(l)) { + if(is(l[[i]], "list")){ + l[[i]] = .populateRecursively( + l[[i]], project, sampleName, projectContext) } else{ - if(namesM[i] == "path" || namesM[i] == "thumbnail_path") - m[[i]] = .populateString( - string=m[[i]], project=p, sampleName=sn, + if(namesL[i] == "path" || namesL[i] == "thumbnail_path") + l[[i]] = .populateString( + string=l[[i]], project=project, sampleName=sampleName, projectContext=projectContext) } } - return(m) + return(l) } From 14876a6110eca9e51e4fa6e7182ef44c3e5f24fc Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 11 Jan 2021 08:48:06 -0500 Subject: [PATCH 18/24] coerce to char after glueing, docs --- R/pipeline_interface.R | 25 +++++++++++++------------ R/utils.R | 10 ++++++---- man/dot-populateRecursively.Rd | 21 +++++++++++++++++++++ man/dot-populateSchemaPaths.Rd | 33 +++++++++++++++++++++++++++++++++ man/getOutputsBySample.Rd | 7 ++++--- man/getProjectOutputs.Rd | 8 ++++---- 6 files changed, 81 insertions(+), 23 deletions(-) create mode 100644 man/dot-populateRecursively.Rd create mode 100644 man/dot-populateSchemaPaths.Rd diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R index 138cfd5..0e7889c 100644 --- a/R/pipeline_interface.R +++ b/R/pipeline_interface.R @@ -107,11 +107,12 @@ setMethod("pipelineInterfacesBySample", #' Populates and returns output files for a given sample #' -#' Returns the pipeline outputs which are defined in the pipeline interface -#' indicated in the \code{\link[pepr]{Project-class}} +#' Returns the sample level pipeline outputs which are defined in the output +#' schema indicated by the pipeline interface indicated in the +#' \code{\link[pepr]{Project-class}} #' #' @param 
project \code{\link[pepr]{Project-class}} object -#' @param ... other arguemnts +#' @param ... other arguments #' #' @return a list of output file paths. The order of the first level of the #' list corresponds to the order of the pipeline interface files, second level @@ -164,11 +165,11 @@ setMethod("getOutputsBySample", ret }) -#' Populates and returns output files for a -#' given \code{\link[pepr]{Project-class}} +#' Populates and returns outputs for a given \code{\link[pepr]{Project-class}} #' -#' Returns the pipeline outputs which are defined in the pipeline interface -#' indicated in the \code{\link[pepr]{Project-class}} +#' Returns the project level pipeline outputs which are defined in the output +#' schema indicated by the pipeline interface indicated in the +#' \code{\link[pepr]{Project-class}} #' #' @param project \code{\link[pepr]{Project-class}} object #' @@ -205,7 +206,7 @@ setMethod("getProjectOutputs", schema = readSchema( piface[[OUTPUT_SCHEMA_SECTION]], dirname(pifaceSource)) ret[[piface[[PIP_NAME_KEY]]]] = .populateSchemaPaths( - schema, project, NULL, projectContext = TRUE) + schema, project, NULL, projectContext=TRUE) } ret }) @@ -213,15 +214,15 @@ setMethod("getProjectOutputs", #' Populate values in output schema #' #' Populates schema values of type path and thumbnail path in the provided -#' output schema for rthe selected sample or project +#' output schema for the selected sample or project #' #' @param schema schema with value templates to populate #' @param project \code{\link[pepr]{Project-class}} object #' @param projectContext whether the values for path templates populating #' should be sourced from the project metadata. Otherwise metadata for -#' each sample is used +#' a selected sample is used #' @param sampleName name of the sample to populate the outputs for. 
Required -#' if projectContext=FALSE +#' if \code{projectContext} set to \code{FALSE} #' #' @return a possibly nested list of length equal to the number of results defined in #' the schema with populated outputs @@ -250,7 +251,7 @@ setMethod("getProjectOutputs", #' #' @param l list to populate paths in #' @param project \code{\link[pepr]{Project-class}} object -#' @param sampleName name of the sample +#' @param sampleName name of the sample to populate the outputs for #' #' @return list with populate paths .populateRecursively <- function(l, project, sampleName, projectContext=FALSE) { diff --git a/R/utils.R b/R/utils.R index 979be59..d541014 100644 --- a/R/utils.R +++ b/R/utils.R @@ -398,13 +398,15 @@ readSchema = function(path, parent = NULL) { # object to allow attribute # accession. samplesSubset = subset(sampleTable(project), sample_name == sampleName) - if (!projectContext && NROW(samplesSubset) < 1) - return(invisible(NULL)) + if (!projectContext && NROW(samplesSubset) < 1) return(invisible(NULL)) if (projectContext) { - populatedStrings = with(config(project), glue(.pyToR(string))) + populatedStrings = with( + config(project), as.character(glue(.pyToR(string)))) } else { populatedStrings = as.character(apply( - samplesSubset, 1, function(s) { with(s, glue(.pyToR(string)))})) + samplesSubset, 1, function(s) { + with(s, as.character(glue(.pyToR(string)))) + })) } if (!projectContext && length(populatedStrings) != NROW(samplesSubset)) { diff --git a/man/dot-populateRecursively.Rd b/man/dot-populateRecursively.Rd new file mode 100644 index 0000000..7b83852 --- /dev/null +++ b/man/dot-populateRecursively.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{.populateRecursively} +\alias{.populateRecursively} +\title{Recursively populate paths in results of type object} +\usage{ +.populateRecursively(l, project, sampleName, projectContext = FALSE) +} +\arguments{ +\item{l}{list to populate 
paths in} + +\item{project}{\code{\link[pepr]{Project-class}} object} + +\item{sampleName}{name of the sample to populate the outputs for} +} +\value{ +list with populate paths +} +\description{ +Recursively populate paths in results of type object +} diff --git a/man/dot-populateSchemaPaths.Rd b/man/dot-populateSchemaPaths.Rd new file mode 100644 index 0000000..588878f --- /dev/null +++ b/man/dot-populateSchemaPaths.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipeline_interface.R +\name{.populateSchemaPaths} +\alias{.populateSchemaPaths} +\title{Populate values in output schema} +\usage{ +.populateSchemaPaths( + schema, + project, + projectContext = FALSE, + sampleName = NULL +) +} +\arguments{ +\item{schema}{schema with value templates to populate} + +\item{project}{\code{\link[pepr]{Project-class}} object} + +\item{projectContext}{whether the values for path templates populating +should be sourced from the project metadata. Otherwise metadata for +a selected sample is used} + +\item{sampleName}{name of the sample to populate the outputs for. Required +if \code{projectContext} set to \code{FALSE}} +} +\value{ +a possibly nested list of length equal to the number of results defined in +the schema with populated outputs +} +\description{ +Populates schema values of type path and thumbnail path in the provided +output schema for the selected sample or project +} diff --git a/man/getOutputsBySample.Rd b/man/getOutputsBySample.Rd index ee62f24..81989bf 100644 --- a/man/getOutputsBySample.Rd +++ b/man/getOutputsBySample.Rd @@ -12,7 +12,7 @@ getOutputsBySample(project, ...) 
\arguments{ \item{project}{\code{\link[pepr]{Project-class}} object} -\item{...}{other arguemnts} +\item{...}{other arguments} \item{sampleNames}{names of the samples} } @@ -22,8 +22,9 @@ list corresponds to the order of the pipeline interface files, second level is a named list of file paths populated by the samples } \description{ -Returns the pipeline outputs which are defined in the pipeline interface -indicated in the \code{\link[pepr]{Project-class}} +Returns the sample level pipeline outputs which are defined in the output +schema indicated by the pipeline interface indicated in the +\code{\link[pepr]{Project-class}} } \section{Methods (by class)}{ \itemize{ diff --git a/man/getProjectOutputs.Rd b/man/getProjectOutputs.Rd index 25c9623..f070bb5 100644 --- a/man/getProjectOutputs.Rd +++ b/man/getProjectOutputs.Rd @@ -3,8 +3,7 @@ \name{getProjectOutputs} \alias{getProjectOutputs} \alias{getProjectOutputs,Project-method} -\title{Populates and returns output files for a - given \code{\link[pepr]{Project-class}}} +\title{Populates and returns outputs for a given \code{\link[pepr]{Project-class}}} \usage{ getProjectOutputs(project) @@ -20,8 +19,9 @@ is a named list of file paths populated by the \code{\link[pepr]{Project-class}} } \description{ -Returns the pipeline outputs which are defined in the pipeline interface -indicated in the \code{\link[pepr]{Project-class}} +Returns the project level pipeline outputs which are defined in the output +schema indicated by the pipeline interface indicated in the +\code{\link[pepr]{Project-class}} } \section{Methods (by class)}{ \itemize{ From 928754c73b7c3243d39c258c862673a0fd6d9775 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 11 Jan 2021 08:55:30 -0500 Subject: [PATCH 19/24] preserve output IDs in outputs list --- R/pipeline_interface.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R index 0e7889c..3d10ac3 100644 --- a/R/pipeline_interface.R +++ 
b/R/pipeline_interface.R @@ -243,6 +243,7 @@ setMethod("getProjectOutputs", } } } + names(ret) = names(schema) return(ret) } From b8471e259169bb2bf451ee7ed4939f053f3b71a2 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 12 Jan 2021 13:25:31 -0500 Subject: [PATCH 20/24] store path-like keys in const --- R/constants.R | 1 + R/pipeline_interface.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/R/constants.R b/R/constants.R index c4341af..556c2fc 100644 --- a/R/constants.R +++ b/R/constants.R @@ -17,4 +17,5 @@ LOOPER_SECTION = "looper" PIP_IFACE_NAME = "pipeline_interfaces" PIP_IFACE_KEY = "pipeline_interfaces_key" SCHEMA_SAMPLE_OUTS = c("properties", "samples", "items", "properties") +PATH_LIKE_KEYS = c("path", "thumbnail_path") PIP_IFACE_SECTION = c(LOOPER_SECTION, PIP_IFACE_NAME) \ No newline at end of file diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R index 3d10ac3..dadc4b3 100644 --- a/R/pipeline_interface.R +++ b/R/pipeline_interface.R @@ -263,7 +263,7 @@ setMethod("getProjectOutputs", l[[i]] = .populateRecursively( l[[i]], project, sampleName, projectContext) } else{ - if(namesL[i] == "path" || namesL[i] == "thumbnail_path") + if(namesL[i] %in% PATH_LIKE_KEYS) l[[i]] = .populateString( string=l[[i]], project=project, sampleName=sampleName, projectContext=projectContext) From f010a86360e038f995172ee9a76e5e200a565f08 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 12 Jan 2021 13:53:27 -0500 Subject: [PATCH 21/24] add/update example package data --- .../example_BiocProject/readBedFiles_resize.R | 3 +- .../example_piface/output_schema.yaml | 27 ----------- .../example_piface/output_schema_project.yaml | 45 +++++++++++++++++++ .../example_piface/output_schema_sample.yaml | 44 ++++++++++++++++++ .../pipeline_interface1_project.yaml | 2 +- .../pipeline_interface1_sample.yaml | 2 +- .../pipeline_interface2_project.yaml | 2 +- .../pipeline_interface2_sample.yaml | 2 +- .../example_piface/project_config.yaml | 2 + 
9 files changed, 97 insertions(+), 32 deletions(-) delete mode 100644 inst/extdata/example_peps-master/example_piface/output_schema.yaml create mode 100644 inst/extdata/example_peps-master/example_piface/output_schema_project.yaml create mode 100644 inst/extdata/example_peps-master/example_piface/output_schema_sample.yaml diff --git a/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R b/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R index 722888a..a57d55f 100644 --- a/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R +++ b/inst/extdata/example_peps-master/example_BiocProject/readBedFiles_resize.R @@ -1,5 +1,6 @@ -readBedFiles_resize = function(project, resize.width) { +readBedFiles_resize = function(project, resize.width, test.arg) { cwd = getwd() + print(paste0("this is the test arg: ", test.arg)) paths = pepr::sampleTable(project)$file_path sampleNames = pepr::sampleTable(project)$sample_name setwd(dirname(project@file)) diff --git a/inst/extdata/example_peps-master/example_piface/output_schema.yaml b/inst/extdata/example_peps-master/example_piface/output_schema.yaml deleted file mode 100644 index 8f3bde0..0000000 --- a/inst/extdata/example_peps-master/example_piface/output_schema.yaml +++ /dev/null @@ -1,27 +0,0 @@ -description: Sample objects produced by test pipeline. 
-properties: - samples: - type: array - items: - type: object - properties: - test_property: - type: string - description: "Test sample property" - path: "~/sample/{sample_name}_file.txt" - test_property1: - type: string - description: "Test sample property" - path: "~/sample/{sample_name}_file1.txt" - test_property: - type: image - title: "Test title" - description: "Test project property" - thumbnail_path: "~/test_{name}.png" - path: "~/test_{name}.pdf" - test_property1: - type: image - title: "Test title1" - description: "Test project property1" - thumbnail_path: "~/test_{name}.png" - path: "~/test_{name}1.pdf" diff --git a/inst/extdata/example_peps-master/example_piface/output_schema_project.yaml b/inst/extdata/example_peps-master/example_piface/output_schema_project.yaml new file mode 100644 index 0000000..e619944 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/output_schema_project.yaml @@ -0,0 +1,45 @@ +output_file_in_object: + type: object + value: { + "prop2": {"path": "{attribute1}_{attribute2}_test1.pdf", "thumbnail_path": "{attribute1}_{attribute2}_test1.png", "title": "test title"}, + "prop1": {"path": "{attribute1}_{attribute2}_test1.pdf", "title": "test title"} + } + properties: + prop1: + type: file + prop2: + type: image + description: "Object output" +number_of_things: + type: integer + value: 1 + description: "Number of things" +percentage_of_things: + type: number + description: "Percentage of things" +name_of_something: + type: string + value: "my name" + description: "Name of something" +switch_value: + type: boolean + value: True + description: "Is the switch on of off" +collection_of_things: + type: array + value: [1, 2, 3] + description: "This store collection of values" +output_object_missing_value: + type: object + description: "Object output" +output_file: + type: file + description: "This a path to the output file" +output_file1: + type: file + value: {"path": "{attribute1}_{attribute2}_output_file1.pdf", "title": 
"test title"} + description: "This a path to the output file" +output_image: + type: image + value: {"path": "test2.pdf", "thumbnail_path": "test2.png", "title": "test title"} + description: "This a path to the output image" diff --git a/inst/extdata/example_peps-master/example_piface/output_schema_sample.yaml b/inst/extdata/example_peps-master/example_piface/output_schema_sample.yaml new file mode 100644 index 0000000..0495a86 --- /dev/null +++ b/inst/extdata/example_peps-master/example_piface/output_schema_sample.yaml @@ -0,0 +1,44 @@ +output_file_in_object: + type: object + value: { + "prop2": {"path": "{protocol}_{SRR}_test1.pdf", "thumbnail_path": "{protocol}_{SRR}_test1.png", "title": "test title"}, + "prop1": {"path": "{protocol}_{SRR}_test1.pdf", "title": "test title"} + } + properties: + prop1: + type: file + prop2: + type: image + description: "Object output" +number_of_things: + type: integer + value: 1 + description: "Number of things" +percentage_of_things: + type: number + description: "Percentage of things" +name_of_something: + type: string + description: "Name of something" +switch_value: + type: boolean + value: True + description: "Is the switch on of off" +collection_of_things: + type: array + value: [1, 2, 3] + description: "This store collection of values" +output_object_missing_value: + type: object + description: "Object output" +output_file: + type: file + description: "This a path to the output file" +output_file1: + type: file + value: {"path": "{protocol}_{SRR}_output_file1.pdf", "title": "test title"} + description: "This a path to the output file" +output_image: + type: image + value: {"path": "test2.pdf", "thumbnail_path": "test2.png", "title": "test title"} + description: "This a path to the output image" diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml index 01bfacd..61c9317 100644 --- 
a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_project.yaml @@ -2,7 +2,7 @@ pipeline_name: PIPELINE1 pipeline_type: project var_templates: path: "{looper.piface_dir}/pipelines/proj_pipeline1.py" -output_schema: output_schema.yaml +output_schema: output_schema_project.yaml command_template: > {pipeline.var_templates.path} --project-name {project.name} diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml index a6a7bbe..aca6181 100644 --- a/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface1_sample.yaml @@ -3,7 +3,7 @@ pipeline_type: sample var_templates: path: "{looper.piface_dir}/pipelines/pipeline1.py" input_schema: https://schema.databio.org/pep/2.0.0.yaml -output_schema: output_schema.yaml +output_schema: output_schema_sample.yaml command_template: > {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml index 2f10ce4..b3f619c 100644 --- a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_project.yaml @@ -2,7 +2,7 @@ pipeline_name: OTHER_PIPELINE2 pipeline_type: project var_templates: path: "{looper.piface_dir}/pipelines/proj_pipeline2.py" -output_schema: output_schema.yaml +output_schema: output_schema_project.yaml command_template: > {pipeline.var_templates.path} --project-name {project.name} compute: diff --git a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml 
b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml index 178d1d0..9da96ea 100644 --- a/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml +++ b/inst/extdata/example_peps-master/example_piface/pipeline_interface2_sample.yaml @@ -2,7 +2,7 @@ pipeline_name: OTHER_PIPELINE2 pipeline_type: sample var_templates: path: "{looper.piface_dir}/pipelines/other_pipeline2.py" -output_schema: output_schema.yaml +output_schema: output_schema_sample.yaml command_template: > {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} compute: diff --git a/inst/extdata/example_peps-master/example_piface/project_config.yaml b/inst/extdata/example_peps-master/example_piface/project_config.yaml index a47098a..9ffc983 100644 --- a/inst/extdata/example_peps-master/example_piface/project_config.yaml +++ b/inst/extdata/example_peps-master/example_piface/project_config.yaml @@ -1,5 +1,7 @@ pep_version: "2.0.0" name: test +attribute1: value1 +attribute2: value2 sample_table: annotation_sheet.csv looper: From d88246c939cd21e75d4721f3660faafc53b30f14 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 12 Jan 2021 13:54:47 -0500 Subject: [PATCH 22/24] update piface vignette --- vignettes/vignette5piface.Rmd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vignettes/vignette5piface.Rmd b/vignettes/vignette5piface.Rmd index 95f9ac8..af927dd 100644 --- a/vignettes/vignette5piface.Rmd +++ b/vignettes/vignette5piface.Rmd @@ -25,7 +25,7 @@ sections: * `pipeline_name` - A string identifying the pipeline, * `pipeline_type` - A string indicating a pipeline type: "sample" (for run) or "project" (for runp), -* `command_template`- A Jinja2 template used to construct a pipeline command to run. +* `command_template`- A [Jinja2](https://jinja.palletsprojects.com/en/2.11.x/) template used to construct a pipeline command to run. 
Follow the pipeline interface [specification](http://looper.databio.org/en/latest/pipeline-interface-specification/) to learn more about all the features that `looper` provides via that file. @@ -76,6 +76,8 @@ schemaPath = file.path(dirname(pifaceSource), piface$output_schema) .printNestedList(yaml::read_yaml(schemaPath)) ``` +The output schema has to follow the [pipestat schema specification](http://pipestat.databio.org/en/latest/pipestat_specification/#pipestat-schema). Additionally, if the schema lists any path-like outputs, i.e. type `file` or `image` it is beneficial to add a `values` section that include path templates to populate. + Check out the [looper documentation on output schema](http://looper.databio.org/en/latest/pipeline-interface-specification/#output_schema) to learn more. ### Sample-level From 0a8643b035ca75d5e268685088e32d59098263ad Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 3 Jun 2021 15:41:59 -0400 Subject: [PATCH 23/24] add docs deployment gh action --- .github/workflows/pkgdown.yml | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 .github/workflows/pkgdown.yml diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml new file mode 100644 index 0000000..7fbed63 --- /dev/null +++ b/.github/workflows/pkgdown.yml @@ -0,0 +1,48 @@ +on: + push: + branches: + - dev + - master + +name: Deploy-package-documentation + +jobs: + pkgdown: + runs-on: macos-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v1 + + - uses: r-lib/actions/setup-pandoc@v1 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Restore R package cache + uses: actions/cache@v2 + with: + path: 
${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE) + install.packages("pkgdown", type = "binary") + shell: Rscript {0} + + - name: Install package + run: R CMD INSTALL . + + - name: Deploy package documentation + run: | + git config --local user.email "actions@github.com" + git config --local user.name "GitHub Actions" + Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE, commit_message="deploy docs with a github action")' \ No newline at end of file From da0922bc877cbdf29402f77d991d58c26977a670 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 4 Jun 2021 12:00:57 -0400 Subject: [PATCH 24/24] fix docs --- R/pipeline_interface.R | 1 + man/dot-getOutputs.Rd | 23 ----------------------- man/dot-populateRecursively.Rd | 2 ++ vignettes/vignette5piface.Rmd | 2 +- 4 files changed, 4 insertions(+), 24 deletions(-) delete mode 100644 man/dot-getOutputs.Rd diff --git a/R/pipeline_interface.R b/R/pipeline_interface.R index dadc4b3..475b409 100644 --- a/R/pipeline_interface.R +++ b/R/pipeline_interface.R @@ -253,6 +253,7 @@ setMethod("getProjectOutputs", #' @param l list to populate paths in #' @param project \code{\link[pepr]{Project-class}} object #' @param sampleName name of the sample to populate the outputs for +#' @param projectContext a logical indicating whether project level attributes should be used #' #' @return list with populate paths .populateRecursively <- function(l, project, sampleName, projectContext=FALSE) { diff --git a/man/dot-getOutputs.Rd b/man/dot-getOutputs.Rd deleted file mode 100644 index 1671d7c..0000000 --- a/man/dot-getOutputs.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pipeline_interface.R -\name{.getOutputs} 
-\alias{.getOutputs} -\title{Get outputs from pipeline defined in an output schema} -\usage{ -.getOutputs(pipeline, parent, projectContext = FALSE) -} -\arguments{ -\item{pipeline}{an object of \code{\link[pepr]{Config-class}}} - -\item{parent}{a path to parent folder to use} - -\item{projectContext}{logical indicating whether a only project-level -pifaces should be considered. Otherwise, only sample-level ones are.} -} -\value{ -named list of output path templates, -like: \code{'aligned_{sample.genome}/{sample.sample_name}_sort.bam'} -} -\description{ -Extracts the output file templates defined for a given pipeline -} diff --git a/man/dot-populateRecursively.Rd b/man/dot-populateRecursively.Rd index 7b83852..1ff55a2 100644 --- a/man/dot-populateRecursively.Rd +++ b/man/dot-populateRecursively.Rd @@ -12,6 +12,8 @@ \item{project}{\code{\link[pepr]{Project-class}} object} \item{sampleName}{name of the sample to populate the outputs for} + +\item{projectContext}{a logical indicating whether project level attributes should be used} } \value{ list with populate paths diff --git a/vignettes/vignette5piface.Rmd b/vignettes/vignette5piface.Rmd index af927dd..ef0f493 100644 --- a/vignettes/vignette5piface.Rmd +++ b/vignettes/vignette5piface.Rmd @@ -59,7 +59,7 @@ configFile = system.file( "project_config.yaml", package = "BiocProject" ) -p=pepr::Project(configFile) +p=pepr::Project(file = configFile) .printNestedList(yaml::read_yaml(pipelineInterfacesBySample(p)[[1]][1])) ```