diff --git a/_viash.yaml b/_viash.yaml
index 8fa3afb..92c9f4b 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -1,4 +1,4 @@
-viash_version: 0.9.3
+viash_version: 0.9.5-rc1
 
 source: src
 target: target
@@ -16,9 +16,9 @@ repositories:
     type: github
     tag: 2.1.2
   - name: openpipeline_incubator
-    repo: openpipelines-bio/openpipeline_incubator
-    type: github
-    tag: main
+    repo: openpipeline_incubator
+    type: vsh
+    tag: build/main
 
 info:
   test_resources:
@@ -27,5 +27,5 @@ info:
       dest: resources_test
 
 config_mods: |
-  .resources += {path: '/src/labels.config', dest: 'nextflow_labels.config'}
+  .resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}
   .runners[.type == 'nextflow'].config.script := 'includeConfig("nextflow_labels.config")'
\ No newline at end of file
diff --git a/src/authors/dries_schaumont.yaml b/src/authors/dries_schaumont.yaml
new file mode 100644
index 0000000..fc28b84
--- /dev/null
+++ b/src/authors/dries_schaumont.yaml
@@ -0,0 +1,12 @@
+name: Dries Schaumont
+info:
+  role: Core Team Member
+  links:
+    email: dries@data-intuitive.com
+    github: DriesSchaumont
+    orcid: "0000-0002-4389-0440"
+    linkedin: dries-schaumont
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Data Scientist
\ No newline at end of file
diff --git a/src/workflows/qc/spatial_ingestion_qc_report/config.vsh.yaml b/src/workflows/qc/spatial_ingestion_qc_report/config.vsh.yaml
new file mode 100644
index 0000000..e87c325
--- /dev/null
+++ b/src/workflows/qc/spatial_ingestion_qc_report/config.vsh.yaml
@@ -0,0 +1,96 @@
+name: "spatial_ingestion_qc_report"
+namespace: "workflows/qc"
+description: Generate an interactive, stand-alone ingestion QC report for Xenium data.
+authors:
+  - __merge__: /src/authors/jakub_majercik.yaml
+    roles: [author]
+  - __merge__: /src/authors/dorien_roosen.yaml
+    roles: [author]
+  - __merge__: /src/authors/robrecht_cannoodt.yaml
+    roles: [author]
+  - __merge__: /src/authors/weiwei_schultz.yaml
+    roles: [contributor]
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --id
+        type: string
+        required: false
+        direction: input
+        description: |
+          The sample IDs to include in the report. If not provided,
+          the sample IDs will be extracted from the h5mu files.
+        example: sample1
+      - name: --input
+        type: file
+        required: true
+        direction: input
+        description: The input h5mu files.
+        example: path/to/file1.h5mu
+      - name: --sample_metadata
+        type: file
+        required: false
+        direction: input
+        description: |
+          The sample metadata file corresponding to .obs fields in the h5mu input files, to be used for grouping in the report.
+        example: path/to/file.csv
+      - name: --max_samples_per_report
+        type: integer
+        default: 20
+        description: |
+          The maximum number of samples to be included per report.
+          Multiple reports will be generated (with samples equally divided over all reports) if number of input samples exceeds this threshold.
+  - name: Options
+    arguments:
+      - name: "--var_gene_names"
+        example: "gene_symbol"
+        type: string
+        description: |
+          The column name in the .var h5mu files that contains the gene names. If not provided, .var_names will be used.
+      - name: --obs_metadata
+        type: string
+        multiple: true
+        description: The metadata keys in the h5mu .obs to include in the report.
+        example: [donor_id, cell_type, batch, condition]
+  - name: QC options
+    arguments:
+      - name: "--var_name_mitochondrial_genes"
+        type: string
+        required: false
+        default: "mitochondrial"
+        description: |
+          In which .var slot to store a boolean array corresponding to the mitochondrial genes.
+      - name: "--var_name_ribosomal_genes"
+        type: string
+        required: false
+        default: "ribosomal"
+        description: |
+          In which .var slot to store a boolean array corresponding to the ribosomal genes.
+  - name: Outputs
+    arguments:
+      - name: --output_qc_report
+        type: file
+        required: true
+        multiple: true
+        direction: output
+        description: The output HTML report
+        example: path/to/file.html
+      - name: --output_processed_h5mu
+        type: file
+        required: true
+        direction: output
+        description: Folder containing the processed h5mu files.
+        default: qc_h5mu
+
+resources:
+  - type: nextflow_script
+    entrypoint: run_wf
+    path: main.nf
+
+dependencies:
+  - name: ingestion_qc/generate_report
+    alias: spatial_qc_report
+    repository: openpipeline_incubator
+
+runners:
+  - type: nextflow
diff --git a/src/workflows/qc/spatial_ingestion_qc_report/main.nf b/src/workflows/qc/spatial_ingestion_qc_report/main.nf
new file mode 100644
index 0000000..1065fd6
--- /dev/null
+++ b/src/workflows/qc/spatial_ingestion_qc_report/main.nf
@@ -0,0 +1,53 @@
+workflow run_wf {
+  take:
+    input_ch
+
+  main:
+    output_ch = input_ch
+      // Re-key each event on `state.id`, keeping the incoming channel id
+      // in `_meta.join_id`.
+      | map { id, state ->
+        def new_event = [
+          state.id,
+          state + ["_meta": ["join_id": id]]
+        ]
+        new_event
+      }
+      | spatial_qc_report.run(
+        fromState: { id, state -> [
+          "id": id,
+          "input": state.input,
+          "sample_metadata": state.sample_metadata,
+          "max_samples_per_report": state.max_samples_per_report,
+          "var_gene_names": state.var_gene_names,
+          "obs_metadata": state.obs_metadata,
+          "var_name_mitochondrial_genes": state.var_name_mitochondrial_genes,
+          "var_name_ribosomal_genes": state.var_name_ribosomal_genes,
+          "output_processed_h5mu": state.output_processed_h5mu,
+          "output_qc_report": state.output_qc_report
+        ]},
+        // Settings that are fixed for this workflow rather than read from the state.
+        args: [
+          "ingestion_method": "xenium",
+          "run_cellbender": false
+        ],
+        // Return the existing state plus the outputs; returning only the outputs
+        // would drop `_meta`, which `setState` below selects.
+        toState: { id, output, state ->
+          state + [
+            "output_processed_h5mu": output.output_processed_h5mu,
+            "output_qc_report": output.output_qc_report
+          ]
+        }
+      )
+      | setState(
+        [
+          "_meta": "_meta",
+          "output_processed_h5mu": "output_processed_h5mu",
+          "output_qc_report": "output_qc_report"
+        ]
+      )
+
+  emit:
+    output_ch
+}
\ No newline at end of file
diff --git a/src/workflows/qc/spatial_ingestion_qc_report/nextflow.config b/src/workflows/qc/spatial_ingestion_qc_report/nextflow.config
new file mode 100644
index 0000000..19f417c
--- /dev/null
+++ b/src/workflows/qc/spatial_ingestion_qc_report/nextflow.config
@@ -0,0 +1,12 @@
+manifest {
+  nextflowVersion = '!>=20.12.1-edge'
+}
+
+params {
+  // This file sits four directories below the repository root
+  // (src/workflows/qc/spatial_ingestion_qc_report); assumes $projectDir is this directory.
+  rootDir = java.nio.file.Paths.get("$projectDir/../../../../").toAbsolutePath().normalize().toString()
+}
+
+// include common settings
+includeConfig("${params.rootDir}/src/workflows/utils/labels.config")
\ No newline at end of file
diff --git a/src/workflows/qc/spatial_ingestion_qc_report/test.sh b/src/workflows/qc/spatial_ingestion_qc_report/test.sh
new file mode 100755
index 0000000..092f5ad
--- /dev/null
+++ b/src/workflows/qc/spatial_ingestion_qc_report/test.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+set -eo pipefail
+
+aws s3 sync s3://openpipelines-bio/openpipeline_spatial/resources_test/xenium resources_test/xenium
+
+viash ns build -q spatial_ingestion_qc_report --setup cb
+
+# ingestion_method is set to "xenium" inside the workflow (main.nf args), so it is not passed here.
+nextflow run . \
+  -resume \
+  -profile docker \
+  -c src/workflows/utils/labels_ci.config \
+  -main-script target/nextflow/workflows/qc/spatial_ingestion_qc_report/main.nf \
+  --input resources_test/xenium/xenium_tiny.h5mu \
+  --output_processed_h5mu test.h5mu \
+  --output_qc_report test.html \
+  --var_name_mitochondrial_genes mitochondrial \
+  --var_name_ribosomal_genes ribosomal \
+  --publish_dir test
\ No newline at end of file
diff --git a/src/labels.config b/src/workflows/utils/labels.config
similarity index 100%
rename from src/labels.config
rename to src/workflows/utils/labels.config
diff --git a/src/workflows/utils/labels_ci.config b/src/workflows/utils/labels_ci.config
new file mode 100644
index 0000000..7a1c7d3
--- /dev/null
+++ b/src/workflows/utils/labels_ci.config
@@ -0,0 +1,110 @@
+process {
+  withLabel: lowmem { memory = 13.Gb }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midmem { memory = 13.Gb }
+  withLabel: midcpu { cpus = 4 }
+  withLabel: highmem { memory = 13.Gb }
+  withLabel: highcpu { cpus = 4 }
+  withLabel: veryhighmem { memory = 13.Gb }
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+}
+
+env.NUMBA_CACHE_DIR = '/tmp'
+
+trace {
+  enabled = true
+  overwrite = true
+}
+dag {
+  overwrite = true
+}
+
+process.maxForks = 1
+
+profiles {
+  // detect tempdir
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?:
+      System.getenv('TEMPDIR') ?:
+      System.getenv('TMPDIR') ?:
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {
+    docker.temp = tempDir
+    podman.temp = tempDir
+    charliecloud.temp = tempDir
+  }
+
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {
+    docker.fixOwnership = true
+    docker.enabled = true
+    singularity.enabled = false
+    podman.enabled = false
+    shifter.enabled = false
+    charliecloud.enabled = false
+  }
+
+  local {
+    // This config is for local processing.
+    process {
+      maxMemory = 25.GB
+      withLabel: verylowcpu { cpus = 2 }
+      withLabel: lowcpu { cpus = 4 }
+      withLabel: midcpu { cpus = 6 }
+      withLabel: highcpu { cpus = 12 }
+
+      withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+      withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
+      withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
+    }
+  }
+}
+
+// Cap a requested memory amount at `process.maxMemory`.
+// Returns `to_compare` unchanged when no maximum is configured. Returns the
+// maximum on the attempt equal to `process.maxRetries`, or when the request
+// exceeds the maximum. Prints an error and exits if the comparison throws.
+def get_memory(to_compare) {
+  if (!process.containsKey("maxMemory") || !process.maxMemory) {
+    return to_compare
+  }
+
+  try {
+    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+      return process.maxMemory
+    }
+    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
+      // Request exceeds the cap: hand back the configured maximum.
+      return process.maxMemory as nextflow.util.MemoryUnit
+    }
+    else {
+      return to_compare
+    }
+  } catch (all) {
+    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+    System.exit(1)
+  }
+}