Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`

- [#905](https://github.com/nf-core/mag/pull/905) - Add nf-test snapshot for `test_assembly_input` profile (by @dialvarezs)
- [#908](https://github.com/nf-core/mag/pull/908) - Add nf-test snapshot for `test_single_end` profile (by @dialvarezs)
- [#930](https://github.com/nf-core/mag/pull/930) - Add binner SemiBin2 (by @d4straub)

### `Changed`
Expand Down Expand Up @@ -37,6 +38,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Deprecated`

- [#908](https://github.com/nf-core/mag/pull/908) - Removed local `quast_bins_summary` in favor of `csvtk/concat` (by @dialvarezs)

## v5.2.0 - Puce Pangolin [2025-11-07]

### `Added`
Expand Down
21 changes: 5 additions & 16 deletions bin/summary_gtdbtk.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,7 @@ def parse_args(args=None):
def main(args=None):
args = parse_args(args)

if (
not args.summaries
and not args.filtered_bins
and not args.failed_bins
and not args.qc_discarded_bins
):
if not args.summaries and not args.filtered_bins and not args.failed_bins and not args.qc_discarded_bins:
sys.exit(
"Either --summaries, --filtered_bins, --failed_bins or --qc_discarded_bins must be specified!"
)
Expand Down Expand Up @@ -133,9 +128,7 @@ def main(args=None):
for file in args.summaries:
df_summary = pd.read_csv(file, sep="\t")[columns]
# re-add the file extension that GTDB-Tk stripped from the bin names (at least until this is changed consistently in the rest of the pipeline)
df_summary["user_genome"] = (
df_summary["user_genome"].astype(str) + "." + args.extension
)
df_summary["user_genome"] = df_summary["user_genome"].astype(str) + "." + args.extension
df_summary.set_index("user_genome", inplace=True)
df_final = df_final.append(df_summary, verify_integrity=True)

Expand Down Expand Up @@ -171,9 +164,7 @@ def main(args=None):
filtered.append(bin_results)

df_filtered = pd.DataFrame(filtered, columns=columns)
df_filtered["user_genome"] = (
df_filtered["user_genome"].astype(str) + "." + args.extension
)
df_filtered["user_genome"] = df_filtered["user_genome"].astype(str) + "." + args.extension
df_filtered.set_index("user_genome", inplace=True)
df_final = df_final.append(df_filtered, verify_integrity=True)

Expand Down Expand Up @@ -209,14 +200,12 @@ def main(args=None):
failed.append(bin_results)

df_failed = pd.DataFrame(failed, columns=columns)
df_failed["user_genome"] = (
df_failed["user_genome"].astype(str) + "." + args.extension
)
df_failed["user_genome"] = df_failed["user_genome"].astype(str) + "." + args.extension
df_failed.set_index("user_genome", inplace=True)
df_final = df_final.append(df_failed, verify_integrity=True)

# write output
df_final.reset_index().rename(columns={"index": "user_genome"}).to_csv(
df_final.reset_index().rename(columns={"index": "user_genome"}).sort_values("user_genome").to_csv(
args.out, sep="\t", index=False
)

Expand Down
4 changes: 3 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ process {
publishDir = [path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}

withName: 'QUAST_BINS|QUAST_BINS_SUMMARY' {
withName: 'QUAST_BINS|CONCAT_QUAST_SUMMARY' {
publishDir = [
path: { "${params.outdir}/GenomeBinning/QC" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -715,6 +715,7 @@ process {
publishDir = [
path: { "${params.outdir}/Ancient_DNA/pydamage/analyze/${meta.assembler}-${meta.id}/" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}

Expand All @@ -724,6 +725,7 @@ process {
publishDir = [
path: { "${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}/" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}

Expand Down
2 changes: 1 addition & 1 deletion docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ For each bin or refined bin the median sequencing depth is computed based on the
- `predicted_genes/barrnap.log`: Barrnap log file (ribosomal RNA predictor)
- `GenomeBinning/QC/`
- `[assembler]-[binner]-[domain]-[refinement]-[sample/group]-quast_summary.tsv`: QUAST output summarized per sample/condition.
- `quast_summary.tsv`: QUAST output for all bins summarized
- `quast_bin_summary.tsv`: QUAST output for all bins summarized

</details>

Expand Down
12 changes: 0 additions & 12 deletions modules/local/quast_bins_summary/environment.yml

This file was deleted.

32 changes: 0 additions & 32 deletions modules/local/quast_bins_summary/main.nf

This file was deleted.

6 changes: 6 additions & 0 deletions subworkflows/local/bin_qc/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ workflow BIN_QC {
.filter { meta, _bins ->
meta.domain != "eukarya"
}
.map { meta, bins ->
[meta, bins.sort { a, b -> a.getBaseName() <=> b.getBaseName() }]
}
.multiMap { meta, fa ->
reads: [meta, fa]
ext: fa.extension.unique().join("")
Expand All @@ -152,6 +155,9 @@ workflow BIN_QC {
ch_checkm_summaries = CHECKM_QA.out.output
.map { _meta, summary -> [[id: 'checkm'], summary] }
.groupTuple()
.map { meta, summaries ->
[meta, summaries.sort { a, b -> a.getBaseName() <=> b.getBaseName() }]
}
ch_multiqc_files = ch_multiqc_files.mix(
CHECKM_QA.out.output.map { _meta, summary -> summary }.flatten()
)
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/binning/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ workflow BINNING {
// SemiBin2
if (!params.skip_semibin) {
ch_semibin_input = ch_assemblies
.map { meta, assembly, bams, bais ->
.map { meta, assembly, bams, _bais ->
def meta_new = meta + [binner: 'SemiBin2'] + [sample_count: bams.size()]
[meta_new, assembly, bams]
}
Expand Down
1 change: 1 addition & 0 deletions subworkflows/local/catpack/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ workflow CATPACK {
name: 'bat_summary.tsv',
storeDir: "${params.outdir}/Taxonomy/CAT/",
keepHeader: true,
sort: 'deep',
)

if (!params.cat_allow_unofficial_lineages) {
Expand Down
13 changes: 13 additions & 0 deletions tests/.nftignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
.DS_Store
Ancient_DNA/pydamage/analyze/*/*.csv
Ancient_DNA/variant_calling/*/*.vcf.gz
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No string check for these? Or you could use https://github.com/seppinho/nft-vcf

Annotation/Prokka/**/*.{log,err,gbk,sqn}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No additional checks for gbk/sqn? These are common downstream files, so it might be good to validate them somehow

Annotation/Prokka/**/*.tmp.*
Assembly/MEGAHIT/*.log
Assembly/MEGAHIT/QC/*/*.bowtie2.log
Assembly/MEGAHIT/QC/*/QUAST/*.{pdf,html,log}
Assembly/MEGAHIT/QC/*/QUAST/**/*.{pdf,html}
GenomeBinning/CONCOCT/stats/*_{original,PCA_components,PCA_transformed}_data_gt1000.csv
GenomeBinning/CONCOCT/stats/*_log.txt
GenomeBinning/CONCOCT/stats/*.tsv
Expand All @@ -17,8 +23,13 @@ GenomeBinning/QC/BUSCO/**/*.log
GenomeBinning/QC/BUSCO/**/*.txt
GenomeBinning/QC/BUSCO/**/short_summary*.{txt,json}
GenomeBinning/QC/BUSCO/*/*{-busco.log,-busco.batch_summary.txt}
GenomeBinning/QC/CheckM/*/{checkm.log,lineage.ms}
GenomeBinning/QC/CheckM/*/bins/*/hmmer.{analyze,tree}.txt
GenomeBinning/QC/CheckM/*/storage/*
GenomeBinning/QC/CheckM2/**/DIAMOND_RESULTS.tsv
GenomeBinning/QC/CheckM2/*/checkm2.log
GenomeBinning/QC/QUAST/*/*.{pdf,html,log}
GenomeBinning/QC/QUAST/*/**/*.{pdf,html}
multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt
multiqc/multiqc_data/llms-full.txt
multiqc/multiqc_data/multiqc_data.json
Expand All @@ -33,3 +44,5 @@ multiqc/multiqc_report.html
pipeline_info/*.{html,json,txt,yml}
QC_shortreads/fastqc/*_fastqc.{html,zip}
QC_shortreads/remove_phix/*.log
Taxonomy/CAT/**/*.log
VirusIdentification/geNomad/**/*_aggregated_classification.tsv
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can be removed, as you do actually check this explicitly in genomad_results

6 changes: 3 additions & 3 deletions tests/default.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
"CONCAT_BUSCO_TSV": {
"csvtk": "0.31.0"
},
"CONCAT_QUAST_SUMMARY": {
"csvtk": "0.31.0"
},
"CONVERT_DEPTHS": {
"bioawk": 20110810
},
Expand Down Expand Up @@ -122,9 +125,6 @@
"metaquast": "5.0.2",
"python": "3.7.6"
},
"QUAST_BINS_SUMMARY": {
"cp": 9.5
},
"SEMIBIN_SINGLEEASYBIN": {
"SemiBin": "2.2.0"
},
Expand Down
6 changes: 3 additions & 3 deletions tests/test_assembly_input.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ nextflow_pipeline {
workflow.trace.succeeded().size(),
removeNextflowVersion("${outputDir}/pipeline_info/nf_core_mag_software_mqc_versions.yml"),
).match() },
{ assert snapshot(stable_name_assembly).match('assembly') },
{ assert snapshot(stable_name_assembly ).match('assembly') },
{ assert snapshot(stable_name_annotation, stable_path_annotation).match('annotation') },
{ assert snapshot(stable_name_binning, stable_path_binning ).match('binning') },
{ assert snapshot(stable_name_multiqc, stable_path_multiqc).match('multiqc') },
{ assert snapshot(stable_name_binning, stable_path_binning ).match('binning') },
{ assert snapshot(stable_name_multiqc, stable_path_multiqc ).match('multiqc') },

// FASTA checks
{ assert concoct_bins.collect { file -> path("${file}").fasta.size() > 0 }.every() },
Expand Down
6 changes: 3 additions & 3 deletions tests/test_hybrid.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
"CONCAT_BUSCO_TSV": {
"csvtk": "0.31.0"
},
"CONCAT_QUAST_SUMMARY": {
"csvtk": "0.31.0"
},
"CONVERT_DEPTHS": {
"bioawk": 20110810
},
Expand Down Expand Up @@ -114,9 +117,6 @@
"metaquast": "5.0.2",
"python": "3.7.6"
},
"QUAST_BINS_SUMMARY": {
"cp": 9.5
},
"SAMTOOLS_HOSTREMOVED_INDEX": {
"samtools": "1.22.1"
},
Expand Down
6 changes: 3 additions & 3 deletions tests/test_longreadonly.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
"CONCAT_BUSCO_TSV": {
"csvtk": "0.31.0"
},
"CONCAT_QUAST_SUMMARY": {
"csvtk": "0.31.0"
},
"CONVERT_DEPTHS": {
"bioawk": 20110810
},
Expand Down Expand Up @@ -95,9 +98,6 @@
"metaquast": "5.0.2",
"python": "3.7.6"
},
"QUAST_BINS_SUMMARY": {
"cp": 9.5
},
"SAMTOOLS_HOSTREMOVED_INDEX": {
"samtools": "1.22.1"
},
Expand Down
Loading