Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ jobs:
"test_10x_sc",
"test_takara_smartseq_umi_bcr",
"test_nebnext_umi",
"test_reassign_false",
"test_rnaseq_bulk",
"test_rnaseq_sc",
"test_maskprimers_extract",
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- [#416](https://github.com/nf-core/airrflow/pull/416) Fixed bug in number of sequences report.
- [#416](https://github.com/nf-core/airrflow/pull/416) Fixed bug in report file size.
- [#422](https://github.com/nf-core/airrflow/pull/422) Fixed annoying automatic clonal threshold message on any failure.
- Fixed the `--reassign false` option and added a `test_reassign_false` test profile.

### `Dependencies`

Expand Down
41 changes: 28 additions & 13 deletions bin/reveal_filter_quality.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,6 @@ if (packageVersion("alakazam") < "1.0.3") {
}
}

# Concordant locus
same_locus <- getLocus(db[["v_call"]]) == db[["locus"]]

# Max 10% N
# Count ambiguous bases with the character class "[Nn]"; the bare pattern "Nn"
# would only match the literal two-character sequence "Nn".
n_count <- stri_count(db$sequence_alignment, regex = "[Nn]")
# Count every aligned position that is not a gap character ("-" or ".").
positions_count <- stri_count(db$sequence_alignment, regex = "[^-.]")
Expand All @@ -71,18 +68,36 @@ low_n <- n_count <= 0.10
# Min length 200 nt
# Only positions that are neither gap characters ("-", ".") nor N/n count
# towards the length.
long_seq <- stri_count(db$sequence_alignment, regex = "[^-.Nn]") >= 200

log <- data.frame(
"same_locus" = same_locus,
"low_n" = low_n,
"long_seq" = long_seq, stringsAsFactors = F
)
if ("locus" %in% colnames(db)) {
# Concordant locus
same_locus <- getLocus(db[["v_call"]]) == db[["locus"]]

# Generate logs and summary
log <- data.frame(
"same_locus" = same_locus,
"low_n" = low_n,
"long_seq" = long_seq, stringsAsFactors = F
)
summary <- log %>%
group_by(same_locus, low_n, long_seq) %>%
summarize(n = n(), .groups = "drop_last")
filter_pass <- same_locus & low_n & long_seq
} else {
db$locus <- getLocus(db[["v_call"]])

summary <- log %>%
group_by(same_locus, low_n, long_seq) %>%
summarize(n = n(), .groups = "drop_last")
# Generate logs and summary
log <- data.frame(
"low_n" = low_n,
"long_seq" = long_seq, stringsAsFactors = F
)

summary <- log %>%
group_by(low_n, long_seq) %>%
summarize(n = n(), .groups = "drop_last")

filter_pass <- low_n & long_seq
}

# Filter and save
filter_pass <- same_locus & low_n & long_seq

if (!is.null(opt$OUTPUT)) {
output_fn <- paste0(opt$OUTPUT, "_quality-pass.tsv")
Expand Down
32 changes: 32 additions & 0 deletions conf/test_reassign_false.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/airrflow -profile test_reassign_false,<docker/singularity>
*/

// Resource limits (cpus, memory, time) set for processes in this test profile.
process {
resourceLimits = [
cpus: 4,
memory: '15.GB',
time: '1.h'
]
}

// Parameters for the test_reassign_false profile: assembled mode with reassign disabled.
params {
// NOTE(review): name and description are generic to assembled mode and do not
// mention that reassign is false — confirm whether that is intended.
config_profile_name = 'Test assembled mode'
config_profile_description = 'Minimal test dataset to test assembled mode'

// Input data
mode = 'assembled'
input = pipelines_testdata_base_path + 'testdata-reveal/test_assembled_metadata_assigned.tsv'
reference_fasta = pipelines_testdata_base_path + 'database-cache/imgtdb_base.zip'
reference_igblast = pipelines_testdata_base_path + 'database-cache/igblast_base.zip'

// Setting under test: reassign is switched off for this profile.
reassign = false
productive_only = true
collapseby = 'filename'
cloneby = 'subject_id'
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ profiles {
test_maskprimers_extract { includeConfig 'conf/test_maskprimers_extract.config' }
test_maskprimers_align {includeConfig 'conf/test_maskprimers_align.config' }
test_nebnext_umi { includeConfig 'conf/test_nebnext_umi.config' }
test_reassign_false { includeConfig 'conf/test_reassign_false.config' }
test_rnaseq_bulk { includeConfig 'conf/test_rnaseq_bulk.config' }
test_rnaseq_sc { includeConfig 'conf/test_rnaseq_sc.config' }
test_embeddings_H { includeConfig 'conf/test_embeddings_H.config' }
Expand Down
4 changes: 3 additions & 1 deletion subworkflows/local/vdj_annotation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ workflow VDJ_ANNOTATION {

take:
ch_fasta // [meta, fasta]
ch_tsv // [meta, tsv]
ch_validated_samplesheet
ch_igblast
ch_reference_fasta
Expand All @@ -35,7 +36,8 @@ workflow VDJ_ANNOTATION {
ch_logs = ch_logs.mix(CHANGEO_MAKEDB.out.logs)
ch_versions = ch_versions.mix(CHANGEO_MAKEDB.out.versions)

ch_assigned_tab = CHANGEO_MAKEDB.out.tab
ch_assigned_tab = ch_tsv.mix(CHANGEO_MAKEDB.out.tab)

ch_assignment_logs = CHANGEO_MAKEDB.out.logs

if (!params.skip_alignment_filter){
Expand Down
94 changes: 50 additions & 44 deletions workflows/airrflow.nf
Original file line number Diff line number Diff line change
Expand Up @@ -97,51 +97,52 @@ workflow AIRRFLOW {

ch_validated_samplesheet = SC_RAW_INPUT.out.samplesheet.collect()

ch_presto_filterseq_logs = Channel.empty()
ch_presto_maskprimers_logs = Channel.empty()
ch_presto_pairseq_logs = Channel.empty()
ch_presto_clustersets_logs = Channel.empty()
ch_presto_buildconsensus_logs = Channel.empty()
ch_presto_postconsensus_pairseq_logs = Channel.empty()
ch_presto_assemblepairs_logs = Channel.empty()
ch_presto_collapseseq_logs = Channel.empty()
ch_presto_splitseq_logs = Channel.empty()
ch_fastp_html = Channel.empty()
ch_fastp_json = Channel.empty()
ch_fastqc_postassembly_mqc = Channel.empty()

} else if (params.library_generation_method == "trust4") {
// Extract VDJ sequences from "general" RNA seq data using TRUST4

RNASEQ_INPUT (
ch_input,
DATABASES.out.igblast.collect()
)
ch_presto_filterseq_logs = Channel.empty()
ch_presto_maskprimers_logs = Channel.empty()
ch_presto_pairseq_logs = Channel.empty()
ch_presto_clustersets_logs = Channel.empty()
ch_presto_buildconsensus_logs = Channel.empty()
ch_presto_postconsensus_pairseq_logs = Channel.empty()
ch_presto_assemblepairs_logs = Channel.empty()
ch_presto_collapseseq_logs = Channel.empty()
ch_presto_splitseq_logs = Channel.empty()
ch_fastp_html = Channel.empty()
ch_fastp_json = Channel.empty()
ch_fastqc_postassembly_mqc = Channel.empty()
ch_tsv_files = Channel.empty()

} else if (params.library_generation_method == "trust4") {
// Extract VDJ sequences from "general" RNA seq data using TRUST4

RNASEQ_INPUT (
ch_input,
DATABASES.out.igblast.collect()
)

ch_fasta = RNASEQ_INPUT.out.fasta
ch_versions = ch_versions.mix(RNASEQ_INPUT.out.versions)

ch_validated_samplesheet = RNASEQ_INPUT.out.samplesheet.collect()

ch_presto_filterseq_logs = Channel.empty()
ch_presto_maskprimers_logs = Channel.empty()
ch_presto_pairseq_logs = Channel.empty()
ch_presto_clustersets_logs = Channel.empty()
ch_presto_buildconsensus_logs = Channel.empty()
ch_presto_postconsensus_pairseq_logs = Channel.empty()
ch_presto_assemblepairs_logs = Channel.empty()
ch_presto_collapseseq_logs = Channel.empty()
ch_presto_splitseq_logs = Channel.empty()
ch_fastp_html = RNASEQ_INPUT.out.fastp_reads_html
ch_fastp_json = RNASEQ_INPUT.out.fastp_reads_json
ch_fastqc_postassembly_mqc = Channel.empty()
}
else {
// Perform sequence assembly if input type is fastq from bulk sequencing data
SEQUENCE_ASSEMBLY(
ch_input,
DATABASES.out.igblast.collect()
)
ch_fasta = RNASEQ_INPUT.out.fasta
ch_versions = ch_versions.mix(RNASEQ_INPUT.out.versions)

ch_validated_samplesheet = RNASEQ_INPUT.out.samplesheet.collect()

ch_presto_filterseq_logs = Channel.empty()
ch_presto_maskprimers_logs = Channel.empty()
ch_presto_pairseq_logs = Channel.empty()
ch_presto_clustersets_logs = Channel.empty()
ch_presto_buildconsensus_logs = Channel.empty()
ch_presto_postconsensus_pairseq_logs = Channel.empty()
ch_presto_assemblepairs_logs = Channel.empty()
ch_presto_collapseseq_logs = Channel.empty()
ch_presto_splitseq_logs = Channel.empty()
ch_fastp_html = RNASEQ_INPUT.out.fastp_reads_html
ch_fastp_json = RNASEQ_INPUT.out.fastp_reads_json
ch_fastqc_postassembly_mqc = Channel.empty()
ch_tsv_files = Channel.empty()
} else {
// Perform sequence assembly if input type is fastq from bulk sequencing data
SEQUENCE_ASSEMBLY(
ch_input,
DATABASES.out.igblast.collect()
)

ch_fasta = SEQUENCE_ASSEMBLY.out.fasta
ch_versions = ch_versions.mix(SEQUENCE_ASSEMBLY.out.versions)
Expand All @@ -158,6 +159,7 @@ workflow AIRRFLOW {
ch_presto_assemblepairs_logs = SEQUENCE_ASSEMBLY.out.presto_assemblepairs_logs.ifEmpty([])
ch_presto_collapseseq_logs = SEQUENCE_ASSEMBLY.out.presto_collapseseq_logs.ifEmpty([])
ch_presto_splitseq_logs = SEQUENCE_ASSEMBLY.out.presto_splitseq_logs.ifEmpty([])
ch_tsv_files = Channel.empty()
}

} else if ( params.mode == "assembled" ) {
Expand All @@ -178,8 +180,10 @@ workflow AIRRFLOW {
ch_fasta_from_tsv = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta
ch_versions = ch_versions.mix(CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.versions)
ch_reassign_logs = ch_reassign_logs.mix(CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.logs)
ch_tsv_files = Channel.empty()
} else {
ch_fasta_from_tsv = Channel.empty()
ch_tsv_files = ASSEMBLED_INPUT_CHECK.out.ch_tsv
}

ch_fasta = ASSEMBLED_INPUT_CHECK.out.ch_fasta.mix(ch_fasta_from_tsv)
Expand All @@ -201,9 +205,11 @@ workflow AIRRFLOW {
} else {
error "Mode parameter value not valid."
}

// Perform V(D)J annotation and filtering
VDJ_ANNOTATION(
ch_fasta,
ch_tsv_files,
ch_validated_samplesheet.collect(),
DATABASES.out.igblast.collect(),
DATABASES.out.reference_fasta.collect()
Expand Down
Loading