diff --git a/artic/minion.py b/artic/minion.py
index 5666751..c00b414 100755
--- a/artic/minion.py
+++ b/artic/minion.py
@@ -2,6 +2,7 @@

 from clint.textui import colored
 import os
+import subprocess
 import sys
 import time
 from artic.utils import get_scheme, choose_model
@@ -131,18 +132,18 @@ def run(parser, args):

     ## create a holder to keep the pipeline commands in
     cmds = []

-    # 2) check the reference fasta has an index and create one if not
+    # check the reference fasta has an index and create one if not
     if not os.path.exists("%s.fai" % (ref)):
         cmds.append("samtools faidx %s" % (ref))

-    # 3) index the ref & align with minimap
+    # index the ref & align with minimap
     cmds.append(
         f"minimap2 -a -x map-ont -t {args.threads} {ref} {read_file} | samtools view -bS -F 4 - | samtools sort -o {args.sample}.sorted.bam -"
     )

     cmds.append(f"samtools index {args.sample}.sorted.bam")

-    # 4) trim the alignments to the primer start sites and normalise the coverage to save time
+    # trim the alignments to the primer start sites and normalise the coverage to save time
     normalise_string = f"--normalise {args.normalise}" if args.normalise else ""
     incorrect_pairs_string = (
@@ -160,7 +161,7 @@ def run(parser, args):

     cmds.append(f"samtools index {args.sample}.primertrimmed.rg.sorted.bam")

-    # 6) do variant calling on each read group
+    # do variant calling on each read group
     for p in pools:
         if os.path.exists("%s.%s.hdf" % (args.sample, p)):
             os.remove("%s.%s.hdf" % (args.sample, p))
@@ -182,7 +183,7 @@ def run(parser, args):

         cmds.append(f"rm {args.sample}.{p}.primertrimmed.rg.sorted.bam")

-    # 7) merge the called variants for each read group
+    # merge the called variants for each read group
     merge_vcf_cmd = "artic_vcf_merge %s %s 2> %s.primersitereport.txt" % (
         args.sample,
         bed,
@@ -203,7 +204,7 @@ def run(parser, args):
         f"artic_vcf_filter {fs_str} {indel_str} --min-depth {args.min_depth} {pre_filter_vcf}.gz {args.sample}.pass.vcf {args.sample}.fail.vcf"
     )

-    # 9) get the depth of coverage for each readgroup, create a coverage mask and plots, and add failed variants to the coverage mask (artic_mask must be run before bcftools consensus)
+    # get the depth of coverage for each readgroup, create a coverage mask and plots, and add failed variants to the coverage mask (artic_mask must be run before bcftools consensus)
     cmds.append(
         f"artic_make_depth_mask --depth {args.min_depth} {ref} {args.sample}.primertrimmed.rg.sorted.bam {args.sample}.coverage_mask.txt"
     )
@@ -214,7 +215,7 @@ def run(parser, args):

     post_filter_vcf_file = f"{args.sample}.pass.vcf"

-    # 10) generate the consensus sequence
+    # generate the consensus sequence
     cmds.append(f"bgzip -kf {post_filter_vcf_file}")
     cmds.append(f"tabix -f -p vcf {post_filter_vcf_file}.gz")

@@ -223,13 +224,12 @@ def run(parser, args):
     cmds.append(
         f"bcftools norm --check-ref x --fasta-ref {args.sample}.preconsensus.fasta -O z -o {post_normalisation_vcf_file} {post_filter_vcf_file}.gz"
     )
-    # cmds.append(f"bgzip -kf {post_normalisation_vcf_file}")
     cmds.append(f"tabix -f -p vcf {post_normalisation_vcf_file}")
     cmds.append(
         f"bcftools consensus -f {args.sample}.preconsensus.fasta {post_normalisation_vcf_file} -m {args.sample}.coverage_mask.txt -o {args.sample}.consensus.fasta"
     )

-    # 11) apply the header to the consensus sequence and run alignment against the reference sequence
+    # apply the header to the consensus sequence and run alignment against the reference sequence
     caller = "clair3"
     linearise_fasta = "--linearise-fasta" if args.linearise_fasta else ""
     cmds.append(
@@ -241,17 +241,100 @@ def run(parser, args):
         f"mafft --6merpair --addfragments {args.sample}.consensus.fasta {ref} > {args.sample}.aligned.fasta"
     )

-    # 13) setup the log file and run the pipeline commands
+    # define anticipated error codes and messages
+    general_error_codes = {
+        137: {
+            "message": "Process ran out of memory, please try running on a machine with more memory available.",
+        },
+        143: {
+            "message": "Process was terminated, this is likely due to a user or system interrupt.",
+        },
+        127: {
+            "message": "A required tool was not found, please ensure all dependencies are installed and available in your PATH.",
+        },
+    }
+
+    anticipated_tool_specific_error_codes = {
+        "align_trim": [
+            {
+                "input_exit_code": 1,
+                "message": "No reads aligned to the reference sequence, this could be due to an incorrect reference / primer scheme, or very low depth input data. Please check your input files.",
+                "output_exit_code": 2,
+            }
+        ]
+    }
+
+    # setup the log file and run the pipeline commands
     log = "%s.minion.log.txt" % (args.sample)
     logfh = open(log, "w")
     for cmd in cmds:
         print(colored.green("Running: ") + cmd, file=sys.stderr)
         if not args.dry_run:
             timerStart = time.perf_counter()
-            retval = os.system(cmd)
-            if retval != 0:
-                print(colored.red("Command failed:") + cmd, file=sys.stderr)
-                raise SystemExit(20)
+            subprocess_return = subprocess.run(cmd, shell=True, capture_output=True)
+            if subprocess_return.returncode != 0:
+                ## Check for general anticipated errors
+                if subprocess_return.returncode in general_error_codes:
+                    print(
+                        colored.yellow(
+                            general_error_codes[subprocess_return.returncode]["message"]
+                        ),
+                        file=sys.stderr,
+                    )
+                    print(colored.red("Command failed: ") + cmd, file=sys.stderr)
+                    print(
+                        colored.red("Command stderr: ")
+                        + subprocess_return.stderr.decode(errors="replace"),
+                        file=sys.stderr,
+                    )
+                    raise SystemExit(subprocess_return.returncode)
+
+                ## check for anticipated tool-specific errors
+                # take the first token of the command as the base tool name, skipping anything that looks like a flag
+                cmd_parts = []
+                for cmd_part in cmd.split(" ")[0:1]:
+                    if "-" not in cmd_part:
+                        cmd_parts.append(cmd_part)
+                        continue
+
+                    break
+
+                if cmd_parts:
+                    base_cmd = " ".join(cmd_parts)
+
+                    if base_cmd in anticipated_tool_specific_error_codes:
+                        for error_case in anticipated_tool_specific_error_codes[
+                            base_cmd
+                        ]:
+                            if (
+                                subprocess_return.returncode
+                                == error_case["input_exit_code"]
+                            ):
+                                print(
+                                    colored.yellow(error_case["message"]),
+                                    file=sys.stderr,
+                                )
+                                print(
+                                    colored.red("Command failed: ") + cmd,
+                                    file=sys.stderr,
+                                )
+                                print(
+                                    colored.red("Command stderr: ")
+                                    + subprocess_return.stderr.decode(errors="replace"),
+                                    file=sys.stderr,
+                                )
+                                raise SystemExit(error_case["output_exit_code"])
+
+                print(
+                    colored.red("Unexpected command failure: ") + cmd, file=sys.stderr
+                )
+                print(
+                    colored.red("Command stderr: ")
+                    + subprocess_return.stderr.decode(errors="replace"),
+                    file=sys.stderr,
+                )
+
+                raise SystemExit(subprocess_return.returncode)
             timerStop = time.perf_counter()

             ## print the executed command and the runtime to the log file
diff --git a/artic/utils.py b/artic/utils.py
index 7adba47..415bce8 100644
--- a/artic/utils.py
+++ b/artic/utils.py
@@ -322,7 +322,7 @@ def get_scheme(
     scheme_version: str,
     scheme_directory: str,
     scheme_length: int = False,
-    read_file: str = False,
+    read_file: str = "",
 ):
     """Get the primer scheme and reference fasta file from the manifest

diff --git a/environment.yml b/environment.yml
index 8384ba5..5cd6abf 100644
--- a/environment.yml
+++ b/environment.yml
@@ -23,4 +23,4 @@ dependencies:
   - tqdm
   - seqtk
   - align_trim>=1.0.2
-  - primalbedtools>=0.10.1
+  - primalbedtools>=1.0.0
diff --git a/pyproject.toml b/pyproject.toml
index 1c10bed..d7d5b0f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "artic"
-version = "1.8.5"
+version = "1.9.0"
 authors = [
     { name = "Nick Loman", email = "n.j.loman@bham.ac.uk" },
 ]
diff --git a/tests/get_scheme_unit_test.py b/tests/get_scheme_unit_test.py
index a100b97..ed03be2 100644
--- a/tests/get_scheme_unit_test.py
+++ b/tests/get_scheme_unit_test.py
@@ -8,18 +8,18 @@ class get_scheme_unit_test(unittest.TestCase):

     def test_get_scheme_no_length(self):
         bed, ref, scheme_version = get_scheme(
-            scheme_name="nCoV-2019",
+            scheme_name="artic-pan-dengue",
             scheme_version="v1.0.0",
             scheme_directory=f"{os.getcwd()}/primer-schemes",
         )

         self.assertEqual(
-            bed, f"{os.getcwd()}/primer-schemes/artic-sars-cov-2/400/v1.0.0/primer.bed"
+            bed, f"{os.getcwd()}/primer-schemes/artic-pan-dengue/400/v1.0.0/primer.bed"
         )
         self.assertEqual(
             ref,
-            f"{os.getcwd()}/primer-schemes/artic-sars-cov-2/400/v1.0.0/reference.fasta",
+            f"{os.getcwd()}/primer-schemes/artic-pan-dengue/400/v1.0.0/reference.fasta",
         )

     def test_get_scheme_unclear_length(self):
diff --git a/tests/minion_validator.py b/tests/minion_validator.py
index fe2f908..b97de38 100644
--- a/tests/minion_validator.py
+++ b/tests/minion_validator.py
@@ -164,6 +164,8 @@ def genCommand(sampleID, workflow):
     cmd.append("SARS-CoV-2")
     cmd.append("--scheme-version")
     cmd.append("v1.0.0")
+    cmd.append("--scheme-length")
+    cmd.append("400")
     cmd.append(sampleID)
     return cmd
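
The exit-code handling added to artic/minion.py above can be read as the standalone sketch below. It is illustrative only: classify_failure is a hypothetical helper written for this note (the patch does the lookups inline inside run()), the messages are abbreviated, and the base-command extraction is simplified to the first token of the command string.

# Illustrative sketch only, not part of the patch.
general_error_codes = {
    137: {"message": "Process ran out of memory."},
    143: {"message": "Process was terminated."},
    127: {"message": "A required tool was not found."},
}

anticipated_tool_specific_error_codes = {
    "align_trim": [
        {
            "input_exit_code": 1,
            "message": "No reads aligned to the reference sequence.",
            "output_exit_code": 2,
        }
    ],
}


def classify_failure(cmd, returncode):
    """Return (message, exit_code) for a failed pipeline command."""
    # general errors (OOM kill, SIGTERM, missing tool) keep the original return code
    if returncode in general_error_codes:
        return general_error_codes[returncode]["message"], returncode
    # tool-specific errors are matched on the base command, then remapped
    base_cmd = cmd.split(" ")[0]
    for error_case in anticipated_tool_specific_error_codes.get(base_cmd, []):
        if returncode == error_case["input_exit_code"]:
            return error_case["message"], error_case["output_exit_code"]
    # anything else is an unexpected failure; the raw return code is propagated
    return "Unexpected command failure", returncode


# align_trim exiting 1 is reported as "no reads aligned" and remapped to exit code 2
print(classify_failure("align_trim --normalise 200 primer.bed", 1))
# a command killed by the OOM killer (137) keeps its exit code
print(classify_failure("minimap2 -a -x map-ont ref.fasta reads.fastq", 137))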