Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 94 additions & 14 deletions artic/minion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from clint.textui import colored
import os
import subprocess
import sys
import time
from artic.utils import get_scheme, choose_model
Expand Down Expand Up @@ -131,18 +132,18 @@ def run(parser, args):
## create a holder to keep the pipeline commands in
cmds = []

# 2) check the reference fasta has an index and create one if not
# check the reference fasta has an index and create one if not
if not os.path.exists("%s.fai" % (ref)):
cmds.append("samtools faidx %s" % (ref))

# 3) index the ref & align with minimap
# index the ref & align with minimap
cmds.append(
f"minimap2 -a -x map-ont -t {args.threads} {ref} {read_file} | samtools view -bS -F 4 - | samtools sort -o {args.sample}.sorted.bam -"
)

cmds.append(f"samtools index {args.sample}.sorted.bam")

# 4) trim the alignments to the primer start sites and normalise the coverage to save time
# trim the alignments to the primer start sites and normalise the coverage to save time
normalise_string = f"--normalise {args.normalise}" if args.normalise else ""

incorrect_pairs_string = (
Expand All @@ -160,7 +161,7 @@ def run(parser, args):

cmds.append(f"samtools index {args.sample}.primertrimmed.rg.sorted.bam")

# 6) do variant calling on each read group
# do variant calling on each read group
for p in pools:
if os.path.exists("%s.%s.hdf" % (args.sample, p)):
os.remove("%s.%s.hdf" % (args.sample, p))
Expand All @@ -182,7 +183,7 @@ def run(parser, args):

cmds.append(f"rm {args.sample}.{p}.primertrimmed.rg.sorted.bam")

# 7) merge the called variants for each read group
# merge the called variants for each read group
merge_vcf_cmd = "artic_vcf_merge %s %s 2> %s.primersitereport.txt" % (
args.sample,
bed,
Expand All @@ -203,7 +204,7 @@ def run(parser, args):
f"artic_vcf_filter {fs_str} {indel_str} --min-depth {args.min_depth} {pre_filter_vcf}.gz {args.sample}.pass.vcf {args.sample}.fail.vcf"
)

# 9) get the depth of coverage for each readgroup, create a coverage mask and plots, and add failed variants to the coverage mask (artic_mask must be run before bcftools consensus)
# get the depth of coverage for each readgroup, create a coverage mask and plots, and add failed variants to the coverage mask (artic_mask must be run before bcftools consensus)
cmds.append(
f"artic_make_depth_mask --depth {args.min_depth} {ref} {args.sample}.primertrimmed.rg.sorted.bam {args.sample}.coverage_mask.txt"
)
Expand All @@ -214,7 +215,7 @@ def run(parser, args):

post_filter_vcf_file = f"{args.sample}.pass.vcf"

# 10) generate the consensus sequence
# generate the consensus sequence
cmds.append(f"bgzip -kf {post_filter_vcf_file}")
cmds.append(f"tabix -f -p vcf {post_filter_vcf_file}.gz")

Expand All @@ -223,13 +224,12 @@ def run(parser, args):
cmds.append(
f"bcftools norm --check-ref x --fasta-ref {args.sample}.preconsensus.fasta -O z -o {post_normalisation_vcf_file} {post_filter_vcf_file}.gz"
)
# cmds.append(f"bgzip -kf {post_normalisation_vcf_file}")
cmds.append(f"tabix -f -p vcf {post_normalisation_vcf_file}")
cmds.append(
f"bcftools consensus -f {args.sample}.preconsensus.fasta {post_normalisation_vcf_file} -m {args.sample}.coverage_mask.txt -o {args.sample}.consensus.fasta"
)

# 11) apply the header to the consensus sequence and run alignment against the reference sequence
# apply the header to the consensus sequence and run alignment against the reference sequence
caller = "clair3"
linearise_fasta = "--linearise-fasta" if args.linearise_fasta else ""
cmds.append(
Expand All @@ -241,17 +241,97 @@ def run(parser, args):
f"mafft --6merpair --addfragments {args.sample}.consensus.fasta {ref} > {args.sample}.aligned.fasta"
)

# 13) setup the log file and run the pipeline commands
# define anticipated error codes and messages
general_error_codes = {
137: {
"message": "Process ran out of memory, please try running on a machine with more memory available.",
},
143: {
"message": "Process was terminated, this is likely due to a user or system interrupt.",
},
127: {
"message": "A required tool was not found, please ensure all dependencies are installed and available in your PATH.",
},
}

anticipated_tool_specific_error_codes = {
"align_trim": [
{
"input_exit_code": 1,
"message": "No reads aligned to the reference sequence, this could be due to an incorrect reference / primer scheme, or very low depth input data. Please check your input files.",
"output_exit_code": 2,
}
]
}

# setup the log file and run the pipeline commands
log = "%s.minion.log.txt" % (args.sample)
logfh = open(log, "w")
for cmd in cmds:
print(colored.green("Running: ") + cmd, file=sys.stderr)
if not args.dry_run:
timerStart = time.perf_counter()
retval = os.system(cmd)
if retval != 0:
print(colored.red("Command failed:") + cmd, file=sys.stderr)
raise SystemExit(20)
subprocess_return = subprocess.run(cmd, shell=True, capture_output=True)
if subprocess_return.returncode != 0:
## Check for general anticipated errors
if subprocess_return.returncode in general_error_codes:
print(
colored.yellow(
general_error_codes[subprocess_return.returncode]["message"]
),
file=sys.stderr,
)
print(colored.red("Command failed: ") + cmd, file=sys.stderr)
print(
colored.red("Command stderr: ")
+ subprocess_return.stderr.decode(errors="replace"),
)
raise SystemExit(subprocess_return.returncode)

## check for anticipated tool-specific errors
cmd_parts = []
for cmd_part in cmd.split(" ")[0:1]:
if "-" not in cmd_part:
cmd_parts.append(cmd_part)
continue

break

if cmd_parts:
base_cmd = " ".join(cmd_parts)

if base_cmd in anticipated_tool_specific_error_codes:
for error_case in anticipated_tool_specific_error_codes[
base_cmd
]:
if (
subprocess_return.returncode
== error_case["input_exit_code"]
):
print(
colored.yellow(error_case["message"]),
file=sys.stderr,
)
print(
colored.red("Command failed: ") + cmd,
file=sys.stderr,
)
print(
colored.red("Command stderr: ")
+ subprocess_return.stderr.decode(errors="replace"),
)
raise SystemExit(error_case["output_exit_code"])

print(
colored.red("Unexpected command failure: ") + cmd, file=sys.stderr
)
print(
colored.red("Command stderr: ")
+ subprocess_return.stderr.decode(errors="replace"),
file=sys.stderr,
)

raise SystemExit(subprocess_return.returncode)
timerStop = time.perf_counter()

## print the executed command and the runtime to the log file
Expand Down
2 changes: 1 addition & 1 deletion artic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def get_scheme(
scheme_version: str,
scheme_directory: str,
scheme_length: int = False,
read_file: str = False,
read_file: str = "",
):
"""Get the primer scheme and reference fasta file from the manifest

Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ dependencies:
- tqdm
- seqtk
- align_trim>=1.0.2
- primalbedtools>=0.10.1
- primalbedtools>=1.0.0
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "artic"
version = "1.8.5"
version = "1.9.0"
authors = [
{ name = "Nick Loman", email = "n.j.loman@bham.ac.uk" },
]
Expand Down
6 changes: 3 additions & 3 deletions tests/get_scheme_unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@ class get_scheme_unit_test(unittest.TestCase):
def test_get_scheme_no_length(self):

bed, ref, scheme_version = get_scheme(
scheme_name="nCoV-2019",
scheme_name="artic-pan-dengue",
scheme_version="v1.0.0",
scheme_directory=f"{os.getcwd()}/primer-schemes",
)

self.assertEqual(
bed, f"{os.getcwd()}/primer-schemes/artic-sars-cov-2/400/v1.0.0/primer.bed"
bed, f"{os.getcwd()}/primer-schemes/artic-pan-dengue/400/v1.0.0/primer.bed"
)

self.assertEqual(
ref,
f"{os.getcwd()}/primer-schemes/artic-sars-cov-2/400/v1.0.0/reference.fasta",
f"{os.getcwd()}/primer-schemes/artic-pan-dengue/400/v1.0.0/reference.fasta",
)

def test_get_scheme_unclear_length(self):
Expand Down
2 changes: 2 additions & 0 deletions tests/minion_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ def genCommand(sampleID, workflow):
cmd.append("SARS-CoV-2")
cmd.append("--scheme-version")
cmd.append("v1.0.0")
cmd.append("--scheme-length")
cmd.append("400")
cmd.append(sampleID)
return cmd

Expand Down
Loading