Skip to content

Commit c91a092

Browse files
committed
fix: allow creation of subtractions from gzipped FASTA files
This functionality was lost in previous commits. Add back an initial step that decompresses the file if necessary.
1 parent 12559ad commit c91a092

File tree

1 file changed

+35
-8
lines changed

1 file changed

+35
-8
lines changed

workflow.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import shutil
12
from pathlib import Path
23
from types import SimpleNamespace
34

45
import aiofiles
5-
from virtool_core.utils import compress_file
66
from fixtures import fixture
7+
from virtool_core.utils import compress_file
8+
from virtool_core.utils import is_gzipped, decompress_file
79
from virtool_workflow import hooks, step
810
from virtool_workflow.api.subtractions import SubtractionProvider
911
from virtool_workflow.execution.run_in_executor import FunctionExecutor
@@ -28,11 +30,40 @@ def intermediate():
2830

2931

3032
@fixture
31-
def fasta_path(input_files: dict) -> Path:
32-
"""The path to the fasta file for the subtraction."""
33+
def input_path(input_files: dict) -> Path:
34+
"""The path to the input FASTA file for the subtraction."""
3335
return list(input_files.values())[0]
3436

3537

38+
@fixture
def fasta_path(work_path: Path) -> Path:
    """
    The path to the decompressed FASTA file.

    The file is named ``subtraction.fa`` and lives directly under ``work_path``.
    """
    return work_path.joinpath("subtraction.fa")
42+
43+
44+
@step
async def decompress(
    fasta_path: Path,
    input_path: Path,
    run_in_executor: FunctionExecutor,
):
    """
    Place an uncompressed copy of the input file at `fasta_path`.

    If `is_gzipped` reports that `input_path` is gzipped, the file is
    decompressed to `fasta_path`; otherwise it is copied there unchanged.
    Either way the file operation is handed to `run_in_executor` and awaited.
    """
    # Both operations take (source, destination), so pick the callable once
    # and dispatch it through the executor in a single place.
    transfer = decompress_file if is_gzipped(input_path) else shutil.copyfile
    await run_in_executor(transfer, input_path, fasta_path)
65+
66+
3667
@step
3768
async def compute_fasta_gc_and_count(
3869
fasta_path: Path,
@@ -49,7 +80,7 @@ async def compute_fasta_gc_and_count(
4980

5081
count = 0
5182

52-
# Go through the fasta file getting the nucleotide counts, lengths, and number of sequences
83+
# Go through the FASTA file getting the nucleotide counts, lengths, and number of sequences
5384
async with aiofiles.open(fasta_path, "r") as f:
5485
async for line in f:
5586
if line[0] == ">":
@@ -66,8 +97,6 @@ async def compute_fasta_gc_and_count(
6697
intermediate.gc = {
6798
k: round(nucleotides[k] / nucleotides_sum, 3) for k in nucleotides}
6899

69-
return "Fasta GC computed."
70-
71100

72101
@step
73102
async def bowtie2_build(
@@ -93,8 +122,6 @@ async def bowtie2_build(
93122

94123
intermediate.bowtie_path = bowtie_path
95124

96-
return "Finished bowtie2 build."
97-
98125

99126
@step
100127
async def compress_fasta(

0 commit comments

Comments
 (0)