1+ import shutil
12from pathlib import Path
23from types import SimpleNamespace
34
45import aiofiles
5- from virtool_core .utils import compress_file
66from fixtures import fixture
7+ from virtool_core .utils import compress_file
8+ from virtool_core .utils import is_gzipped , decompress_file
79from virtool_workflow import hooks , step
810from virtool_workflow .api .subtractions import SubtractionProvider
911from virtool_workflow .execution .run_in_executor import FunctionExecutor
@@ -28,11 +30,40 @@ def intermediate():
2830
2931
@fixture
def input_path(input_files: dict) -> Path:
    """
    The path to the input FASTA file for the subtraction.

    This is the first value found in `input_files`.
    """
    paths = list(input_files.values())
    return paths[0]
3436
3537
@fixture
def fasta_path(work_path: Path) -> Path:
    """The location of the decompressed FASTA file, `subtraction.fa` under `work_path`."""
    return work_path.joinpath("subtraction.fa")
42+
43+
@step
async def decompress(
    fasta_path: Path,
    input_path: Path,
    run_in_executor: FunctionExecutor,
):
    """
    Make the input FASTA available at `fasta_path`.

    A gzipped input is decompressed to `fasta_path`; any other input is copied
    there unchanged. Either way the work is handed to `run_in_executor`.
    """
    # Both workers take (source, destination), so only the callable differs.
    transfer = decompress_file if is_gzipped(input_path) else shutil.copyfile
    await run_in_executor(transfer, input_path, fasta_path)
65+
66+
3667@step
3768async def compute_fasta_gc_and_count (
3869 fasta_path : Path ,
@@ -49,7 +80,7 @@ async def compute_fasta_gc_and_count(
4980
5081 count = 0
5182
52- # Go through the fasta file getting the nucleotide counts, lengths, and number of sequences
83+ # Go through the FASTA file getting the nucleotide counts, lengths, and number of sequences
5384 async with aiofiles .open (fasta_path , "r" ) as f :
5485 async for line in f :
5586 if line [0 ] == ">" :
@@ -66,8 +97,6 @@ async def compute_fasta_gc_and_count(
6697 intermediate .gc = {
6798 k : round (nucleotides [k ] / nucleotides_sum , 3 ) for k in nucleotides }
6899
69- return "Fasta GC computed."
70-
71100
72101@step
73102async def bowtie2_build (
@@ -93,8 +122,6 @@ async def bowtie2_build(
93122
94123 intermediate .bowtie_path = bowtie_path
95124
96- return "Finished bowtie2 build."
97-
98125
99126@step
100127async def compress_fasta (
0 commit comments