From 935d7b31a2cb95e841041feb5b135bcc024f0071 Mon Sep 17 00:00:00 2001 From: MrTomRod Date: Fri, 5 Dec 2025 14:36:23 +0100 Subject: [PATCH] feat: add --assembler option to genome_size helper command This adds an optional `--assembler` argument to the `autocycler helper genome_size` command, allowing users to choose between Raven (default) and LJA for genome size estimation. LJA offers significantly faster performance for PacBio HiFi reads compared to Raven (approx. 10x faster in testing), making it a valuable alternative for large datasets. - Modified `src/main.rs` to parse the new `--assembler` flag. - Updated `src/helper.rs` to dispatch to `genome_size_raven` or `genome_size_lja` based on the argument. --- src/helper.rs | 32 ++++++++++++++++++++++++++++++-- src/main.rs | 8 ++++++-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/helper.rs b/src/helper.rs index e724334..640da65 100644 --- a/src/helper.rs +++ b/src/helper.rs @@ -37,12 +37,18 @@ use crate::subsample::parse_genome_size; #[allow(clippy::too_many_arguments)] pub fn helper(task: Task, reads: PathBuf, out_prefix: Option, genome_size: Option, threads: usize, dir: Option, read_type: ReadType, - min_depth_abs: Option, min_depth_rel: Option, extra_args: Vec) { + min_depth_abs: Option, min_depth_rel: Option, extra_args: Vec, + assembler: Option) { check_if_file_exists(&reads); let (dir, _guard) = get_working_dir(dir); if task == Task::GenomeSize { - genome_size_raven(reads, threads, dir, extra_args); + let assembler = assembler.unwrap_or_else(|| "raven".to_string()); + match assembler.to_lowercase().as_str() { + "raven" => genome_size_raven(reads, threads, dir, extra_args), + "lja" => genome_size_lja(reads, threads, dir, extra_args), + _ => quit_with_error(&format!("unknown assembler: {assembler}")), + } return; } @@ -403,6 +409,28 @@ fn genome_size_raven(reads: PathBuf, threads: usize, dir: PathBuf, extra_args: V } +fn genome_size_lja(reads: PathBuf, threads: usize, dir: PathBuf, extra_args: Vec) { + check_requirements(&["lja"]); + + let assembly_path = dir.join("assembly.fasta"); + + let mut cmd = Command::new("lja"); + + cmd.arg("--threads").arg(threads.to_string()) + .arg("--output-dir").arg(&dir) + .arg("--reads").arg(&reads); + + for token in extra_args { cmd.arg(token); } + + run_command(&mut cmd); + + if is_fasta_empty(&assembly_path) { + quit_with_error("LJA assembly failed"); + } + println!("{}", total_fasta_length(&assembly_path)); +} + + fn redbean(reads: PathBuf, out_prefix: &Path, genome_size: Option, threads: usize, dir: PathBuf, read_type: ReadType, extra_args: Vec) { // https://github.com/ruanjue/wtdbg2 diff --git a/src/main.rs b/src/main.rs index 6d4e99b..15b933b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -233,6 +233,10 @@ enum Commands { #[clap(long = "args", value_parser = clap::builder::NonEmptyStringValueParser::new(), num_args = 1.., action = clap::ArgAction::Append, allow_hyphen_values = true)] args: Vec, + + /// Assembler to use (for genome_size task) + #[clap(long = "assembler")] + assembler: Option, }, /// resolve repeats in the the unitig graph @@ -349,9 +353,9 @@ fn main() { gfa2fasta::gfa2fasta(in_gfa, out_fasta); }, Some(Commands::Helper { task, reads, out_prefix, genome_size, threads, dir, read_type, - min_depth_abs, min_depth_rel, args }) => { + min_depth_abs, min_depth_rel, args, assembler }) => { helper::helper(task, reads, out_prefix, genome_size, threads, dir, read_type, - min_depth_abs, min_depth_rel, args); + min_depth_abs, min_depth_rel, args, assembler); }, Some(Commands::Resolve { cluster_dir, verbose }) => { resolve::resolve(cluster_dir, verbose);