From a5ac4016bf9ff79aea5f46d6ed1074c1c00ded13 Mon Sep 17 00:00:00 2001 From: Garrett-Pz Date: Fri, 18 Jul 2025 10:38:00 -0700 Subject: [PATCH 01/11] Add search with option for clique --- src/assembly.rs | 80 ++++++++++-- src/lib.rs | 3 + src/reductions.rs | 311 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 383 insertions(+), 11 deletions(-) create mode 100644 src/reductions.rs diff --git a/src/assembly.rs b/src/assembly.rs index e8660f44..69380033 100644 --- a/src/assembly.rs +++ b/src/assembly.rs @@ -37,6 +37,7 @@ use crate::{ kernels::KernelMode, molecule::Molecule, utils::connected_components_under_edges, + reductions::CompatGraph, }; /// Parallelization strategy for the search phase. @@ -191,11 +192,14 @@ fn fractures( fn recurse_index_search_serial( mol: &Molecule, matches: &[(BitSet, BitSet)], + matches_index: usize, fragments: &[BitSet], state_index: usize, mut best_index: usize, largest_remove: usize, bounds: &[Bound], + matches_graph: Option<&CompatGraph>, + subgraph: Option, ) -> (usize, usize) { // If any bounds are exceeded, halt this search branch. if bound_exceeded( @@ -209,6 +213,15 @@ fn recurse_index_search_serial( return (state_index, 1); } + let nodes = { + if let Some(s) = &subgraph { + s.iter().collect::>() + } + else { + (matches_index..matches.len()).collect::>() + } + }; + // Keep track of the best assembly index found in any of this assembly // state's children and the number of states searched, including this one. let mut best_child_index = state_index; @@ -217,17 +230,30 @@ fn recurse_index_search_serial( // For every pair of duplicatable subgraphs compatible with the current set // of fragments, recurse using the fragments obtained by removing this pair // and adding one subgraph back. - for (i, (h1, h2)) in matches.iter().enumerate() { + for v in nodes { + let (h1, h2) = &matches[v]; if let Some(fractures) = fractures(mol, fragments, h1, h2) { + let subgraph_clone = { + if let (Some(g), Some(s)) = (matches_graph, &subgraph) { + Some(g.forward_neighbors(v, &s)) + } + else { + None + } + }; + // Recurse using the remaining matches and updated fragments. let (child_index, child_states_searched) = recurse_index_search_serial( mol, - &matches[i + 1..], + &matches, + v + 1, &fractures, state_index - h1.len() + 1, best_index, h1.len(), bounds, + matches_graph, + subgraph_clone, ); // Update the best assembly indices (across children states and @@ -518,15 +544,47 @@ pub fn index_search( // Search for the shortest assembly pathway recursively. let (index, states_searched) = match parallel_mode { - ParallelMode::None => recurse_index_search_serial( - mol, - &matches, - &[init], - edge_count - 1, - edge_count - 1, - edge_count, - bounds, - ), + ParallelMode::None => { + let clique_bounds = vec![Bound::CoverNoSort, Bound::CoverSort, Bound::CliqueBudget]; + let use_clique = kernel_mode != KernelMode::None || bounds.iter().any(|b| clique_bounds.contains(b)); + let graph; + + let matches_graph = { + if use_clique { + graph = CompatGraph::new(&matches); + Some(&graph) + } + else { + None + } + }; + + let subgraph = { + if use_clique { + let mut temp = BitSet::with_capacity(matches.len()); + for i in 0..matches.len() { + temp.insert(i); + } + Some(temp) + } + else { + None + } + }; + + recurse_index_search_serial( + mol, + &matches, + 0, + &[init], + edge_count - 1, + edge_count - 1, + edge_count, + bounds, + matches_graph, + subgraph, + ) + } ParallelMode::DepthOne => { let best_index = Arc::new(AtomicUsize::from(edge_count - 1)); recurse_index_search_depthone( diff --git a/src/lib.rs b/src/lib.rs index 18a3077f..306f8b52 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,6 +55,9 @@ pub mod kernels; // Utility functions mod utils; +// Reductions to other problem instances +pub mod reductions; + // Python library #[cfg(feature = "python")] pub mod python; diff --git a/src/reductions.rs b/src/reductions.rs new file mode 100644 index 00000000..3f1a768b --- /dev/null +++ b/src/reductions.rs @@ -0,0 +1,311 @@ +use bit_set::BitSet; + +pub struct CompatGraph { + graph: Vec, + weights: Vec, + matches: Vec<(BitSet, BitSet)> +} + +impl CompatGraph { + pub fn new(init_matches: &Vec<(BitSet, BitSet)>) -> Self{ + let size = init_matches.len(); + + // Initialize weights and empty graph + let mut init_graph: Vec = Vec::with_capacity(size); + let mut init_weights: Vec = Vec::with_capacity(size); + for m in init_matches.iter() { + init_graph.push(BitSet::with_capacity(size)); + init_weights.push(m.0.len() - 1); + } + + // Populate graph + for (idx1, (h1, h2)) in init_matches.iter().enumerate() { + for (idx2, (h1p, h2p)) in init_matches[idx1 + 1..].iter().enumerate() { + let idx2 = idx2 + idx1 + 1; + + let forward_compatible = { + h2.is_disjoint(h1p) && + h2.is_disjoint(h2p) && + (h1.is_disjoint(h1p) || h1.is_superset(h1p)) && + (h1.is_disjoint(h2p) || h1.is_superset(h2p)) + }; + + if forward_compatible { + init_graph[idx1].insert(idx2); + init_graph[idx2].insert(idx1); + } + } + } + + Self { + graph: init_graph, + weights: init_weights, + matches: Vec::new(), + } + } + + /*pub fn savings_ground_truth(&self, subgraph: &BitSet) -> usize { + self.savings_ground_truth_recurse(0, 0, subgraph) + } + + fn savings_ground_truth_recurse(&self, ix: usize, mut best: usize, subgraph: &BitSet) -> usize { + if subgraph.len() == 0 { + return ix; + } + let mut cx = ix; + + /*if ix + subgraph.iter().count() <= best && ix + self.remaining_weight_bound(&subgraph) <= best { + return ix; + }*/ + /*if ix + self.color_bound(&subgraph) <= best { + return ix; + }*/ + /*if ix + self.cover_bound(&subgraph, true) <= best{ + return ix; + }*/ + + // Search for duplicatable fragment + for v in subgraph.iter() { + let subgraph_clone = self.forward_neighbors(v, &subgraph); + + cx = cx.max(self.savings_ground_truth_recurse( + ix + self.weights[v], + best, + &subgraph_clone, + )); + best = best.max(cx); + } + + cx + }*/ + + pub fn len(&self) -> usize { + self.graph.len() + } + + pub fn degree(&self, v: usize, subgraph: &BitSet) -> usize { + self.graph[v].intersection(subgraph).count() + } + + pub fn neighbors(&self, v: usize, subgraph: &BitSet) -> BitSet { + let mut neighbors = self.graph[v].clone(); + neighbors.intersect_with(subgraph); + + neighbors + } + + pub fn forward_neighbors(&self, v: usize, subgraph: &BitSet) -> BitSet { + let mut neighbors = self.graph[v].clone(); + neighbors.intersect_with(subgraph); + let mut to_remove = vec![]; + for u in neighbors.iter() { + if u <= v { + to_remove.push(u); + } + if u > v { + break; + } + } + for u in to_remove { + neighbors.remove(u); + } + + neighbors + } + + pub fn are_adjacent(&self, v: usize, u: usize) -> bool { + self.graph[v].contains(u) + } + + pub fn remaining_weight_bound(&self, subgraph: &BitSet) -> usize { + let deg_sum = subgraph.iter().map(|v| self.degree(v, subgraph)).sum::() as f32; + let max_clique = ((1_f32 + (4_f32 * deg_sum + 1_f32).sqrt()) / 2_f32).floor() as usize; + let mut sum = 0; + let mut iter = subgraph.iter(); + for _ in 0..max_clique { + sum += self.weights[iter.next().unwrap()]; + }; + + sum + } + + pub fn color_bound(&self, subgraph: &BitSet) -> usize{ + // Greedy coloring + let mut colors: Vec = vec![-1; self.len()]; + let mut num_colors = 0; + let mut largest: Vec = Vec::new(); + + + for v in (0..self.matches.len()).rev() { + if !subgraph.contains(v) { + continue; + } + + let mut used: Vec = vec![0; num_colors]; + + for u in subgraph.intersection(&self.graph[v]) { + if colors[u] != -1 { + used[colors[u] as usize] = 1; + } + } + + let mut max = 0; + let mut max_idx = num_colors; + for i in 0..num_colors { + if used[i] == 0 && largest[i] > max { + max = largest[i]; + max_idx = i; + } + } + + if max_idx == num_colors { + num_colors += 1; + largest.push(0); + } + if self.weights[v] > largest[max_idx] { + largest[max_idx] = self.weights[v] + } + + colors[v] = max_idx as i32; + } + + largest.iter().sum::() + } + + pub fn cover_bound(&self, subgraph: &BitSet, sort: bool) -> usize { + // Sort vertices + if sort { + let mut vertices: Vec<(usize, usize)> = Vec::with_capacity(subgraph.len()); + for v in subgraph { + vertices.push((v, self.degree(v, subgraph))); + } + vertices.sort_by(|a, b| b.1.cmp(&a.1)); + self.cover_bound_helper(subgraph, vertices.iter().map(|(v, _)| *v)) + } + else { + let vertices = (0..self.matches.len()).rev().filter(|v| subgraph.contains(*v)); + self.cover_bound_helper(subgraph, vertices) + } + } + + fn cover_bound_helper(&self, subgraph: &BitSet, iter: impl Iterator) -> usize { + let mut colors: Vec>> = vec![None; self.len()]; + let mut col_weights = vec![]; + let mut num_col = 0; + + for v in iter { + let mut v_col = Vec::new(); + let mut used = vec![0; num_col]; + + // Find colors used in neighborhood of v + for u in subgraph.intersection(&self.graph[v]) { + let Some(u_col) = &colors[u] else { + continue; + }; + + for c in u_col { + used[*c] = 1; + } + } + + let mut total_weight = 0; + let v_val = self.weights[v]; + // Find colors to give to v + for c in 0..num_col { + if used[c] == 1 { + continue; + } + + v_col.push(c); + total_weight += col_weights[c]; + + if total_weight >= v_val { + break; + } + } + + if total_weight == 0 { + v_col.push(num_col); + col_weights.push(v_val); + num_col += 1 + } + else if total_weight < v_val { + let mut k = num_col - 1; + while used[k] == 1 { + k -= 1 + } + col_weights[k] += v_val - total_weight + } + + colors[v] = Some(v_col); + }; + + col_weights.iter().sum() + } + + pub fn frag_bound(&self, subgraph: &BitSet, fragments: &[BitSet]) -> usize { + let total_bonds = fragments.iter().map(|x| x.len()).sum::(); + let mut bound = 0; + let sizes = { + let mut vec = vec![]; + let mut prev = 0; + for s in subgraph.iter().map(|v| self.weights[v] + 1) { + if s != prev { + vec.push(s); + prev = s; + } + } + + vec + }; + + for i in sizes { + let mut bound_temp = 0; + let mut has_bonds = fragments.len(); + let mut num_bonds: Vec = fragments.iter().map(|x| x.len()).collect(); + let mut smallest_remove = i; + + for v in subgraph.iter() { + if has_bonds == 0 { + break; + } + if self.weights[v] + 1 > i { + continue; + } + + + let dup = &self.matches[v]; + let bond = dup.1.iter().next().unwrap(); + let mut j = 0; + while !fragments[j].contains(bond) { + j += 1; + } + + if num_bonds[j] > 0 { + let remove = std::cmp::min(dup.0.len(), num_bonds[j]); + bound_temp += 1; + num_bonds[j] -= remove; + smallest_remove = std::cmp::min(smallest_remove, remove); + + if num_bonds[j] == 0 { + has_bonds -= 1; + } + } + } + + let leftover = num_bonds.iter().sum::(); + let log = { + if leftover > 0 { + 0 + } + else { + (smallest_remove as f32).log2().ceil() as usize + } + }; + bound = std::cmp::max(bound, total_bonds - bound_temp - leftover - log); + } + + bound + } + +} \ No newline at end of file From 62f595aaabe952468331c4ac14c5a64c2bb32e47 Mon Sep 17 00:00:00 2001 From: Garrett-Pz Date: Fri, 18 Jul 2025 10:56:08 -0700 Subject: [PATCH 02/11] Make search clique only --- src/assembly.rs | 60 +++++++++---------------------------------------- 1 file changed, 10 insertions(+), 50 deletions(-) diff --git a/src/assembly.rs b/src/assembly.rs index 69380033..2f91f9db 100644 --- a/src/assembly.rs +++ b/src/assembly.rs @@ -192,14 +192,13 @@ fn fractures( fn recurse_index_search_serial( mol: &Molecule, matches: &[(BitSet, BitSet)], - matches_index: usize, fragments: &[BitSet], state_index: usize, mut best_index: usize, largest_remove: usize, bounds: &[Bound], - matches_graph: Option<&CompatGraph>, - subgraph: Option, + matches_graph: &CompatGraph, + subgraph: BitSet, ) -> (usize, usize) { // If any bounds are exceeded, halt this search branch. if bound_exceeded( @@ -213,15 +212,6 @@ fn recurse_index_search_serial( return (state_index, 1); } - let nodes = { - if let Some(s) = &subgraph { - s.iter().collect::>() - } - else { - (matches_index..matches.len()).collect::>() - } - }; - // Keep track of the best assembly index found in any of this assembly // state's children and the number of states searched, including this one. let mut best_child_index = state_index; @@ -230,23 +220,15 @@ fn recurse_index_search_serial( // For every pair of duplicatable subgraphs compatible with the current set // of fragments, recurse using the fragments obtained by removing this pair // and adding one subgraph back. - for v in nodes { + for v in subgraph.iter() { let (h1, h2) = &matches[v]; if let Some(fractures) = fractures(mol, fragments, h1, h2) { - let subgraph_clone = { - if let (Some(g), Some(s)) = (matches_graph, &subgraph) { - Some(g.forward_neighbors(v, &s)) - } - else { - None - } - }; + let subgraph_clone = matches_graph.forward_neighbors(v, &subgraph); // Recurse using the remaining matches and updated fragments. let (child_index, child_states_searched) = recurse_index_search_serial( mol, &matches, - v + 1, &fractures, state_index - h1.len() + 1, best_index, @@ -545,43 +527,21 @@ pub fn index_search( // Search for the shortest assembly pathway recursively. let (index, states_searched) = match parallel_mode { ParallelMode::None => { - let clique_bounds = vec![Bound::CoverNoSort, Bound::CoverSort, Bound::CliqueBudget]; - let use_clique = kernel_mode != KernelMode::None || bounds.iter().any(|b| clique_bounds.contains(b)); - let graph; - - let matches_graph = { - if use_clique { - graph = CompatGraph::new(&matches); - Some(&graph) - } - else { - None - } - }; - - let subgraph = { - if use_clique { - let mut temp = BitSet::with_capacity(matches.len()); - for i in 0..matches.len() { - temp.insert(i); - } - Some(temp) - } - else { - None - } - }; + let matches_graph = CompatGraph::new(&matches); + let mut subgraph = BitSet::with_capacity(matches.len()); + for i in 0..matches.len() { + subgraph.insert(i); + } recurse_index_search_serial( mol, &matches, - 0, &[init], edge_count - 1, edge_count - 1, edge_count, bounds, - matches_graph, + &matches_graph, subgraph, ) } From b7a5703a32b8a32151abd494a9279dbf73e86adf Mon Sep 17 00:00:00 2001 From: Garrett-Pz Date: Fri, 18 Jul 2025 11:25:18 -0700 Subject: [PATCH 03/11] Smarter largest removed calculation --- src/assembly.rs | 11 ++++++++--- src/reductions.rs | 4 ++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/assembly.rs b/src/assembly.rs index 2f91f9db..52167b73 100644 --- a/src/assembly.rs +++ b/src/assembly.rs @@ -195,11 +195,18 @@ fn recurse_index_search_serial( fragments: &[BitSet], state_index: usize, mut best_index: usize, - largest_remove: usize, bounds: &[Bound], matches_graph: &CompatGraph, subgraph: BitSet, ) -> (usize, usize) { + let largest_remove = { + if let Some(v) = subgraph.iter().next() { + matches_graph.weight(v) + 1 + } + else { + return state_index; + } + }; // If any bounds are exceeded, halt this search branch. if bound_exceeded( mol, @@ -232,7 +239,6 @@ fn recurse_index_search_serial( &fractures, state_index - h1.len() + 1, best_index, - h1.len(), bounds, matches_graph, subgraph_clone, @@ -539,7 +545,6 @@ pub fn index_search( &[init], edge_count - 1, edge_count - 1, - edge_count, bounds, &matches_graph, subgraph, diff --git a/src/reductions.rs b/src/reductions.rs index 3f1a768b..8e6d6164 100644 --- a/src/reductions.rs +++ b/src/reductions.rs @@ -44,6 +44,10 @@ impl CompatGraph { } } + pub fn weight(&self, v: usize) -> usize { + self.weights[v] + } + /*pub fn savings_ground_truth(&self, subgraph: &BitSet) -> usize { self.savings_ground_truth_recurse(0, 0, subgraph) } From 8b67dbb1ddc6714cf7bbe376ee1e574451b9752a Mon Sep 17 00:00:00 2001 From: Garrett-Pz Date: Fri, 18 Jul 2025 11:31:30 -0700 Subject: [PATCH 04/11] Store matches in CompatGraph --- src/assembly.rs | 11 +++++------ src/reductions.rs | 16 ++++++++++------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/assembly.rs b/src/assembly.rs index 52167b73..c2c4d0c5 100644 --- a/src/assembly.rs +++ b/src/assembly.rs @@ -191,7 +191,6 @@ fn fractures( #[allow(clippy::too_many_arguments)] fn recurse_index_search_serial( mol: &Molecule, - matches: &[(BitSet, BitSet)], fragments: &[BitSet], state_index: usize, mut best_index: usize, @@ -228,7 +227,7 @@ fn recurse_index_search_serial( // of fragments, recurse using the fragments obtained by removing this pair // and adding one subgraph back. for v in subgraph.iter() { - let (h1, h2) = &matches[v]; + let (h1, h2) = matches_graph.matches(v); if let Some(fractures) = fractures(mol, fragments, h1, h2) { let subgraph_clone = matches_graph.forward_neighbors(v, &subgraph); @@ -533,15 +532,15 @@ pub fn index_search( // Search for the shortest assembly pathway recursively. let (index, states_searched) = match parallel_mode { ParallelMode::None => { - let matches_graph = CompatGraph::new(&matches); - let mut subgraph = BitSet::with_capacity(matches.len()); - for i in 0..matches.len() { + // TODO: remove matches.clone() call + let matches_graph = CompatGraph::new(matches.clone()); + let mut subgraph = BitSet::with_capacity(matches_graph.len()); + for i in 0..matches_graph.len() { subgraph.insert(i); } recurse_index_search_serial( mol, - &matches, &[init], edge_count - 1, edge_count - 1, diff --git a/src/reductions.rs b/src/reductions.rs index 8e6d6164..79a822d8 100644 --- a/src/reductions.rs +++ b/src/reductions.rs @@ -7,7 +7,7 @@ pub struct CompatGraph { } impl CompatGraph { - pub fn new(init_matches: &Vec<(BitSet, BitSet)>) -> Self{ + pub fn new(init_matches: Vec<(BitSet, BitSet)>) -> Self { let size = init_matches.len(); // Initialize weights and empty graph @@ -40,7 +40,7 @@ impl CompatGraph { Self { graph: init_graph, weights: init_weights, - matches: Vec::new(), + matches: init_matches, } } @@ -48,6 +48,14 @@ impl CompatGraph { self.weights[v] } + pub fn matches(&self, v: usize) -> &(BitSet, BitSet) { + &self.matches[v] + } + + pub fn len(&self) -> usize { + self.graph.len() + } + /*pub fn savings_ground_truth(&self, subgraph: &BitSet) -> usize { self.savings_ground_truth_recurse(0, 0, subgraph) } @@ -83,10 +91,6 @@ impl CompatGraph { cx }*/ - pub fn len(&self) -> usize { - self.graph.len() - } - pub fn degree(&self, v: usize, subgraph: &BitSet) -> usize { self.graph[v].intersection(subgraph).count() } From 1fc0e264378e54c98dc9169382b0407b46fa6e32 Mon Sep 17 00:00:00 2001 From: Garrett-Pz Date: Fri, 18 Jul 2025 12:26:14 -0700 Subject: [PATCH 05/11] Fix bugs from merging --- src/assembly.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/assembly.rs b/src/assembly.rs index c2c4d0c5..b605fd2d 100644 --- a/src/assembly.rs +++ b/src/assembly.rs @@ -203,7 +203,7 @@ fn recurse_index_search_serial( matches_graph.weight(v) + 1 } else { - return state_index; + return (state_index, 1); } }; // If any bounds are exceeded, halt this search branch. @@ -234,7 +234,6 @@ fn recurse_index_search_serial( // Recurse using the remaining matches and updated fragments. let (child_index, child_states_searched) = recurse_index_search_serial( mol, - &matches, &fractures, state_index - h1.len() + 1, best_index, From 12a47b6b127425bb020eab21578c6710e14b1e17 Mon Sep 17 00:00:00 2001 From: Garrett-Pz Date: Fri, 18 Jul 2025 12:32:41 -0700 Subject: [PATCH 06/11] Refactor clique bounds --- src/bounds.rs | 213 ++++++++++++++++++++++++++++++++++++++--- src/reductions.rs | 238 ++++------------------------------------------ 2 files changed, 222 insertions(+), 229 deletions(-) diff --git a/src/bounds.rs b/src/bounds.rs index c3b516e6..3c3bc6bc 100644 --- a/src/bounds.rs +++ b/src/bounds.rs @@ -1,10 +1,12 @@ -//! Bounds for identifying assembly states (i.e., collections of fragments) -//! from which no further assembly index improvements can be found. +//! TODO use bit_set::BitSet; use clap::ValueEnum; -use crate::molecule::{Bond, Element, Molecule}; +use crate::{ + molecule::{Bond, Element, Molecule}, + reductions::{CompatGraph}, +}; /// Bounding strategies for the search phase of assembly index calcluation. #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] @@ -39,20 +41,18 @@ struct EdgeType { pub fn bound_exceeded( mol: &Molecule, fragments: &[BitSet], - state_index: usize, - best_index: usize, + ix: usize, + best: usize, largest_remove: usize, bounds: &[Bound], ) -> bool { for bound_type in bounds { let exceeds = match bound_type { - Bound::Log => state_index - log_bound(fragments) >= best_index, - Bound::Int => state_index - int_bound(fragments, largest_remove) >= best_index, - Bound::VecSimple => { - state_index - vec_simple_bound(fragments, largest_remove, mol) >= best_index - } + Bound::Log => ix - log_bound(fragments) >= best, + Bound::Int => ix - int_bound(fragments, largest_remove) >= best, + Bound::VecSimple => ix - vec_simple_bound(fragments, largest_remove, mol) >= best, Bound::VecSmallFrags => { - state_index - vec_small_frags_bound(fragments, largest_remove, mol) >= best_index + ix - vec_small_frags_bound(fragments, largest_remove, mol) >= best } _ => { panic!("One of the chosen bounds is not implemented yet!") @@ -211,3 +211,194 @@ pub fn vec_small_frags_bound(fragments: &[BitSet], m: usize, mol: &Molecule) -> s - (z + size_two_types.len() + size_two_fragments.len()) - ((sl - z) as f32 / m as f32).ceil() as usize } + +pub fn remaining_weight_bound(graph: &CompatGraph, subgraph: &BitSet) -> usize { + let deg_sum = subgraph.iter().map(|v| graph.degree(v, subgraph)).sum::() as f32; + let max_clique = ((1_f32 + (4_f32 * deg_sum + 1_f32).sqrt()) / 2_f32).floor() as usize; + let mut sum = 0; + let mut iter = subgraph.iter(); + for _ in 0..max_clique { + sum += graph.weight(iter.next().unwrap()); + }; + + sum +} + +pub fn color_bound(graph: &CompatGraph, subgraph: &BitSet) -> usize{ + // Greedy coloring + let mut colors: Vec = vec![-1; graph.len()]; + let mut num_colors = 0; + let mut largest: Vec = Vec::new(); + + + for v in (0..graph.len()).rev() { + if !subgraph.contains(v) { + continue; + } + + let mut used: Vec = vec![0; num_colors]; + + for u in subgraph.intersection(graph.compatible_with(v)) { + if colors[u] != -1 { + used[colors[u] as usize] = 1; + } + } + + let mut max = 0; + let mut max_idx = num_colors; + for i in 0..num_colors { + if used[i] == 0 && largest[i] > max { + max = largest[i]; + max_idx = i; + } + } + + if max_idx == num_colors { + num_colors += 1; + largest.push(0); + } + if graph.weight(v) > largest[max_idx] { + largest[max_idx] = graph.weights(v) + } + + colors[v] = max_idx as i32; + } + + largest.iter().sum::() +} + +pub fn cover_bound(graph: &CompatGraph, subgraph: &BitSet, sort: bool) -> usize { + // Sort vertices + if sort { + let mut vertices: Vec<(usize, usize)> = Vec::with_capacity(subgraph.len()); + for v in subgraph { + vertices.push((v, graph.degree(v, subgraph))); + } + vertices.sort_by(|a, b| b.1.cmp(&a.1)); + cover_bound_helper(graph, subgraph, vertices.iter().map(|(v, _)| *v)) + } + else { + let vertices = (0..graph.len()).rev().filter(|v| subgraph.contains(*v)); + cover_bound_helper(graph, subgraph, vertices) + } +} + +fn cover_bound_helper(graph: &CompatGraph, subgraph: &BitSet, iter: impl Iterator) -> usize { + let mut colors: Vec>> = vec![None; graph.len()]; + let mut col_weights = vec![]; + let mut num_col = 0; + + for v in iter { + let mut v_col = Vec::new(); + let mut used = vec![0; num_col]; + + // Find colors used in neighborhood of v + for u in subgraph.intersection(graph.compatible_with(v)) { + let Some(u_col) = &colors[u] else { + continue; + }; + + for c in u_col { + used[*c] = 1; + } + } + + let mut total_weight = 0; + let v_val = graph.weight(v); + // Find colors to give to v + for c in 0..num_col { + if used[c] == 1 { + continue; + } + + v_col.push(c); + total_weight += col_weights[c]; + + if total_weight >= v_val { + break; + } + } + + if total_weight == 0 { + v_col.push(num_col); + col_weights.push(v_val); + num_col += 1 + } + else if total_weight < v_val { + let mut k = num_col - 1; + while used[k] == 1 { + k -= 1 + } + col_weights[k] += v_val - total_weight + } + + colors[v] = Some(v_col); + }; + + col_weights.iter().sum() +} + +pub fn frag_bound(graph: &CompatGraph, subgraph: &BitSet, fragments: &[BitSet]) -> usize { + let total_bonds = fragments.iter().map(|x| x.len()).sum::(); + let mut bound = 0; + let sizes = { + let mut vec = vec![]; + let mut prev = 0; + for s in subgraph.iter().map(|v| graph.weight(v) + 1) { + if s != prev { + vec.push(s); + prev = s; + } + } + + vec + }; + + for i in sizes { + let mut bound_temp = 0; + let mut has_bonds = fragments.len(); + let mut num_bonds: Vec = fragments.iter().map(|x| x.len()).collect(); + let mut smallest_remove = i; + + for v in subgraph.iter() { + if has_bonds == 0 { + break; + } + if graph.weight(v) + 1 > i { + continue; + } + + + let dup = &graph.matches(v); + let bond = dup.1.iter().next().unwrap(); + let mut j = 0; + while !fragments[j].contains(bond) { + j += 1; + } + + if num_bonds[j] > 0 { + let remove = std::cmp::min(dup.0.len(), num_bonds[j]); + bound_temp += 1; + num_bonds[j] -= remove; + smallest_remove = std::cmp::min(smallest_remove, remove); + + if num_bonds[j] == 0 { + has_bonds -= 1; + } + } + } + + let leftover = num_bonds.iter().sum::(); + let log = { + if leftover > 0 { + 0 + } + else { + (smallest_remove as f32).log2().ceil() as usize + } + }; + bound = std::cmp::max(bound, total_bonds - bound_temp - leftover - log); + } + + bound +} diff --git a/src/reductions.rs b/src/reductions.rs index 79a822d8..aa4ee875 100644 --- a/src/reductions.rs +++ b/src/reductions.rs @@ -56,45 +56,14 @@ impl CompatGraph { self.graph.len() } - /*pub fn savings_ground_truth(&self, subgraph: &BitSet) -> usize { - self.savings_ground_truth_recurse(0, 0, subgraph) - } - - fn savings_ground_truth_recurse(&self, ix: usize, mut best: usize, subgraph: &BitSet) -> usize { - if subgraph.len() == 0 { - return ix; - } - let mut cx = ix; - - /*if ix + subgraph.iter().count() <= best && ix + self.remaining_weight_bound(&subgraph) <= best { - return ix; - }*/ - /*if ix + self.color_bound(&subgraph) <= best { - return ix; - }*/ - /*if ix + self.cover_bound(&subgraph, true) <= best{ - return ix; - }*/ - - // Search for duplicatable fragment - for v in subgraph.iter() { - let subgraph_clone = self.forward_neighbors(v, &subgraph); - - cx = cx.max(self.savings_ground_truth_recurse( - ix + self.weights[v], - best, - &subgraph_clone, - )); - best = best.max(cx); - } - - cx - }*/ - pub fn degree(&self, v: usize, subgraph: &BitSet) -> usize { self.graph[v].intersection(subgraph).count() } + pub fn compatible_with(&self, v: usize) -> &BitSet { + &self.graph[v] + } + pub fn neighbors(&self, v: usize, subgraph: &BitSet) -> BitSet { let mut neighbors = self.graph[v].clone(); neighbors.intersect_with(subgraph); @@ -125,195 +94,28 @@ impl CompatGraph { self.graph[v].contains(u) } - pub fn remaining_weight_bound(&self, subgraph: &BitSet) -> usize { - let deg_sum = subgraph.iter().map(|v| self.degree(v, subgraph)).sum::() as f32; - let max_clique = ((1_f32 + (4_f32 * deg_sum + 1_f32).sqrt()) / 2_f32).floor() as usize; - let mut sum = 0; - let mut iter = subgraph.iter(); - for _ in 0..max_clique { - sum += self.weights[iter.next().unwrap()]; - }; - - sum - } - - pub fn color_bound(&self, subgraph: &BitSet) -> usize{ - // Greedy coloring - let mut colors: Vec = vec![-1; self.len()]; - let mut num_colors = 0; - let mut largest: Vec = Vec::new(); - - - for v in (0..self.matches.len()).rev() { - if !subgraph.contains(v) { - continue; - } - - let mut used: Vec = vec![0; num_colors]; - - for u in subgraph.intersection(&self.graph[v]) { - if colors[u] != -1 { - used[colors[u] as usize] = 1; - } - } - - let mut max = 0; - let mut max_idx = num_colors; - for i in 0..num_colors { - if used[i] == 0 && largest[i] > max { - max = largest[i]; - max_idx = i; - } - } - - if max_idx == num_colors { - num_colors += 1; - largest.push(0); - } - if self.weights[v] > largest[max_idx] { - largest[max_idx] = self.weights[v] - } - - colors[v] = max_idx as i32; - } - - largest.iter().sum::() + pub fn savings_ground_truth(&self, subgraph: &BitSet) -> usize { + self.savings_ground_truth_recurse(0, 0, subgraph) } - pub fn cover_bound(&self, subgraph: &BitSet, sort: bool) -> usize { - // Sort vertices - if sort { - let mut vertices: Vec<(usize, usize)> = Vec::with_capacity(subgraph.len()); - for v in subgraph { - vertices.push((v, self.degree(v, subgraph))); - } - vertices.sort_by(|a, b| b.1.cmp(&a.1)); - self.cover_bound_helper(subgraph, vertices.iter().map(|(v, _)| *v)) - } - else { - let vertices = (0..self.matches.len()).rev().filter(|v| subgraph.contains(*v)); - self.cover_bound_helper(subgraph, vertices) + fn savings_ground_truth_recurse(&self, ix: usize, mut best: usize, subgraph: &BitSet) -> usize { + if subgraph.len() == 0 { + return ix; } - } - - fn cover_bound_helper(&self, subgraph: &BitSet, iter: impl Iterator) -> usize { - let mut colors: Vec>> = vec![None; self.len()]; - let mut col_weights = vec![]; - let mut num_col = 0; - - for v in iter { - let mut v_col = Vec::new(); - let mut used = vec![0; num_col]; - - // Find colors used in neighborhood of v - for u in subgraph.intersection(&self.graph[v]) { - let Some(u_col) = &colors[u] else { - continue; - }; - - for c in u_col { - used[*c] = 1; - } - } - - let mut total_weight = 0; - let v_val = self.weights[v]; - // Find colors to give to v - for c in 0..num_col { - if used[c] == 1 { - continue; - } - - v_col.push(c); - total_weight += col_weights[c]; - - if total_weight >= v_val { - break; - } - } - - if total_weight == 0 { - v_col.push(num_col); - col_weights.push(v_val); - num_col += 1 - } - else if total_weight < v_val { - let mut k = num_col - 1; - while used[k] == 1 { - k -= 1 - } - col_weights[k] += v_val - total_weight - } - - colors[v] = Some(v_col); - }; - - col_weights.iter().sum() - } - - pub fn frag_bound(&self, subgraph: &BitSet, fragments: &[BitSet]) -> usize { - let total_bonds = fragments.iter().map(|x| x.len()).sum::(); - let mut bound = 0; - let sizes = { - let mut vec = vec![]; - let mut prev = 0; - for s in subgraph.iter().map(|v| self.weights[v] + 1) { - if s != prev { - vec.push(s); - prev = s; - } - } - - vec - }; - - for i in sizes { - let mut bound_temp = 0; - let mut has_bonds = fragments.len(); - let mut num_bonds: Vec = fragments.iter().map(|x| x.len()).collect(); - let mut smallest_remove = i; - - for v in subgraph.iter() { - if has_bonds == 0 { - break; - } - if self.weights[v] + 1 > i { - continue; - } - - - let dup = &self.matches[v]; - let bond = dup.1.iter().next().unwrap(); - let mut j = 0; - while !fragments[j].contains(bond) { - j += 1; - } - - if num_bonds[j] > 0 { - let remove = std::cmp::min(dup.0.len(), num_bonds[j]); - bound_temp += 1; - num_bonds[j] -= remove; - smallest_remove = std::cmp::min(smallest_remove, remove); + let mut cx = ix; - if num_bonds[j] == 0 { - has_bonds -= 1; - } - } - } + // Search for duplicatable fragment + for v in subgraph.iter() { + let subgraph_clone = self.forward_neighbors(v, &subgraph); - let leftover = num_bonds.iter().sum::(); - let log = { - if leftover > 0 { - 0 - } - else { - (smallest_remove as f32).log2().ceil() as usize - } - }; - bound = std::cmp::max(bound, total_bonds - bound_temp - leftover - log); + cx = cx.max(self.savings_ground_truth_recurse( + ix + self.weights[v], + best, + &subgraph_clone, + )); + best = best.max(cx); } - bound + cx } - } \ No newline at end of file From f7503a370235c531b3bbb094869b8a11ff66fff8 Mon Sep 17 00:00:00 2001 From: Garrett-Pz Date: Fri, 18 Jul 2025 12:40:58 -0700 Subject: [PATCH 07/11] Link new clique bounds to CLI params --- src/assembly.rs | 2 ++ src/bounds.rs | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/assembly.rs b/src/assembly.rs index b605fd2d..3182fff3 100644 --- a/src/assembly.rs +++ b/src/assembly.rs @@ -214,6 +214,8 @@ fn recurse_index_search_serial( best_index, largest_remove, bounds, + matches_graph, + &subgraph, ) { return (state_index, 1); } diff --git a/src/bounds.rs b/src/bounds.rs index 3c3bc6bc..cb562016 100644 --- a/src/bounds.rs +++ b/src/bounds.rs @@ -45,6 +45,8 @@ pub fn bound_exceeded( best: usize, largest_remove: usize, bounds: &[Bound], + matches_graph: &CompatGraph, + subgraph: &BitSet, ) -> bool { for bound_type in bounds { let exceeds = match bound_type { @@ -54,6 +56,9 @@ pub fn bound_exceeded( Bound::VecSmallFrags => { ix - vec_small_frags_bound(fragments, largest_remove, mol) >= best } + Bound::CliqueBudget => ix - clique_budget_bound(matches_graph, subgraph, fragments) >= best, + Bound::CoverNoSort => ix - cover_bound(matches_graph, subgraph, false), + Bound::CoverSort => ix - cover_bound(matches_graph, subgraph, true), _ => { panic!("One of the chosen bounds is not implemented yet!") } @@ -338,7 +343,7 @@ fn cover_bound_helper(graph: &CompatGraph, subgraph: &BitSet, iter: impl Iterato col_weights.iter().sum() } -pub fn frag_bound(graph: &CompatGraph, subgraph: &BitSet, fragments: &[BitSet]) -> usize { +pub fn clique_budget_bound(graph: &CompatGraph, subgraph: &BitSet, fragments: &[BitSet]) -> usize { let total_bonds = fragments.iter().map(|x| x.len()).sum::(); let mut bound = 0; let sizes = { From 306b8e50cb563db9f369f34737e509794d74b532 Mon Sep 17 00:00:00 2001 From: Garrett-Pz Date: Fri, 18 Jul 2025 13:01:12 -0700 Subject: [PATCH 08/11] Make parallel depthone use clique --- src/assembly.rs | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/src/assembly.rs b/src/assembly.rs index 3182fff3..76e7cfbe 100644 --- a/src/assembly.rs +++ b/src/assembly.rs @@ -275,6 +275,8 @@ fn recurse_index_search_depthone( state_index: usize, best_index: Arc, bounds: &[Bound], + matches_graph: &CompatGraph, + subgraph: BitSet, ) -> (usize, usize) { // Keep track of the number of states searched, including this one. let states_searched = Arc::new(AtomicUsize::from(1)); @@ -282,8 +284,11 @@ fn recurse_index_search_depthone( // For every pair of duplicatable subgraphs compatible with the current set // of fragments, recurse using the fragments obtained by removing this pair // and adding one subgraph back. - matches.par_iter().enumerate().for_each(|(i, (h1, h2))| { + subgraph.par_iter().for_each(|v| { + let (h1, h2) = matches_graph.matches(v); if let Some(fractures) = fractures(mol, fragments, h1, h2) { + let subgraph_clone = matches_graph.forward_neighbors(v, &subgraph); + // Recurse using the remaining matches and updated fragments. let (child_index, child_states_searched) = recurse_index_search_depthone_helper( mol, @@ -291,8 +296,9 @@ fn recurse_index_search_depthone( &fractures, state_index - h1.len() + 1, best_index.clone(), - h1.len(), bounds, + matches_graph, + subgraph_clone, ); // Update the best assembly indices (across children states and @@ -327,9 +333,19 @@ fn recurse_index_search_depthone_helper( fragments: &[BitSet], state_index: usize, best_index: Arc, - largest_remove: usize, bounds: &[Bound], + matches_graph: &CompatGraph, + subgraph: BitSet, ) -> (usize, usize) { + let largest_remove = { + if let Some(v) = subgraph.iter().next() { + matches_graph.weight(v) + 1 + } + else { + return (state_index, 1); + } + }; + // If any bounds are exceeded, halt this search branch. if bound_exceeded( mol, @@ -338,6 +354,8 @@ fn recurse_index_search_depthone_helper( best_index.load(Relaxed), largest_remove, bounds, + matches_graph, + &subgraph, ) { return (state_index, 1); } @@ -350,7 +368,7 @@ fn recurse_index_search_depthone_helper( // For every pair of duplicatable subgraphs compatible with the current set // of fragments, recurse using the fragments obtained by removing this pair // and adding one subgraph back. - for (i, (h1, h2)) in matches.iter().enumerate() { + for v in subgraph.iter() { if let Some(fractures) = fractures(mol, fragments, h1, h2) { // Recurse using the remaining matches and updated fragments. let (child_index, child_states_searched) = recurse_index_search_depthone_helper( @@ -359,8 +377,9 @@ fn recurse_index_search_depthone_helper( &fractures, state_index - h1.len() + 1, best_index.clone(), - h1.len(), bounds, + matches_graph, + subgraph_clone, ); // Update the best assembly indices (across children states and @@ -523,6 +542,11 @@ pub fn index_search( // Enumerate non-overlapping isomorphic subgraph pairs. let matches = matches(mol, enumerate_mode, canonize_mode).collect::>(); + let matches_graph = CompatGraph::new(matches); + let mut subgraph = BitSet::with_capacity(matches_graph.len()); + for i in 0..matches_graph.len() { + subgraph.insert(i); + } // Initialize the first fragment as the entire graph. let mut init = BitSet::new(); @@ -534,11 +558,6 @@ pub fn index_search( let (index, states_searched) = match parallel_mode { ParallelMode::None => { // TODO: remove matches.clone() call - let matches_graph = CompatGraph::new(matches.clone()); - let mut subgraph = BitSet::with_capacity(matches_graph.len()); - for i in 0..matches_graph.len() { - subgraph.insert(i); - } recurse_index_search_serial( mol, @@ -559,6 +578,8 @@ pub fn index_search( edge_count - 1, best_index, bounds, + &matches_graph, + subgraph, ) } ParallelMode::Always => { @@ -575,7 +596,7 @@ pub fn index_search( } }; - (index as u32, matches.len() as u32, states_searched) + (index as u32, matches_graph.len() as u32, states_searched) } /// Computes a molecule's assembly index using an efficient default strategy. From 88132101ca9284c16ffbe14afe2bfe2c88953f06 Mon Sep 17 00:00:00 2001 From: Garrett-Pz Date: Fri, 18 Jul 2025 13:28:22 -0700 Subject: [PATCH 09/11] Make always parallel mode use clique --- src/assembly.rs | 32 +++++++++++++++++++------------- src/bounds.rs | 9 +++------ 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/assembly.rs b/src/assembly.rs index 76e7cfbe..79d22ed0 100644 --- a/src/assembly.rs +++ b/src/assembly.rs @@ -28,7 +28,7 @@ use std::{ use bit_set::BitSet; use clap::ValueEnum; -use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use crate::{ bounds::{bound_exceeded, Bound}, @@ -270,7 +270,6 @@ fn recurse_index_search_serial( #[allow(clippy::too_many_arguments)] fn recurse_index_search_depthone( mol: &Molecule, - matches: &[(BitSet, BitSet)], fragments: &[BitSet], state_index: usize, best_index: Arc, @@ -284,15 +283,14 @@ fn recurse_index_search_depthone( // For every pair of duplicatable subgraphs compatible with the current set // of fragments, recurse using the fragments obtained by removing this pair // and adding one subgraph back. - subgraph.par_iter().for_each(|v| { - let (h1, h2) = matches_graph.matches(v); + subgraph.iter().collect::>().par_iter().for_each(|v| { + let (h1, h2) = matches_graph.matches(*v); if let Some(fractures) = fractures(mol, fragments, h1, h2) { - let subgraph_clone = matches_graph.forward_neighbors(v, &subgraph); + let subgraph_clone = matches_graph.forward_neighbors(*v, &subgraph); // Recurse using the remaining matches and updated fragments. let (child_index, child_states_searched) = recurse_index_search_depthone_helper( mol, - &matches[i + 1..], &fractures, state_index - h1.len() + 1, best_index.clone(), @@ -329,7 +327,6 @@ fn recurse_index_search_depthone( #[allow(clippy::too_many_arguments)] fn recurse_index_search_depthone_helper( mol: &Molecule, - matches: &[(BitSet, BitSet)], fragments: &[BitSet], state_index: usize, best_index: Arc, @@ -369,11 +366,13 @@ fn recurse_index_search_depthone_helper( // of fragments, recurse using the fragments obtained by removing this pair // and adding one subgraph back. for v in subgraph.iter() { + let (h1, h2) = matches_graph.matches(v); if let Some(fractures) = fractures(mol, fragments, h1, h2) { + let subgraph_clone = matches_graph.forward_neighbors(v, &subgraph); + // Recurse using the remaining matches and updated fragments. let (child_index, child_states_searched) = recurse_index_search_depthone_helper( mol, - &matches[i + 1..], &fractures, state_index - h1.len() + 1, best_index.clone(), @@ -410,12 +409,13 @@ fn recurse_index_search_depthone_helper( #[allow(clippy::too_many_arguments)] fn recurse_index_search_parallel( mol: &Molecule, - matches: &[(BitSet, BitSet)], fragments: &[BitSet], state_index: usize, best_index: Arc, largest_remove: usize, bounds: &[Bound], + matches_graph: &CompatGraph, + subgraph: BitSet, ) -> (usize, usize) { // If any bounds are exceeded, halt this search branch. if bound_exceeded( @@ -425,6 +425,8 @@ fn recurse_index_search_parallel( best_index.load(Relaxed), largest_remove, bounds, + matches_graph, + &subgraph, ) { return (state_index, 1); } @@ -437,17 +439,21 @@ fn recurse_index_search_parallel( // For every pair of duplicatable subgraphs compatible with the current set // of fragments, recurse using the fragments obtained by removing this pair // and adding one subgraph back. - matches.par_iter().enumerate().for_each(|(i, (h1, h2))| { + subgraph.iter().collect::>().par_iter().for_each(|v| { + let (h1, h2) = matches_graph.matches(*v); if let Some(fractures) = fractures(mol, fragments, h1, h2) { + let subgraph_clone = matches_graph.forward_neighbors(*v, &subgraph); + // Recurse using the remaining matches and updated fragments. let (child_index, child_states_searched) = recurse_index_search_parallel( mol, - &matches[i + 1..], &fractures, state_index - h1.len() + 1, best_index.clone(), h1.len(), bounds, + matches_graph, + subgraph_clone, ); // Update the best assembly indices (across children states and @@ -573,7 +579,6 @@ pub fn index_search( let best_index = Arc::new(AtomicUsize::from(edge_count - 1)); recurse_index_search_depthone( mol, - &matches, &[init], edge_count - 1, best_index, @@ -586,12 +591,13 @@ pub fn index_search( let best_index = Arc::new(AtomicUsize::from(edge_count - 1)); recurse_index_search_parallel( mol, - &matches, &[init], edge_count - 1, best_index, edge_count, bounds, + &matches_graph, + subgraph, ) } }; diff --git a/src/bounds.rs b/src/bounds.rs index cb562016..c694ef6e 100644 --- a/src/bounds.rs +++ b/src/bounds.rs @@ -57,11 +57,8 @@ pub fn bound_exceeded( ix - vec_small_frags_bound(fragments, largest_remove, mol) >= best } Bound::CliqueBudget => ix - clique_budget_bound(matches_graph, subgraph, fragments) >= best, - Bound::CoverNoSort => ix - cover_bound(matches_graph, subgraph, false), - Bound::CoverSort => ix - cover_bound(matches_graph, subgraph, true), - _ => { - panic!("One of the chosen bounds is not implemented yet!") - } + Bound::CoverNoSort => ix - cover_bound(matches_graph, subgraph, false) >= best, + Bound::CoverSort => ix - cover_bound(matches_graph, subgraph, true) >= best, }; if exceeds { return true; @@ -263,7 +260,7 @@ pub fn color_bound(graph: &CompatGraph, subgraph: &BitSet) -> usize{ largest.push(0); } if graph.weight(v) > largest[max_idx] { - largest[max_idx] = graph.weights(v) + largest[max_idx] = graph.weight(v); } colors[v] = max_idx as i32; From b4894206973f87f15b70424b881ee3a7da1396f6 Mon Sep 17 00:00:00 2001 From: Garrett-Pz Date: Fri, 18 Jul 2025 13:55:57 -0700 Subject: [PATCH 10/11] Remove parallel depth one helper --- src/assembly.rs | 101 +++++------------------------------------------- 1 file changed, 9 insertions(+), 92 deletions(-) diff --git a/src/assembly.rs b/src/assembly.rs index 79d22ed0..31b15275 100644 --- a/src/assembly.rs +++ b/src/assembly.rs @@ -193,7 +193,7 @@ fn recurse_index_search_serial( mol: &Molecule, fragments: &[BitSet], state_index: usize, - mut best_index: usize, + best_index: Arc, bounds: &[Bound], matches_graph: &CompatGraph, subgraph: BitSet, @@ -206,12 +206,13 @@ fn recurse_index_search_serial( return (state_index, 1); } }; + // If any bounds are exceeded, halt this search branch. if bound_exceeded( mol, fragments, state_index, - best_index, + best_index.load(Relaxed), largest_remove, bounds, matches_graph, @@ -232,13 +233,13 @@ fn recurse_index_search_serial( let (h1, h2) = matches_graph.matches(v); if let Some(fractures) = fractures(mol, fragments, h1, h2) { let subgraph_clone = matches_graph.forward_neighbors(v, &subgraph); - + // Recurse using the remaining matches and updated fragments. let (child_index, child_states_searched) = recurse_index_search_serial( mol, &fractures, state_index - h1.len() + 1, - best_index, + best_index.clone(), bounds, matches_graph, subgraph_clone, @@ -247,14 +248,13 @@ fn recurse_index_search_serial( // Update the best assembly indices (across children states and // the entire search) and the number of descendant states searched. best_child_index = best_child_index.min(child_index); - best_index = best_index.min(best_child_index); + best_index.fetch_min(best_child_index, Relaxed); states_searched += child_states_searched; } } (best_child_index, states_searched) } - /// Recursive helper for the depth-one parallel version of index_search. /// /// Inputs: @@ -289,7 +289,7 @@ fn recurse_index_search_depthone( let subgraph_clone = matches_graph.forward_neighbors(*v, &subgraph); // Recurse using the remaining matches and updated fragments. - let (child_index, child_states_searched) = recurse_index_search_depthone_helper( + let (child_index, child_states_searched) = recurse_index_search_serial( mol, &fractures, state_index - h1.len() + 1, @@ -309,88 +309,6 @@ fn recurse_index_search_depthone( (best_index.load(Relaxed), states_searched.load(Relaxed)) } -/// Recursive helper for the depth-one parallel version of index_search. -/// -/// Inputs: -/// - `mol`: The molecule whose assembly index is being calculated. -/// - `matches`: The remaining non-overlapping isomorphic subgraph pairs. -/// - `fragments`: TODO -/// - `state_index`: The assembly index of this assembly state. -/// - `best_index`: The smallest assembly index for all assembly states so far. -/// - `largest_remove`: An upper bound on the size of fragments that can be -/// removed from this or any descendant state. -/// - `bounds`: The list of bounding strategies to apply. -/// -/// Returns, from this assembly state and any of its descendents: -/// - `usize`: The best assembly index found. -/// - `usize`: The number of assembly states searched. -#[allow(clippy::too_many_arguments)] -fn recurse_index_search_depthone_helper( - mol: &Molecule, - fragments: &[BitSet], - state_index: usize, - best_index: Arc, - bounds: &[Bound], - matches_graph: &CompatGraph, - subgraph: BitSet, -) -> (usize, usize) { - let largest_remove = { - if let Some(v) = subgraph.iter().next() { - matches_graph.weight(v) + 1 - } - else { - return (state_index, 1); - } - }; - - // If any bounds are exceeded, halt this search branch. - if bound_exceeded( - mol, - fragments, - state_index, - best_index.load(Relaxed), - largest_remove, - bounds, - matches_graph, - &subgraph, - ) { - return (state_index, 1); - } - - // Keep track of the best assembly index found in any of this assembly - // state's children and the number of states searched, including this one. - let mut best_child_index = state_index; - let mut states_searched = 1; - - // For every pair of duplicatable subgraphs compatible with the current set - // of fragments, recurse using the fragments obtained by removing this pair - // and adding one subgraph back. - for v in subgraph.iter() { - let (h1, h2) = matches_graph.matches(v); - if let Some(fractures) = fractures(mol, fragments, h1, h2) { - let subgraph_clone = matches_graph.forward_neighbors(v, &subgraph); - - // Recurse using the remaining matches and updated fragments. - let (child_index, child_states_searched) = recurse_index_search_depthone_helper( - mol, - &fractures, - state_index - h1.len() + 1, - best_index.clone(), - bounds, - matches_graph, - subgraph_clone, - ); - - // Update the best assembly indices (across children states and - // the entire search) and the number of descendant states searched. - best_child_index = best_child_index.min(child_index); - best_index.fetch_min(best_child_index, Relaxed); - states_searched += child_states_searched; - } - } - - (best_child_index, states_searched) -} /// Recursive helper for the parallel version of index_search. /// @@ -563,13 +481,12 @@ pub fn index_search( // Search for the shortest assembly pathway recursively. let (index, states_searched) = match parallel_mode { ParallelMode::None => { - // TODO: remove matches.clone() call - + let best_index = Arc::new(AtomicUsize::from(edge_count - 1)); recurse_index_search_serial( mol, &[init], edge_count - 1, - edge_count - 1, + best_index, bounds, &matches_graph, subgraph, From 99a2d8dbe2c5bcc85f01998d6a0b562b0a30ff85 Mon Sep 17 00:00:00 2001 From: Garrett-Pz Date: Fri, 18 Jul 2025 14:57:45 -0700 Subject: [PATCH 11/11] Change default bounds --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 1f723c31..efecbb99 100644 --- a/src/main.rs +++ b/src/main.rs @@ -94,7 +94,7 @@ fn main() -> Result<()> { // Handle bounding strategy CLI arguments. let boundlist: &[Bound] = match cli.boundsgroup { // By default, use a combination of the integer and vector bounds. - None => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags], + None => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags, Bound::CliqueBudget, Bound::CoverNoSort], // If --no-bounds is set, do not use any bounds. Some(BoundsGroup { no_bounds: true, ..