From d80d7341aee718851baf799cb2b54fa357a89af3 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Fri, 25 Jul 2025 15:34:59 -0700
Subject: [PATCH 01/51] Add clique reduction

---
 src/lib.rs        |  1 +
 src/reductions.rs | 96 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 src/reductions.rs
diff --git a/src/lib.rs b/src/lib.rs
index a8f8c8c2..4310afda 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -52,6 +52,7 @@ pub mod canonize;
 pub mod enumerate;
 pub mod kernels;
 pub mod memoize;
+pub mod reductions;
 mod vf3;
 
 // Python wrapper.
diff --git a/src/reductions.rs b/src/reductions.rs
new file mode 100644
index 00000000..5ed0da68
--- /dev/null
+++ b/src/reductions.rs
@@ -0,0 +1,96 @@
+use bit_set::BitSet;
+
+pub struct CompatGraph {
+    graph: Vec<BitSet>,
+    weights: Vec<usize>,
+    matches: Vec<(BitSet, BitSet)>
+}
+
+impl CompatGraph {
+    pub fn new(init_matches: Vec<(BitSet, BitSet)>) -> Self {
+        let size = init_matches.len();
+
+        // Initialize weights and empty graph
+        let mut init_graph: Vec<BitSet> = Vec::with_capacity(size);
+        let mut init_weights: Vec<usize> = Vec::with_capacity(size);
+        for m in init_matches.iter() {
+            init_graph.push(BitSet::with_capacity(size));
+            init_weights.push(m.0.len() - 1);
+        }
+
+        // Populate graph
+        for (idx1, (h1, h2)) in init_matches.iter().enumerate() {
+            for (idx2, (h1p, h2p)) in init_matches[idx1 + 1..].iter().enumerate() {
+                let idx2 = idx2 + idx1 + 1;
+
+                let forward_compatible = {
+                    h2.is_disjoint(h1p) && 
+                    h2.is_disjoint(h2p) &&
+                    (h1.is_disjoint(h1p) || h1.is_superset(h1p)) &&
+                    (h1.is_disjoint(h2p) || h1.is_superset(h2p))
+                };
+
+                if forward_compatible {
+                    init_graph[idx1].insert(idx2);
+                    init_graph[idx2].insert(idx1);
+                }
+            }
+        }
+
+        Self {
+            graph: init_graph,
+            weights: init_weights,
+            matches: init_matches,
+        }
+    }
+
+    pub fn weight(&self, v: usize) -> usize {
+        self.weights[v]
+    }
+
+    pub fn matches(&self, v: usize) -> &(BitSet, BitSet) {
+        &self.matches[v]
+    }
+
+    pub fn len(&self) -> usize {
+        self.graph.len()
+    }
+
+    pub fn degree(&self, v: usize, subgraph: &BitSet) -> usize {
+        self.graph[v].intersection(subgraph).count()
+    }
+
+    pub fn compatible_with(&self, v: usize) -> &BitSet {
+        &self.graph[v]
+    }
+
+    pub fn neighbors(&self, v: usize, subgraph: &BitSet) -> BitSet {
+        let mut neighbors = self.graph[v].clone();
+        neighbors.intersect_with(subgraph);
+        
+        neighbors
+    }
+
+    pub fn forward_neighbors(&self, v: usize, subgraph: &BitSet) -> BitSet {
+        let mut neighbors = self.graph[v].clone();
+        neighbors.intersect_with(subgraph);
+        let mut to_remove = vec![];
+        for u in neighbors.iter() {
+            if u <= v {
+                to_remove.push(u);
+            }
+            if u > v {
+                break;
+            }
+        }
+        for u in to_remove {
+            neighbors.remove(u);
+        }
+
+        neighbors
+    }
+
+    pub fn are_adjacent(&self, v: usize, u: usize) -> bool {
+        self.graph[v].contains(u)
+    }
+}
\ No newline at end of file

From 5ebfad0e4efc8a631f936c8ff759bd7fcf72605a Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Mon, 28 Jul 2025 14:08:12 -0700
Subject: [PATCH 02/51] Implement subgraph and new loop in index search

---
 src/assembly.rs   | 39 ++++++++++++++++++++++++++++++---------
 src/reductions.rs | 14 ++++++++++----
 2 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 79aa3f9a..75a4908a 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -38,6 +38,7 @@ use crate::{
     memoize::{Cache, MemoizeMode},
     molecule::Molecule,
     utils::connected_components_under_edges,
+    reductions::CompatGraph,
 };
 
 /// Parallelization strategy for the recursive search phase.
@@ -217,7 +218,9 @@ fn fragments(mol: &Molecule, state: &[BitSet], h1: &BitSet, h2: &BitSet) -> Opti
 #[allow(clippy::too_many_arguments)]
 pub fn recurse_index_search(
     mol: &Molecule,
-    matches: &[(BitSet, BitSet)],
+    matches: &Vec<(BitSet, BitSet)>,
+    graph: &CompatGraph,
+    subgraph: BitSet,
     removal_order: Vec<usize>,
     state: &[BitSet],
     state_index: usize,
@@ -248,7 +251,8 @@ pub fn recurse_index_search(
 
     // Define a closure that handles recursing to a new assembly state based on
     // the given (enumerated) pair of non-overlapping isomorphic subgraphs.
-    let recurse_on_match = |i: usize, h1: &BitSet, h2: &BitSet| {
+    let recurse_on_match = |i: usize| {
+        let (h1, h2) = &matches[i];
         if let Some(fragments) = fragments(mol, state, h1, h2) {
             // If using depth-one parallelism, all descendant states should be
             // computed serially.
@@ -258,10 +262,20 @@ pub fn recurse_index_search(
                 parallel_mode
             };
 
+            // TODO: should create a method for updating subgraph
+            let mut sub_clone = subgraph.clone();
+            let mut node = subgraph.iter().next().unwrap();
+            while node <= i {
+                sub_clone.remove(node);
+                node += 1;
+            }
+
             // Recurse using the remaining matches and updated fragments.
             let (child_index, child_states_searched) = recurse_index_search(
                 mol,
-                &matches[i + 1..],
+                matches,
+                graph,
+                sub_clone,
                 {
                     let mut clone = removal_order.clone();
                     clone.push(i);
@@ -286,15 +300,15 @@ pub fn recurse_index_search(
 
     // Use the iterator type corresponding to the specified parallelism mode.
     if parallel_mode == ParallelMode::None {
-        matches
+        subgraph
             .iter()
-            .enumerate()
-            .for_each(|(i, (h1, h2))| recurse_on_match(i, h1, h2));
+            .for_each(|i| recurse_on_match(i));
     } else {
-        matches
+        subgraph
+            .iter()
+            .collect::<Vec<usize>>()
             .par_iter()
-            .enumerate()
-            .for_each(|(i, (h1, h2))| recurse_on_match(i, h1, h2));
+            .for_each(|i | recurse_on_match(*i));
     }
 
     (
@@ -390,12 +404,19 @@ pub fn index_search(
     let mut init = BitSet::new();
     init.extend(mol.graph().edge_indices().map(|ix| ix.index()));
 
+    let mut subgraph = BitSet::with_capacity(matches.len());
+    for i in 0..matches.len() {
+        subgraph.insert(i);
+    }
+
     // Search for the shortest assembly pathway recursively.
     let edge_count = mol.graph().edge_count();
     let best_index = Arc::new(AtomicUsize::from(edge_count - 1));
     let (index, states_searched) = recurse_index_search(
         mol,
         &matches,
+        &CompatGraph::new(),
+        subgraph,
         Vec::new(),
         &[init],
         edge_count - 1,
diff --git a/src/reductions.rs b/src/reductions.rs
index 5ed0da68..6bca30b8 100644
--- a/src/reductions.rs
+++ b/src/reductions.rs
@@ -2,12 +2,18 @@ use bit_set::BitSet;
 
 pub struct CompatGraph {
     graph: Vec<BitSet>,
-    weights: Vec<usize>,
-    matches: Vec<(BitSet, BitSet)>
+    nodes: Vec<usize>,
 }
 
 impl CompatGraph {
-    pub fn new(init_matches: Vec<(BitSet, BitSet)>) -> Self {
+    pub fn new () -> Self {
+        Self {
+            graph: Vec::new(),
+            nodes: Vec::new(),
+        }
+    }
+
+    /*pub fn new(init_matches: Vec<(BitSet, BitSet)>) -> Self {
         let size = init_matches.len();
 
         // Initialize weights and empty graph
@@ -92,5 +98,5 @@ impl CompatGraph {
 
     pub fn are_adjacent(&self, v: usize, u: usize) -> bool {
         self.graph[v].contains(u)
-    }
+    }*/
 }
\ No newline at end of file

From 6cb9fe18d6f8c9e0dc4ffa1e8ba89adcff3d5034 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Mon, 28 Jul 2025 15:10:12 -0700
Subject: [PATCH 03/51] Implement using CompatGraph to choose next match

---
 src/assembly.rs   | 22 +++++++++++-----------
 src/reductions.rs | 20 ++++----------------
 2 files changed, 15 insertions(+), 27 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 75a4908a..67a8fcfd 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -251,8 +251,8 @@ pub fn recurse_index_search(
 
     // Define a closure that handles recursing to a new assembly state based on
     // the given (enumerated) pair of non-overlapping isomorphic subgraphs.
-    let recurse_on_match = |i: usize| {
-        let (h1, h2) = &matches[i];
+    let recurse_on_match = |v: usize| {
+        let (h1, h2) = &matches[v];
         if let Some(fragments) = fragments(mol, state, h1, h2) {
             // If using depth-one parallelism, all descendant states should be
             // computed serially.
@@ -263,12 +263,12 @@ pub fn recurse_index_search(
             };
 
             // TODO: should create a method for updating subgraph
-            let mut sub_clone = subgraph.clone();
-            let mut node = subgraph.iter().next().unwrap();
-            while node <= i {
-                sub_clone.remove(node);
-                node += 1;
-            }
+            let mut sub_clone = BitSet::with_capacity(matches.len());
+            /*let mut node = subgraph.iter().next().unwrap();
+            for j in (i+1)..matches.len() {
+                sub_clone.insert(j);
+            }*/
+            sub_clone.intersect_with(&graph.forward_neighbors(v, &subgraph));
 
             // Recurse using the remaining matches and updated fragments.
             let (child_index, child_states_searched) = recurse_index_search(
@@ -302,13 +302,13 @@ pub fn recurse_index_search(
     if parallel_mode == ParallelMode::None {
         subgraph
             .iter()
-            .for_each(|i| recurse_on_match(i));
+            .for_each(|v| recurse_on_match(v));
     } else {
         subgraph
             .iter()
             .collect::<Vec<usize>>()
             .par_iter()
-            .for_each(|i | recurse_on_match(*i));
+            .for_each(|v | recurse_on_match(*v));
     }
 
     (
@@ -415,7 +415,7 @@ pub fn index_search(
     let (index, states_searched) = recurse_index_search(
         mol,
         &matches,
-        &CompatGraph::new(),
+        &CompatGraph::new(&matches),
         subgraph,
         Vec::new(),
         &[init],
diff --git a/src/reductions.rs b/src/reductions.rs
index 6bca30b8..09862c05 100644
--- a/src/reductions.rs
+++ b/src/reductions.rs
@@ -2,26 +2,16 @@ use bit_set::BitSet;
 
 pub struct CompatGraph {
     graph: Vec<BitSet>,
-    nodes: Vec<usize>,
 }
 
 impl CompatGraph {
-    pub fn new () -> Self {
-        Self {
-            graph: Vec::new(),
-            nodes: Vec::new(),
-        }
-    }
-
-    /*pub fn new(init_matches: Vec<(BitSet, BitSet)>) -> Self {
+    pub fn new(init_matches: &Vec<(BitSet, BitSet)>) -> Self {
         let size = init_matches.len();
 
         // Initialize weights and empty graph
         let mut init_graph: Vec<BitSet> = Vec::with_capacity(size);
-        let mut init_weights: Vec<usize> = Vec::with_capacity(size);
         for m in init_matches.iter() {
             init_graph.push(BitSet::with_capacity(size));
-            init_weights.push(m.0.len() - 1);
         }
 
         // Populate graph
@@ -45,18 +35,16 @@ impl CompatGraph {
 
         Self {
             graph: init_graph,
-            weights: init_weights,
-            matches: init_matches,
         }
     }
 
-    pub fn weight(&self, v: usize) -> usize {
+    /*pub fn weight(&self, v: usize) -> usize {
         self.weights[v]
     }
 
     pub fn matches(&self, v: usize) -> &(BitSet, BitSet) {
         &self.matches[v]
-    }
+    }*/
 
     pub fn len(&self) -> usize {
         self.graph.len()
@@ -98,5 +86,5 @@ impl CompatGraph {
 
     pub fn are_adjacent(&self, v: usize, u: usize) -> bool {
         self.graph[v].contains(u)
-    }*/
+    }
 }
\ No newline at end of file

From 6ccb952d764dd157b4ae445342e74d90cdff0d13 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Mon, 28 Jul 2025 15:24:20 -0700
Subject: [PATCH 04/51] Add CLI argument for clique

---
 src/assembly.rs   | 32 +++++++++++++++++++++++---------
 src/main.rs       |  4 ++++
 src/python.rs     |  2 ++
 src/reductions.rs |  2 +-
 4 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 67a8fcfd..fde929aa 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -28,7 +28,7 @@ use std::{
 
 use bit_set::BitSet;
 use clap::ValueEnum;
-use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
+use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 
 use crate::{
     bounds::{bound_exceeded, Bound},
@@ -219,7 +219,7 @@ fn fragments(mol: &Molecule, state: &[BitSet], h1: &BitSet, h2: &BitSet) -> Opti
 pub fn recurse_index_search(
     mol: &Molecule,
     matches: &Vec<(BitSet, BitSet)>,
-    graph: &CompatGraph,
+    graph: &Option<CompatGraph>,
     subgraph: BitSet,
     removal_order: Vec<usize>,
     state: &[BitSet],
@@ -262,13 +262,16 @@ pub fn recurse_index_search(
                 parallel_mode
             };
 
-            // TODO: should create a method for updating subgraph
+            // Update subgraph
             let mut sub_clone = BitSet::with_capacity(matches.len());
-            /*let mut node = subgraph.iter().next().unwrap();
-            for j in (i+1)..matches.len() {
-                sub_clone.insert(j);
-            }*/
-            sub_clone.intersect_with(&graph.forward_neighbors(v, &subgraph));
+            if let Some(g) = graph {
+                sub_clone.intersect_with(&g.forward_neighbors(v, &subgraph));
+            }
+            else {
+                for j in (v+1)..matches.len() {
+                    sub_clone.insert(j);
+                }
+            }
 
             // Recurse using the remaining matches and updated fragments.
             let (child_index, child_states_searched) = recurse_index_search(
@@ -388,6 +391,7 @@ pub fn index_search(
     memoize_mode: MemoizeMode,
     kernel_mode: KernelMode,
     bounds: &[Bound],
+    clique: bool,
 ) -> (u32, u32, usize) {
     // Catch not-yet-implemented modes.
     if kernel_mode != KernelMode::None {
@@ -400,6 +404,15 @@ pub fn index_search(
     // Create memoization cache.
     let mut cache = Cache::new(memoize_mode, canonize_mode);
 
+    let graph = {
+        if clique {
+            Some(CompatGraph::new(&matches))
+        }
+        else {
+            None
+        }
+    };
+
     // Initialize the first fragment as the entire graph.
     let mut init = BitSet::new();
     init.extend(mol.graph().edge_indices().map(|ix| ix.index()));
@@ -415,7 +428,7 @@ pub fn index_search(
     let (index, states_searched) = recurse_index_search(
         mol,
         &matches,
-        &CompatGraph::new(&matches),
+        &graph,
         subgraph,
         Vec::new(),
         &[init],
@@ -458,6 +471,7 @@ pub fn index(mol: &Molecule) -> u32 {
         MemoizeMode::CanonIndex,
         KernelMode::None,
         &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags],
+        false,
     )
     .0
 }
diff --git a/src/main.rs b/src/main.rs
index 6df6f4e1..9fec220e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -55,6 +55,9 @@ struct Cli {
     /// Strategy for performing graph kernelization during the search phase.
     #[arg(long, value_enum, default_value_t = KernelMode::None)]
     kernel: KernelMode,
+
+    #[arg(long)]
+    clique: bool,
 }
 
 #[derive(Args, Debug)]
@@ -116,6 +119,7 @@ fn main() -> Result<()> {
         cli.memoize,
         cli.kernel,
         boundlist,
+        cli.clique,
     );
 
     // Print final output, depending on --verbose.
diff --git a/src/python.rs b/src/python.rs
index df74f5f2..e224e0c0 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -470,6 +470,8 @@ pub fn _index_search(
         memoize_mode,
         kernel_mode,
         &boundlist,
+        // TODO: implement clique CLI param for python interface
+        false,
     ))
 }
 
diff --git a/src/reductions.rs b/src/reductions.rs
index 09862c05..93519e74 100644
--- a/src/reductions.rs
+++ b/src/reductions.rs
@@ -10,7 +10,7 @@ impl CompatGraph {
 
         // Initialize weights and empty graph
         let mut init_graph: Vec<BitSet> = Vec::with_capacity(size);
-        for m in init_matches.iter() {
+        for _ in 0..init_matches.len() {
             init_graph.push(BitSet::with_capacity(size));
         }
 

From f8f043774779735289a86e444bd82fc49e8dd2de Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Mon, 28 Jul 2025 15:29:34 -0700
Subject: [PATCH 05/51] Fix search with clique

---
 src/assembly.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index fde929aa..e3743f23 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -263,11 +263,13 @@ pub fn recurse_index_search(
             };
 
             // Update subgraph
-            let mut sub_clone = BitSet::with_capacity(matches.len());
+            let mut sub_clone;
             if let Some(g) = graph {
+                sub_clone = subgraph.clone();
                 sub_clone.intersect_with(&g.forward_neighbors(v, &subgraph));
             }
             else {
+                sub_clone = BitSet::with_capacity(matches.len());
                 for j in (v+1)..matches.len() {
                     sub_clone.insert(j);
                 }

From 39e91db5b31fe30a634e34bef6eea4f996ff2869 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Mon, 28 Jul 2025 15:48:42 -0700
Subject: [PATCH 06/51] Add clique budget bound

---
 src/assembly.rs |  2 ++
 src/bounds.rs   | 73 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index e3743f23..1022fdb9 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -234,7 +234,9 @@ pub fn recurse_index_search(
     // enabled and this assembly state is preempted by the cached state, halt.
     if bound_exceeded(
         mol,
+        matches,
         state,
+        &subgraph,
         state_index,
         best_index.load(Relaxed),
         largest_remove,
diff --git a/src/bounds.rs b/src/bounds.rs
index 5cff5ec5..296ae4cf 100644
--- a/src/bounds.rs
+++ b/src/bounds.rs
@@ -14,7 +14,10 @@
 use bit_set::BitSet;
 use clap::ValueEnum;
 
-use crate::molecule::{Bond, Element, Molecule};
+use crate::{
+    molecule::{Bond, Element, Molecule},
+    reductions::CompatGraph,
+};
 
 /// Type of upper bound on the "savings" possible from an assembly state.
 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
@@ -66,7 +69,9 @@ struct EdgeType {
 /// Returns `true` iff any of the given bounds would prune this assembly state.
 pub fn bound_exceeded(
     mol: &Molecule,
+    matches: &Vec<(BitSet, BitSet)>,
     fragments: &[BitSet],
+    subgraph: &BitSet,
     state_index: usize,
     best_index: usize,
     largest_remove: usize,
@@ -82,6 +87,7 @@ pub fn bound_exceeded(
             Bound::VecSmallFrags => {
                 state_index - vec_small_frags_bound(fragments, largest_remove, mol) >= best_index
             }
+            Bound::CliqueBudget => best_index + clique_budget_bound(matches, subgraph, fragments) <= state_index,
             _ => {
                 panic!("One of the chosen bounds is not implemented yet!")
             }
@@ -239,3 +245,68 @@ fn vec_small_frags_bound(fragments: &[BitSet], m: usize, mol: &Molecule) -> usiz
     s - (z + size_two_types.len() + size_two_fragments.len())
         - ((sl - z) as f32 / m as f32).ceil() as usize
 }
+
+pub fn clique_budget_bound(matches: &Vec<(BitSet, BitSet)>, subgraph: &BitSet, fragments: &[BitSet]) -> usize {
+    let total_bonds = fragments.iter().map(|x| x.len()).sum::<usize>();
+    let mut bound = 0;
+    let sizes = {
+        let mut vec = vec![];
+        let mut prev = 0;
+        for s in subgraph.iter().map(|v| matches[v].0.len()) {
+            if s != prev {
+                vec.push(s);
+                prev = s;
+            }
+        }
+
+        vec
+    };
+    
+    for i in sizes {
+        let mut bound_temp = 0;
+        let mut has_bonds = fragments.len();
+        let mut num_bonds: Vec<usize> = fragments.iter().map(|x| x.len()).collect();
+        let mut smallest_remove = i;
+
+        for v in subgraph.iter() {
+            if has_bonds == 0 {
+                break;
+            }
+            if matches[v].0.len() > i {
+                continue;
+            }
+
+
+            let dup = &matches[v];
+            let bond = dup.1.iter().next().unwrap();
+            let mut j = 0;
+            while !fragments[j].contains(bond) {
+                j += 1;
+            }
+
+            if num_bonds[j] > 0 {
+                let remove = std::cmp::min(dup.0.len(), num_bonds[j]);
+                bound_temp += 1;
+                num_bonds[j] -= remove;
+                smallest_remove = std::cmp::min(smallest_remove, remove);
+
+                if num_bonds[j] == 0 {
+                    has_bonds -= 1;
+                }
+            }
+        }
+
+        let leftover = num_bonds.iter().sum::<usize>();
+        let log = {
+            if leftover > 0 {
+                0
+            }
+            else {
+                (smallest_remove as f32).log2().ceil() as usize
+            }
+        };
+        bound = std::cmp::max(bound, total_bonds - bound_temp - leftover - log);
+    }
+
+    bound
+}

From a981ef41f86feecd9b8a364b4359ebf9f6ba4014 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Mon, 28 Jul 2025 16:01:22 -0700
Subject: [PATCH 07/51] Add default clique bounds. Only allow clique bounds
 with clique

---
 src/main.rs | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 9fec220e..c71f7ede 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -96,20 +96,29 @@ fn main() -> Result<()> {
     }
 
     // Handle bounding strategy CLI arguments.
-    let boundlist: &[Bound] = match cli.boundsgroup {
-        // By default, use a combination of the integer and vector bounds.
-        None => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags],
+    let boundlist: &[Bound] = match (cli.boundsgroup, cli.clique) {
+        // If clique not enable, default to a combination of the integer and vector bounds.
+        (None, false) => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags],
+        // If clique is enabled, default to combination of int, vec, and clique bounds.
+        (None, true) => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags, Bound::CliqueBudget],
         // If --no-bounds is set, do not use any bounds.
-        Some(BoundsGroup {
+        (Some(BoundsGroup {
             no_bounds: true, ..
-        }) => &[],
+        }), _) => &[],
         // Otherwise, use the bounds that were specified.
-        Some(BoundsGroup {
+        (Some(BoundsGroup {
             no_bounds: false,
             bounds,
-        }) => &bounds.clone(),
+        }), _) => &bounds.clone(),
     };
 
+    let clique_bounds = vec![Bound::CliqueBudget, Bound::CoverNoSort, Bound::CoverSort];
+    for bound in clique_bounds {
+        if !cli.clique && boundlist.contains(&bound) {
+            panic!("Bound {:?} can not be used without clique enabled", bound);
+        }
+    }
+
     // Call index calculation with all the various options.
     let (index, num_matches, states_searched) = index_search(
         &mol,

From 2bac102808a4771678143e36e08a18daae460fb2 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Mon, 28 Jul 2025 16:13:18 -0700
Subject: [PATCH 08/51] Add cover bound

---
 src/assembly.rs |  1 +
 src/bounds.rs   | 89 +++++++++++++++++++++++++++++++++++++++++++++++--
 src/main.rs     |  2 +-
 3 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 1022fdb9..07b8f4e6 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -235,6 +235,7 @@ pub fn recurse_index_search(
     if bound_exceeded(
         mol,
         matches,
+        graph,
         state,
         &subgraph,
         state_index,
diff --git a/src/bounds.rs b/src/bounds.rs
index 296ae4cf..73307cc4 100644
--- a/src/bounds.rs
+++ b/src/bounds.rs
@@ -70,6 +70,7 @@ struct EdgeType {
 pub fn bound_exceeded(
     mol: &Molecule,
     matches: &Vec<(BitSet, BitSet)>,
+    graph: &Option<CompatGraph>,
     fragments: &[BitSet],
     subgraph: &BitSet,
     state_index: usize,
@@ -88,8 +89,21 @@ pub fn bound_exceeded(
                 state_index - vec_small_frags_bound(fragments, largest_remove, mol) >= best_index
             }
             Bound::CliqueBudget => best_index + clique_budget_bound(matches, subgraph, fragments) <= state_index,
-            _ => {
-                panic!("One of the chosen bounds is not implemented yet!")
+            Bound::CoverNoSort => {
+                if let Some(g) = graph {
+                    best_index + cover_bound(matches, g, subgraph, false) <= state_index
+                }
+                else {
+                    false
+                }
+            }
+            Bound::CoverSort => {
+                if let Some(g) = graph {
+                    best_index + cover_bound(matches, g, subgraph, true) <= state_index
+                }
+                else {
+                    false
+                }
             }
         };
         if exceeds {
@@ -310,3 +324,74 @@ pub fn clique_budget_bound(matches: &Vec<(BitSet, BitSet)>, subgraph: &BitSet, f
 
     bound
 }
+
+pub fn cover_bound(matches: &Vec<(BitSet, BitSet)>, graph: &CompatGraph, subgraph: &BitSet, sort: bool) -> usize {
+    // Sort vertices
+    if sort {
+        let mut vertices: Vec<(usize, usize)> = Vec::with_capacity(subgraph.len());
+        for v in subgraph {
+            vertices.push((v, graph.degree(v, subgraph)));
+        }   
+        vertices.sort_by(|a, b| b.1.cmp(&a.1));
+        cover_bound_helper(matches, graph, subgraph, vertices.iter().map(|(v, _)| *v))
+    }
+    else {
+        let vertices = (0..graph.len()).rev().filter(|v| subgraph.contains(*v));
+        cover_bound_helper(matches, graph, subgraph, vertices)
+    }
+}
+
+fn cover_bound_helper(matches: &Vec<(BitSet, BitSet)>, graph: &CompatGraph, subgraph: &BitSet, iter: impl Iterator<Item = usize>) -> usize {
+    let mut colors: Vec<Option<Vec<usize>>> = vec![None; graph.len()];
+    let mut col_weights = vec![];
+    let mut num_col = 0;
+
+    for v in iter {
+        let mut v_col = Vec::new();
+        let mut used = vec![0; num_col];
+
+        // Find colors used in neighborhood of v
+        for u in subgraph.intersection(graph.compatible_with(v)) {
+            let Some(u_col) = &colors[u] else {
+                continue;
+            };
+
+            for c in u_col {
+                used[*c] = 1;
+            }
+        }
+
+        let mut total_weight = 0;
+        let v_val = matches[v].0.len() - 1;
+        // Find colors to give to v
+        for c in 0..num_col {
+            if used[c] == 1 {
+                continue;
+            }
+
+            v_col.push(c);
+            total_weight += col_weights[c];
+
+            if total_weight >= v_val {
+                break;
+            }
+        }
+
+        if total_weight == 0 {
+            v_col.push(num_col);
+            col_weights.push(v_val);
+            num_col += 1
+        }
+        else if total_weight < v_val {
+            let mut k = num_col - 1;
+            while used[k] == 1 {
+                k -= 1
+            }
+            col_weights[k] += v_val - total_weight
+        }
+
+        colors[v] = Some(v_col);
+    };
+
+    col_weights.iter().sum()
+}
diff --git a/src/main.rs b/src/main.rs
index c71f7ede..46881e12 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -100,7 +100,7 @@ fn main() -> Result<()> {
         // If clique not enable, default to a combination of the integer and vector bounds.
         (None, false) => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags],
         // If clique is enabled, default to combination of int, vec, and clique bounds.
-        (None, true) => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags, Bound::CliqueBudget],
+        (None, true) => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags, Bound::CliqueBudget, Bound::CoverNoSort],
         // If --no-bounds is set, do not use any bounds.
         (Some(BoundsGroup {
             no_bounds: true, ..

From fe7b47ac52a2b47234aa98e8c0be8301cab541bb Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Mon, 28 Jul 2025 16:35:23 -0700
Subject: [PATCH 09/51] Add deletion kernel. Not fully implemented yet

---
 src/kernels.rs    | 40 ++++++++++++++++++++++++++++++++++++++++
 src/reductions.rs |  8 --------
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/src/kernels.rs b/src/kernels.rs
index 5b1f1292..0e65e994 100644
--- a/src/kernels.rs
+++ b/src/kernels.rs
@@ -12,6 +12,8 @@
 //! the equivalent problem of weighted independent set.)
 
 use clap::ValueEnum;
+use bit_set::BitSet;
+use crate::reductions::CompatGraph;
 
 /// Graph kernelization strategy when searching using the clique reduction.
 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
@@ -27,3 +29,41 @@ pub enum KernelMode {
     /// Apply kernels after every fragmentation step.
     Always,
 }
+
+pub fn deletion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, mut subgraph: BitSet) -> BitSet{
+    let subgraph_copy = subgraph.clone();
+
+    for v in subgraph_copy.iter() {
+        let v_val = matches[v].0.len();
+        let neighbors_v = g.neighbors(v, &subgraph);
+
+        let Some(w1) = neighbors_v.iter().next() else {
+            continue;
+        };
+        let Some(w2) = neighbors_v.iter().last() else {
+            continue;
+        };
+
+        let mut s = subgraph.clone();
+        s.intersect_with(&g.compatible_with(w1));
+        for u in s.intersection(&&g.compatible_with(w2)) {
+            if g.are_adjacent(v, u) || v == u {
+                continue;
+            }
+
+            let u_val = matches[u].0.len();
+            if v_val > u_val {
+                continue;
+            }
+
+            let neighbors_u = g.neighbors(u, &subgraph);
+
+            if neighbors_v.is_subset(&neighbors_u) {
+                subgraph.remove(v);
+                break;
+            }
+        }
+    }
+
+    subgraph
+}
\ No newline at end of file
diff --git a/src/reductions.rs b/src/reductions.rs
index 93519e74..184b3df0 100644
--- a/src/reductions.rs
+++ b/src/reductions.rs
@@ -38,14 +38,6 @@ impl CompatGraph {
         }
     }
 
-    /*pub fn weight(&self, v: usize) -> usize {
-        self.weights[v]
-    }
-
-    pub fn matches(&self, v: usize) -> &(BitSet, BitSet) {
-        &self.matches[v]
-    }*/
-
     pub fn len(&self) -> usize {
         self.graph.len()
     }

From e94d09ced1b0a89396dcf6f7ab7c2a68fdc321ad Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Mon, 28 Jul 2025 16:57:05 -0700
Subject: [PATCH 10/51] Add support for kernel modes. Implement deletion kernel

---
 src/assembly.rs | 26 +++++++++++++++++++++-----
 src/main.rs     |  3 +++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 07b8f4e6..c796757c 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -34,7 +34,7 @@ use crate::{
     bounds::{bound_exceeded, Bound},
     canonize::{canonize, CanonizeMode, Labeling},
     enumerate::{enumerate_subgraphs, EnumerateMode},
-    kernels::KernelMode,
+    kernels::{KernelMode, deletion_kernel},
     memoize::{Cache, MemoizeMode},
     molecule::Molecule,
     utils::connected_components_under_edges,
@@ -220,7 +220,7 @@ pub fn recurse_index_search(
     mol: &Molecule,
     matches: &Vec<(BitSet, BitSet)>,
     graph: &Option<CompatGraph>,
-    subgraph: BitSet,
+    mut subgraph: BitSet,
     removal_order: Vec<usize>,
     state: &[BitSet],
     state_index: usize,
@@ -229,6 +229,7 @@ pub fn recurse_index_search(
     bounds: &[Bound],
     cache: &mut Cache,
     parallel_mode: ParallelMode,
+    kernel_mode: KernelMode,
 ) -> (usize, usize) {
     // If any bounds would prune this assembly state or if memoization is
     // enabled and this assembly state is preempted by the cached state, halt.
@@ -247,6 +248,12 @@ pub fn recurse_index_search(
         return (state_index, 1);
     }
 
+    // Apply kernels
+    if kernel_mode != KernelMode::None {
+        let g = graph.as_ref().unwrap();
+        subgraph = deletion_kernel(matches, g, subgraph);
+    }
+
     // Keep track of the best assembly index found in any of this assembly
     // state's children and the number of states searched, including this one.
     let best_child_index = AtomicUsize::from(state_index);
@@ -265,6 +272,16 @@ pub fn recurse_index_search(
                 parallel_mode
             };
 
+            // If kernelizing once, do not kernelize again. If kernelizing at depth-one,
+            // kernelize once more at the beginning of each descendent.
+            let new_kernel = if kernel_mode == KernelMode::Once {
+                KernelMode::None
+            } else if kernel_mode == KernelMode::DepthOne {
+                KernelMode::Once
+            } else {
+                kernel_mode
+            };
+
             // Update subgraph
             let mut sub_clone;
             if let Some(g) = graph {
@@ -296,6 +313,7 @@ pub fn recurse_index_search(
                 bounds,
                 &mut cache.clone(),
                 new_parallel,
+                new_kernel,
             );
 
             // Update the best assembly indices (across children states and
@@ -399,9 +417,6 @@ pub fn index_search(
     clique: bool,
 ) -> (u32, u32, usize) {
     // Catch not-yet-implemented modes.
-    if kernel_mode != KernelMode::None {
-        panic!("The chosen --kernel mode is not implemented yet!")
-    }
 
     // Enumerate non-overlapping isomorphic subgraph pairs.
     let matches = matches(mol, enumerate_mode, canonize_mode);
@@ -443,6 +458,7 @@ pub fn index_search(
         bounds,
         &mut cache,
         parallel_mode,
+        kernel_mode,
     );
 
     (index as u32, matches.len() as u32, states_searched)
diff --git a/src/main.rs b/src/main.rs
index 46881e12..fa81b6aa 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -117,6 +117,9 @@ fn main() -> Result<()> {
         if !cli.clique && boundlist.contains(&bound) {
             panic!("Bound {:?} can not be used without clique enabled", bound);
         }
+        if !cli.clique && cli.kernel != KernelMode::None {
+            panic!("Cannot use kernels without clique enabled");
+        }
     }
 
     // Call index calculation with all the various options.

From 51c961dd1219dc66fc8fb8036ff5757f1e910083 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Mon, 28 Jul 2025 18:04:06 -0700
Subject: [PATCH 11/51] Add inclusion clone for serial search

---
 src/assembly.rs | 43 +++++++++++++++++++++++++++++++++++--------
 src/kernels.rs  | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 8 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index c796757c..b680561f 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -19,11 +19,10 @@
 //! ```
 
 use std::{
-    collections::HashMap,
-    sync::{
+    collections::HashMap, ops::ControlFlow, sync::{
         atomic::{AtomicUsize, Ordering::Relaxed},
         Arc,
-    },
+    }
 };
 
 use bit_set::BitSet;
@@ -34,7 +33,7 @@ use crate::{
     bounds::{bound_exceeded, Bound},
     canonize::{canonize, CanonizeMode, Labeling},
     enumerate::{enumerate_subgraphs, EnumerateMode},
-    kernels::{KernelMode, deletion_kernel},
+    kernels::{KernelMode, deletion_kernel, inclusion_kernel},
     memoize::{Cache, MemoizeMode},
     molecule::Molecule,
     utils::connected_components_under_edges,
@@ -221,6 +220,7 @@ pub fn recurse_index_search(
     matches: &Vec<(BitSet, BitSet)>,
     graph: &Option<CompatGraph>,
     mut subgraph: BitSet,
+    must_include: &Option<Vec<usize>>,
     removal_order: Vec<usize>,
     state: &[BitSet],
     state_index: usize,
@@ -249,9 +249,17 @@ pub fn recurse_index_search(
     }
 
     // Apply kernels
+    let mut must_include_clone = None;
     if kernel_mode != KernelMode::None {
         let g = graph.as_ref().unwrap();
+
+        // Deletion kernel
         subgraph = deletion_kernel(matches, g, subgraph);
+        
+        //Inclusion kernel
+        let mut include_temp = must_include.as_ref().unwrap().clone();
+        include_temp.append(&mut inclusion_kernel(matches, g, &subgraph));
+        must_include_clone = Some(include_temp);
     }
 
     // Keep track of the best assembly index found in any of this assembly
@@ -301,6 +309,7 @@ pub fn recurse_index_search(
                 matches,
                 graph,
                 sub_clone,
+                &must_include_clone,
                 {
                     let mut clone = removal_order.clone();
                     clone.push(i);
@@ -326,15 +335,23 @@ pub fn recurse_index_search(
 
     // Use the iterator type corresponding to the specified parallelism mode.
     if parallel_mode == ParallelMode::None {
-        subgraph
+        let _ = subgraph
             .iter()
-            .for_each(|v| recurse_on_match(v));
+            .try_for_each(|v| {
+                recurse_on_match(v);
+                if let Some(vec) = &must_include_clone {
+                    if vec.contains(&v) {
+                        return ControlFlow::Break(());
+                    }
+                }
+                ControlFlow::Continue(())
+            });
     } else {
-        subgraph
+        let _ = subgraph
             .iter()
             .collect::<Vec<usize>>()
             .par_iter()
-            .for_each(|v | recurse_on_match(*v));
+            .for_each(|v| recurse_on_match(*v));
     }
 
     (
@@ -442,6 +459,15 @@ pub fn index_search(
         subgraph.insert(i);
     }
 
+    let must_include = {
+        if kernel_mode != KernelMode::None {
+            Some(Vec::new())
+        }
+        else {
+            None
+        }
+    };
+
     // Search for the shortest assembly pathway recursively.
     let edge_count = mol.graph().edge_count();
     let best_index = Arc::new(AtomicUsize::from(edge_count - 1));
@@ -450,6 +476,7 @@ pub fn index_search(
         &matches,
         &graph,
         subgraph,
+        &must_include,
         Vec::new(),
         &[init],
         edge_count - 1,
diff --git a/src/kernels.rs b/src/kernels.rs
index 0e65e994..d083ef5e 100644
--- a/src/kernels.rs
+++ b/src/kernels.rs
@@ -66,4 +66,41 @@ pub fn deletion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, mut sub
     }
 
     subgraph
+}
+
+pub fn inclusion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, subgraph: &BitSet) -> Vec<usize> {
+    let mut kernel = Vec::new();
+    let tot = subgraph.iter().map(|v| matches[v].0.len() - 1).sum::<usize>();
+
+    'outer: for v in subgraph {
+        let v_val = matches[v].0.len() - 1;
+        let neighbors_val = g.neighbors(v, subgraph).iter().map(|u| matches[u].0.len() - 1).sum::<usize>();
+        if v_val >= tot - neighbors_val - v_val { 
+            kernel.push(v);
+            continue;
+        }
+
+        let mut neighbors: Vec<usize> = vec![];
+
+        for u in subgraph.difference(&g.compatible_with(v)) {
+            if u == v {
+                continue;
+            }
+            if matches[u].0.len() - 1 > v_val {
+                continue 'outer;
+            }
+
+            for w in neighbors.iter() {
+                if g.are_adjacent(u, *w) {
+                    continue 'outer;
+                }   
+            }
+
+            neighbors.push(u);
+        }
+
+        kernel.push(v);
+    }
+
+    kernel
 }
\ No newline at end of file

From 6aec6a9082e91c512f2141db75e38bb7e9da2bd7 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Tue, 29 Jul 2025 13:17:14 -0700
Subject: [PATCH 12/51] Change must_include_kernel return to usize

---
 src/assembly.rs | 33 ++++++---------------------------
 src/kernels.rs  |  9 +++++++--
 2 files changed, 13 insertions(+), 29 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index b680561f..5c9a0268 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -19,7 +19,7 @@
 //! ```
 
 use std::{
-    collections::HashMap, ops::ControlFlow, sync::{
+    collections::HashMap, sync::{
         atomic::{AtomicUsize, Ordering::Relaxed},
         Arc,
     }
@@ -220,7 +220,6 @@ pub fn recurse_index_search(
     matches: &Vec<(BitSet, BitSet)>,
     graph: &Option<CompatGraph>,
     mut subgraph: BitSet,
-    must_include: &Option<Vec<usize>>,
     removal_order: Vec<usize>,
     state: &[BitSet],
     state_index: usize,
@@ -249,7 +248,7 @@ pub fn recurse_index_search(
     }
 
     // Apply kernels
-    let mut must_include_clone = None;
+    let mut must_include = matches.len();
     if kernel_mode != KernelMode::None {
         let g = graph.as_ref().unwrap();
 
@@ -257,9 +256,7 @@ pub fn recurse_index_search(
         subgraph = deletion_kernel(matches, g, subgraph);
         
         //Inclusion kernel
-        let mut include_temp = must_include.as_ref().unwrap().clone();
-        include_temp.append(&mut inclusion_kernel(matches, g, &subgraph));
-        must_include_clone = Some(include_temp);
+        must_include = inclusion_kernel(matches, g, &subgraph);
     }
 
     // Keep track of the best assembly index found in any of this assembly
@@ -309,7 +306,6 @@ pub fn recurse_index_search(
                 matches,
                 graph,
                 sub_clone,
-                &must_include_clone,
                 {
                     let mut clone = removal_order.clone();
                     clone.push(i);
@@ -335,17 +331,10 @@ pub fn recurse_index_search(
 
     // Use the iterator type corresponding to the specified parallelism mode.
     if parallel_mode == ParallelMode::None {
-        let _ = subgraph
+        subgraph
             .iter()
-            .try_for_each(|v| {
-                recurse_on_match(v);
-                if let Some(vec) = &must_include_clone {
-                    if vec.contains(&v) {
-                        return ControlFlow::Break(());
-                    }
-                }
-                ControlFlow::Continue(())
-            });
+            .filter(|v| *v <= must_include)
+            .for_each(|v| recurse_on_match(v));
     } else {
         let _ = subgraph
             .iter()
@@ -459,15 +448,6 @@ pub fn index_search(
         subgraph.insert(i);
     }
 
-    let must_include = {
-        if kernel_mode != KernelMode::None {
-            Some(Vec::new())
-        }
-        else {
-            None
-        }
-    };
-
     // Search for the shortest assembly pathway recursively.
     let edge_count = mol.graph().edge_count();
     let best_index = Arc::new(AtomicUsize::from(edge_count - 1));
@@ -476,7 +456,6 @@ pub fn index_search(
         &matches,
         &graph,
         subgraph,
-        &must_include,
         Vec::new(),
         &[init],
         edge_count - 1,
diff --git a/src/kernels.rs b/src/kernels.rs
index d083ef5e..4b8d99d4 100644
--- a/src/kernels.rs
+++ b/src/kernels.rs
@@ -68,7 +68,7 @@ pub fn deletion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, mut sub
     subgraph
 }
 
-pub fn inclusion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, subgraph: &BitSet) -> Vec<usize> {
+pub fn inclusion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, subgraph: &BitSet) -> usize {
     let mut kernel = Vec::new();
     let tot = subgraph.iter().map(|v| matches[v].0.len() - 1).sum::<usize>();
 
@@ -102,5 +102,10 @@ pub fn inclusion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, subgra
         kernel.push(v);
     }
 
-    kernel
+    if let Some(x) = kernel.iter().min() {
+        *x
+    }
+    else {
+        matches.len()
+    }
 }
\ No newline at end of file

From 82c6701752a7fc4c8208505f2a1ab3df3ff303cd Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Tue, 29 Jul 2025 13:29:58 -0700
Subject: [PATCH 13/51] Implement inclusion kernel for parallel modes

---
 src/assembly.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/assembly.rs b/src/assembly.rs
index 5c9a0268..ee42483b 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -338,6 +338,7 @@ pub fn recurse_index_search(
     } else {
         let _ = subgraph
             .iter()
+            .filter(|v| *v <= must_include)
             .collect::<Vec<usize>>()
             .par_iter()
             .for_each(|v| recurse_on_match(*v));

From a4684138749ed8d66e5f231ab647323d6f96d427 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Tue, 29 Jul 2025 13:34:41 -0700
Subject: [PATCH 14/51] Optimize largest_remove computation

---
 src/assembly.rs | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index ee42483b..c3be6a40 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -224,12 +224,20 @@ pub fn recurse_index_search(
     state: &[BitSet],
     state_index: usize,
     best_index: Arc<AtomicUsize>,
-    largest_remove: usize,
     bounds: &[Bound],
     cache: &mut Cache,
     parallel_mode: ParallelMode,
     kernel_mode: KernelMode,
 ) -> (usize, usize) {
+    let largest_remove = {
+        if let Some(v) = subgraph.iter().next() {
+            matches[v].0.len()
+        }
+        else {
+            return (state_index, 1);
+        }
+    };
+
     // If any bounds would prune this assembly state or if memoization is
     // enabled and this assembly state is preempted by the cached state, halt.
     if bound_exceeded(
@@ -314,7 +322,6 @@ pub fn recurse_index_search(
                 &fragments,
                 state_index - h1.len() + 1,
                 best_index.clone(),
-                h1.len(),
                 bounds,
                 &mut cache.clone(),
                 new_parallel,
@@ -461,7 +468,6 @@ pub fn index_search(
         &[init],
         edge_count - 1,
         best_index,
-        edge_count,
         bounds,
         &mut cache,
         parallel_mode,

From 0f45f5aa599d804e4620ca80ddfd6aa2f04bfea8 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Tue, 29 Jul 2025 14:50:07 -0700
Subject: [PATCH 15/51] Add some comments and documentation

---
 src/assembly.rs   | 15 ++++++++++++---
 src/kernels.rs    | 10 +++++++++-
 src/reductions.rs | 26 ++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index c3be6a40..94c3e317 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -198,16 +198,17 @@ fn fragments(mol: &Molecule, state: &[BitSet], h1: &BitSet, h2: &BitSet) -> Opti
 /// Inputs:
 /// - `mol`: The molecule whose assembly index is being calculated.
 /// - `matches`: The remaining non-overlapping isomorphic subgraph pairs.
+/// - `graph`: If clique is enabled, the graph of compatible isomorphic subgraph paris.
+/// - `subgraph`: A bitset with length of matches. Has a 1 for every match to be searched in this state.
 /// - `removal_order`: TODO
 /// - `state`: The current assembly state, i.e., a list of fragments.
 /// - `state_index`: This assembly state's upper bound on the assembly index,
 ///   i.e., edges(mol) - 1 - [edges(subgraphs removed) - #(subgraphs removed)].
 /// - `best_index`: The smallest assembly index for all assembly states so far.
-/// - `largest_remove`: An upper bound on the size of fragments that can be
-///   removed from this or any descendant assembly state.
 /// - `bounds`: The list of bounding strategies to apply.
 /// - `cache`: TODO
 /// - `parallel_mode`: The parallelism mode for this state's match iteration.
+/// - `kernel_mode`: The kernelization mode for this state.
 ///
 /// Returns, from this assembly state and any of its descendents:
 /// - `usize`: An updated upper bound on the assembly index. (Note: If this
@@ -229,6 +230,8 @@ pub fn recurse_index_search(
     parallel_mode: ParallelMode,
     kernel_mode: KernelMode,
 ) -> (usize, usize) {
+    // An upper bound on the size of fragments that can be
+    // removed from this or any descendant assembly state.
     let largest_remove = {
         if let Some(v) = subgraph.iter().next() {
             matches[v].0.len()
@@ -261,9 +264,11 @@ pub fn recurse_index_search(
         let g = graph.as_ref().unwrap();
 
         // Deletion kernel
+        // Returns a subgraph without nodes that never occur in an optimal solution.
         subgraph = deletion_kernel(matches, g, subgraph);
         
-        //Inclusion kernel
+        // Inclusion kernel.
+        // must_include is the first match in the matches list that will be included in an optimal solution.
         must_include = inclusion_kernel(matches, g, &subgraph);
     }
 
@@ -296,6 +301,8 @@ pub fn recurse_index_search(
             };
 
             // Update subgraph
+            // If using CompatGraph, only consider the neighborhood of the node removed.
+            // Otherwise, only consider the matches that occur after the removed match in the list.
             let mut sub_clone;
             if let Some(g) = graph {
                 sub_clone = subgraph.clone();
@@ -337,6 +344,8 @@ pub fn recurse_index_search(
     };
 
     // Use the iterator type corresponding to the specified parallelism mode.
+    // Only search on nodes with v <= must_include since any searches after that would
+    // not use must_include.
     if parallel_mode == ParallelMode::None {
         subgraph
             .iter()
diff --git a/src/kernels.rs b/src/kernels.rs
index 4b8d99d4..0351bf5d 100644
--- a/src/kernels.rs
+++ b/src/kernels.rs
@@ -30,6 +30,8 @@ pub enum KernelMode {
     Always,
 }
 
+/// Takes a subgraph as input and removes all nodes that will not be part of any maximum weight clique.
+/// Uses the strategy of domination as described in Lamm et al.
 pub fn deletion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, mut subgraph: BitSet) -> BitSet{
     let subgraph_copy = subgraph.clone();
 
@@ -68,20 +70,26 @@ pub fn deletion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, mut sub
     subgraph
 }
 
+/// Takes a subgraph as input and returns the first vertex that will be included in some maximum weight clique.
+/// Uses the strategies of neighborhood removal and isolated vertex removal from Lamm et al.
 pub fn inclusion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, subgraph: &BitSet) -> usize {
     let mut kernel = Vec::new();
     let tot = subgraph.iter().map(|v| matches[v].0.len() - 1).sum::<usize>();
 
     'outer: for v in subgraph {
         let v_val = matches[v].0.len() - 1;
+
+        // Neighborhood removal
         let neighbors_val = g.neighbors(v, subgraph).iter().map(|u| matches[u].0.len() - 1).sum::<usize>();
         if v_val >= tot - neighbors_val - v_val { 
             kernel.push(v);
             continue;
         }
 
+        // Isolated vertex removal.
+        // Only makes it through this loop if the non-neighborhood of v is independent and
+        // contains vertices with weight no higher than v.
         let mut neighbors: Vec<usize> = vec![];
-
         for u in subgraph.difference(&g.compatible_with(v)) {
             if u == v {
                 continue;
diff --git a/src/reductions.rs b/src/reductions.rs
index 184b3df0..fc0e82ce 100644
--- a/src/reductions.rs
+++ b/src/reductions.rs
@@ -1,10 +1,25 @@
+//! Reduces the optimal molecular assembly index problem into maximum weighted clique.
+//! 
+//! Removing a duplicatable subgraph pair (h1, h2) gives a savings of |h1| - 1.
+//! The problem of finding the optimal molecular assembly index can thus be thought of as finding
+//! a set of duplicatable subgraph removals that are all compatible with each other (i.e. can be removed
+//! at the same time) with the largest savings possible. Thus, this problem is close to that of finding
+//! a maximum clique in a graph with duplicatable subgraph pairs for vertices, vertex weights |h1| - 1, and
+//! edges representing compatibility. However, the order of duplicatable subgraph removals can affect
+//! compatibility, and so edges must be directed. By ordering the vertices in a nice way, we can ignore
+//! edge directionality.
+
 use bit_set::BitSet;
 
+/// Graph representation of the compatibility of duplicatable subgraph pairs.
 pub struct CompatGraph {
+    /// The graph is implemented as an adjacency matrix. The ith element of graph
+    /// has a 1 at position j if {i, j} is an edge.
     graph: Vec<BitSet>,
 }
 
 impl CompatGraph {
+    /// Constructs a compatibility graph given a set of matches.
     pub fn new(init_matches: &Vec<(BitSet, BitSet)>) -> Self {
         let size = init_matches.len();
 
@@ -38,18 +53,25 @@ impl CompatGraph {
         }
     }
 
+    /// Returns the number of vertices in the graph.
     pub fn len(&self) -> usize {
         self.graph.len()
     }
 
+    /// Returns the degree vertex v in the subgraph
     pub fn degree(&self, v: usize, subgraph: &BitSet) -> usize {
         self.graph[v].intersection(subgraph).count()
     }
 
+    /// Returns the set of matches compatible with v. I.e. the set neighbors
+    /// of v.
     pub fn compatible_with(&self, v: usize) -> &BitSet {
         &self.graph[v]
     }
 
+    /// Returns the neighbors of v in the given subgraph.
+    /// The returned BitSet is a clone, and thus can be safely modified by the calling function
+    /// without changing the graph.
     pub fn neighbors(&self, v: usize, subgraph: &BitSet) -> BitSet {
         let mut neighbors = self.graph[v].clone();
         neighbors.intersect_with(subgraph);
@@ -57,6 +79,9 @@ impl CompatGraph {
         neighbors
     }
 
+    /// Returns the neighbors of v that occur after v in the matches list.
+    /// The returned BitSet is a clone, and thus can be safely modified by the calling function
+    /// without changing the graph.
     pub fn forward_neighbors(&self, v: usize, subgraph: &BitSet) -> BitSet {
         let mut neighbors = self.graph[v].clone();
         neighbors.intersect_with(subgraph);
@@ -76,6 +101,7 @@ impl CompatGraph {
         neighbors
     }
 
+    /// Returns true if vertices v and u are adjacent.
     pub fn are_adjacent(&self, v: usize, u: usize) -> bool {
         self.graph[v].contains(u)
     }

From 69082d05f4d1268d2e09287650f343848ef45ead Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Tue, 29 Jul 2025 14:52:51 -0700
Subject: [PATCH 16/51] Optimize inclusion_kernel return

---
 src/kernels.rs | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/kernels.rs b/src/kernels.rs
index 0351bf5d..f4574df0 100644
--- a/src/kernels.rs
+++ b/src/kernels.rs
@@ -73,7 +73,6 @@ pub fn deletion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, mut sub
 /// Takes a subgraph as input and returns the first vertex that will be included in some maximum weight clique.
 /// Uses the strategies of neighborhood removal and isolated vertex removal from Lamm et al.
 pub fn inclusion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, subgraph: &BitSet) -> usize {
-    let mut kernel = Vec::new();
     let tot = subgraph.iter().map(|v| matches[v].0.len() - 1).sum::<usize>();
 
     'outer: for v in subgraph {
@@ -82,8 +81,7 @@ pub fn inclusion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, subgra
         // Neighborhood removal
         let neighbors_val = g.neighbors(v, subgraph).iter().map(|u| matches[u].0.len() - 1).sum::<usize>();
         if v_val >= tot - neighbors_val - v_val { 
-            kernel.push(v);
-            continue;
+            return v;
         }
 
         // Isolated vertex removal.
@@ -107,13 +105,9 @@ pub fn inclusion_kernel(matches: &Vec<(BitSet, BitSet)>, g: &CompatGraph, subgra
             neighbors.push(u);
         }
 
-        kernel.push(v);
+        return v;
     }
 
-    if let Some(x) = kernel.iter().min() {
-        *x
-    }
-    else {
-        matches.len()
-    }
+    matches.len()
+
 }
\ No newline at end of file

From 30612c07076d43605d8c41424b76f23865cb48e1 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Mon, 4 Aug 2025 13:05:00 -0700
Subject: [PATCH 17/51] Fix removal_order

---
 src/assembly.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 94c3e317..bd69cfc7 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -323,7 +323,7 @@ pub fn recurse_index_search(
                 sub_clone,
                 {
                     let mut clone = removal_order.clone();
-                    clone.push(i);
+                    clone.push(v);
                     clone
                 },
                 &fragments,

From 26cea2fd03bb7559e77ad6dd7b50eaa4be156277 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 21 Aug 2025 15:36:22 -0700
Subject: [PATCH 18/51] Add new int bound

---
 src/assembly.rs | 45 ++++++++++++++++++++++++++++---
 src/bounds.rs   | 72 ++++++++++++++++++++++++++++++++++++++++++++++++-
 src/main.rs     |  2 +-
 3 files changed, 114 insertions(+), 5 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index bd69cfc7..374dcc3b 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -193,6 +193,19 @@ fn fragments(mol: &Molecule, state: &[BitSet], h1: &BitSet, h2: &BitSet) -> Opti
     Some(fragments)
 }
 
+fn is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<BitSet>>) -> bool {
+    let f1 = state.iter().enumerate().find(|(_, c)| h1.is_subset(c));
+    let f2 = state.iter().enumerate().find(|(_, c)| h2.is_subset(c));
+    
+    if let (Some((i1, _)), Some((i2, _))) = (f1, f2) {
+        masks[h1.len() - 2][i1].union_with(h1);
+        masks[h1.len() - 2][i2].union_with(h2);
+        true
+    } else {
+        false
+    }
+}
+
 /// Recursive helper for [`index_search`], only public for benchmarking.
 ///
 /// Inputs:
@@ -230,6 +243,9 @@ pub fn recurse_index_search(
     parallel_mode: ParallelMode,
     kernel_mode: KernelMode,
 ) -> (usize, usize) {
+    //println!("{:?}", removal_order);
+    //println!("{:?}", state);
+
     // An upper bound on the size of fragments that can be
     // removed from this or any descendant assembly state.
     let largest_remove = {
@@ -241,19 +257,40 @@ pub fn recurse_index_search(
         }
     };
 
+    let mut masks: Vec<Vec<BitSet>> = vec![vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; largest_remove - 1];
+    for v in subgraph.iter() {
+        let m = &matches[v];
+        is_usable(state, &m.0, &m.1, &mut masks);
+    }
+    /*while masks.iter().last().unwrap().len() == 0 {
+        masks.pop();
+    }*/
+    /*for (i, m) in masks[0].iter().enumerate() {
+        state[i] = m.clone();
+    }*/
+    /*let mut state = Vec::new();
+    for m in masks[0].iter() {
+        if m.len() != 0 {
+            state.extend(connected_components_under_edges(mol.graph(), m));
+        }
+    }
+    state.retain(|i| i.len() > 1);*/
+
+
     // If any bounds would prune this assembly state or if memoization is
     // enabled and this assembly state is preempted by the cached state, halt.
     if bound_exceeded(
         mol,
         matches,
         graph,
-        state,
+        &state,
         &subgraph,
         state_index,
         best_index.load(Relaxed),
         largest_remove,
         bounds,
-    ) || cache.memoize_state(mol, state, state_index, &removal_order)
+        &masks,
+    ) || cache.memoize_state(mol, &state, state_index, &removal_order)
     {
         return (state_index, 1);
     }
@@ -281,7 +318,7 @@ pub fn recurse_index_search(
     // the given (enumerated) pair of non-overlapping isomorphic subgraphs.
     let recurse_on_match = |v: usize| {
         let (h1, h2) = &matches[v];
-        if let Some(fragments) = fragments(mol, state, h1, h2) {
+        if let Some(fragments) = fragments(mol, &state, h1, h2) {
             // If using depth-one parallelism, all descendant states should be
             // computed serially.
             let new_parallel = if parallel_mode == ParallelMode::DepthOne {
@@ -483,6 +520,8 @@ pub fn index_search(
         kernel_mode,
     );
 
+    println!("{}", cache.count());
+
     (index as u32, matches.len() as u32, states_searched)
 }
 
diff --git a/src/bounds.rs b/src/bounds.rs
index 73307cc4..d4e5aa39 100644
--- a/src/bounds.rs
+++ b/src/bounds.rs
@@ -77,11 +77,13 @@ pub fn bound_exceeded(
     best_index: usize,
     largest_remove: usize,
     bounds: &[Bound],
+    masks: &Vec<Vec<BitSet>>,
 ) -> bool {
     for bound_type in bounds {
         let exceeds = match bound_type {
             Bound::Log => state_index - log_bound(fragments) >= best_index,
-            Bound::Int => state_index - int_bound(fragments, largest_remove) >= best_index,
+            //Bound::Int => state_index - int_bound_seet(masks, largest_remove) >= best_index,
+            Bound::Int => best_index + int_bound_new(masks) <= state_index,
             Bound::VecSimple => {
                 state_index - vec_simple_bound(fragments, largest_remove, mol) >= best_index
             }
@@ -149,6 +151,74 @@ fn int_bound(fragments: &[BitSet], m: usize) -> usize {
     max_s
 }
 
+fn int_bound_new(masks: &Vec<Vec<BitSet>>) -> usize {
+    let mut max = 0;
+    for (i, list) in masks.iter().enumerate() {
+        let mut val = 0;
+        let i = i + 2;
+        for (j, frag) in list.iter().enumerate() {
+            let mf = masks[0][j].len();
+            let mi = frag.len();
+            let x = mf + (mi % i) - mi;
+            val += mf - (mi / i) - x / (i-1) - (x % (i-1) != 0) as usize;
+        }
+        val -= (i as f32).log2().ceil() as usize;
+        max = max.max(val);
+    }
+
+    max
+}
+
+fn int_bound_seet(masks: &Vec<Vec<BitSet>>, largest_remove: usize) -> usize {
+    let mut dup_bonds_2 = 0;
+    let mut dup_bonds_total;
+    let mut max;
+    let mut size_lists = vec![Vec::new(); masks.len()];
+
+    for j in 0..masks.len()
+    {
+        size_lists[j] = vec![0; masks[j].len()];
+        for i in 0..masks[0].len()
+        {
+            size_lists[j][i] = masks[j][i].len();
+        }
+    }
+
+    for i in 0..size_lists[0].len() {
+        dup_bonds_2 += size_lists[0][i] / 2;
+    }
+    dup_bonds_2 -= 1;
+    max = dup_bonds_2;
+
+    for j in 3..largest_remove+1
+    {
+        let size_list = &size_lists[j - 2];
+        let size_list2 = &size_lists[0];
+        let mut adjusted_size_list = vec![0; size_list.len()];
+        let mut adjusted_size_list2 = vec![0; size_list.len()];
+        for i in 0..size_list.len()
+        {
+            adjusted_size_list[i] = size_list[i] - size_list[i] % j;
+            adjusted_size_list2[i] = size_list2[i] - adjusted_size_list[i];
+        }
+        dup_bonds_total = 0;
+        for i in 0..size_list.len()
+        {
+            dup_bonds_total += adjusted_size_list[i] - adjusted_size_list[i] / j;
+            dup_bonds_total += adjusted_size_list2[i] - adjusted_size_list2[i] / (j - 1);
+            if adjusted_size_list2[i] % (j - 1) != 0 {
+                dup_bonds_total -= 1;
+            }
+        }
+        dup_bonds_total -= (j as f32).log2().ceil() as usize;
+        max = max.max(dup_bonds_total);
+    }
+
+    max
+}
+
+
+
 /// TODO
 // Count number of unique edges in a fragment
 // Helper function for vector bounds
diff --git a/src/main.rs b/src/main.rs
index fa81b6aa..8b30222a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -100,7 +100,7 @@ fn main() -> Result<()> {
         // If clique not enable, default to a combination of the integer and vector bounds.
         (None, false) => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags],
         // If clique is enabled, default to combination of int, vec, and clique bounds.
-        (None, true) => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags, Bound::CliqueBudget, Bound::CoverNoSort],
+        (None, true) => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags, /*Bound::CliqueBudget,*/ Bound::CoverNoSort],
         // If --no-bounds is set, do not use any bounds.
         (Some(BoundsGroup {
             no_bounds: true, ..

From f0c38bccf79b2546de561aa5d8c61e41c23bf461 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 21 Aug 2025 15:36:36 -0700
Subject: [PATCH 19/51] Add cache counter

---
 src/memoize.rs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/memoize.rs b/src/memoize.rs
index 6ae2060e..c2029d5b 100644
--- a/src/memoize.rs
+++ b/src/memoize.rs
@@ -6,6 +6,8 @@ use bit_set::BitSet;
 use clap::ValueEnum;
 use dashmap::DashMap;
 
+use std::sync::atomic::AtomicUsize;
+
 use crate::{
     canonize::{canonize, CanonizeMode, Labeling},
     molecule::Molecule,
@@ -47,6 +49,8 @@ pub struct Cache {
     /// A parallel-aware map from fragments to their canonical labelings; only
     /// used with [`MemoizeMode::CanonIndex`].
     fragment_labels: Arc<DashMap<BitSet, Labeling>>,
+
+    count: Arc<AtomicUsize>,
 }
 
 impl Cache {
@@ -57,6 +61,7 @@ impl Cache {
             canonize_mode,
             cache: Arc::new(DashMap::<CacheKey, (usize, Vec<usize>)>::new()),
             fragment_labels: Arc::new(DashMap::<BitSet, Labeling>::new()),
+            count: Arc::new(AtomicUsize::from(0)),
         }
     }
 
@@ -95,7 +100,7 @@ impl Cache {
     /// preempted by the cached assembly state.
     /// See https://github.com/DaymudeLab/assembly-theory/pull/95 for details.
     pub fn memoize_state(
-        &self,
+        &mut self,
         mol: &Molecule,
         state: &[BitSet],
         state_index: usize,
@@ -121,9 +126,14 @@ impl Cache {
                 .value()
                 .clone();
             if cached_index <= state_index && cached_order < *removal_order {
+                self.count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                 return true;
             }
         }
         false
     }
+
+    pub fn count(&mut self) -> usize {
+        self.count.load(std::sync::atomic::Ordering::Relaxed)
+    }
 }

From 4f87f7a6852e637f50dd572e33eff97fc06d7722 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 21 Aug 2025 16:04:42 -0700
Subject: [PATCH 20/51] Faster matches

---
 src/assembly.rs | 103 ++++++++++++++++++++++++++++++++++--------------
 src/bounds.rs   |   4 +-
 2 files changed, 75 insertions(+), 32 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 374dcc3b..17dd4e79 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -19,7 +19,7 @@
 //! ```
 
 use std::{
-    collections::HashMap, sync::{
+    collections::{HashMap, HashSet}, sync::{
         atomic::{AtomicUsize, Ordering::Relaxed},
         Arc,
     }
@@ -28,15 +28,16 @@ use std::{
 use bit_set::BitSet;
 use clap::ValueEnum;
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
+use petgraph::graph::EdgeIndex;
 
 use crate::{
     bounds::{bound_exceeded, Bound},
     canonize::{canonize, CanonizeMode, Labeling},
-    enumerate::{enumerate_subgraphs, EnumerateMode},
+    enumerate::{EnumerateMode},
     kernels::{KernelMode, deletion_kernel, inclusion_kernel},
     memoize::{Cache, MemoizeMode},
     molecule::Molecule,
-    utils::connected_components_under_edges,
+    utils::{connected_components_under_edges, edge_neighbors},
     reductions::CompatGraph,
 };
 
@@ -98,38 +99,82 @@ pub fn depth(mol: &Molecule) -> u32 {
 ///
 /// Return all pairs of non-overlapping, isomorphic subgraphs in the molecule,
 /// sorted to guarantee deterministic iteration.
-pub fn matches(
-    mol: &Molecule,
-    enumerate_mode: EnumerateMode,
-    canonize_mode: CanonizeMode,
-) -> Vec<(BitSet, BitSet)> {
-    // Enumerate all connected, non-induced subgraphs with at most |E|/2 edges
-    // and bin them into isomorphism classes using canonization.
-    let mut isomorphism_classes = HashMap::<Labeling, Vec<BitSet>>::new();
-    for subgraph in enumerate_subgraphs(mol, enumerate_mode) {
-        isomorphism_classes
-            .entry(canonize(mol, &subgraph, canonize_mode))
-            .and_modify(|bucket| bucket.push(subgraph.clone()))
-            .or_insert(vec![subgraph.clone()]);
+pub fn matches(mol: &Molecule) -> Vec<(BitSet, BitSet)> {
+    let num_edges = mol.graph().edge_count();
+    let mut subgraphs: HashSet<BitSet> = HashSet::new();
+    let mut size_one_subgraphs: HashSet<BitSet> = HashSet::new();
+    let mut matches: Vec<(BitSet, BitSet)> = Vec::new();
+    let mut buckets: HashMap<Labeling, Vec<BitSet>> = HashMap::new();
+
+    // Generate subgraphs with one edge
+    for i in 0..num_edges {
+        let mut new_bitset = BitSet::with_capacity(num_edges);
+        new_bitset.insert(i);
+        size_one_subgraphs.insert(new_bitset);
+    }
+
+    // Generate subgraphs with two edges
+    for subgraph in size_one_subgraphs.iter() {
+        let mut neighborhood = BitSet::with_capacity(num_edges);
+        for e in subgraph {
+            neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
+        }
+        neighborhood.difference_with(subgraph);
+
+        for e in neighborhood.iter() {
+            let mut new_subgraph = subgraph.clone();
+            new_subgraph.insert(e);
+            subgraphs.insert(new_subgraph);
+        }
     }
 
-    // In each isomorphism class, collect non-overlapping pairs of subgraphs.
-    let mut matches = Vec::new();
-    for bucket in isomorphism_classes.values() {
-        for (i, first) in bucket.iter().enumerate() {
-            for second in &bucket[i..] {
-                if first.is_disjoint(second) {
-                    if first > second {
+    while subgraphs.len() > 0 {
+        buckets.clear();
+        // Generate buckets
+        for g in subgraphs.iter() {
+            let canon = canonize(mol, g, CanonizeMode::TreeNauty);
+            buckets.entry(canon)
+                .and_modify(|v| v.push(g.clone()))
+                .or_insert(vec![g.clone()]);
+        }
+
+        subgraphs.clear();
+        for (_, bucket) in buckets.iter() {
+            // Generate matches
+            let mut has_match = BitSet::with_capacity(bucket.len());
+            for (i, first) in bucket.iter().enumerate() {
+                for (j, second) in bucket[i+1..].iter().enumerate() {
+                    if first.intersection(second).count() == 0 {
+                        if first > second {
                         matches.push((first.clone(), second.clone()));
-                    } else {
-                        matches.push((second.clone(), first.clone()));
+                        } else {
+                            matches.push((second.clone(), first.clone()));
+                        }
+                        has_match.insert(i);
+                        has_match.insert(i + 1 + j);
                     }
                 }
             }
+
+            // Generate new subgraphs
+            // Only extend if subgraph has a match
+            for i in has_match.iter() {
+                let subgraph = &bucket[i];
+                let mut neighborhood = BitSet::with_capacity(num_edges);
+                for e in subgraph {
+                    neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
+                }
+                neighborhood.difference_with(subgraph);
+
+                for e in neighborhood.iter() {
+                    let mut new_subgraph = subgraph.clone();
+                    new_subgraph.insert(e);
+                    subgraphs.insert(new_subgraph);
+                }
+            }
         }
     }
 
-    // Sort pairs in a deterministic order and return.
     matches.sort_by(|e1, e2| {
         let ord = [
             e2.0.len().cmp(&e1.0.len()), // Decreasing subgraph size.
@@ -468,7 +513,7 @@ pub fn recurse_index_search(
 /// ```
 pub fn index_search(
     mol: &Molecule,
-    enumerate_mode: EnumerateMode,
+    _enumerate_mode: EnumerateMode,
     canonize_mode: CanonizeMode,
     parallel_mode: ParallelMode,
     memoize_mode: MemoizeMode,
@@ -479,7 +524,7 @@ pub fn index_search(
     // Catch not-yet-implemented modes.
 
     // Enumerate non-overlapping isomorphic subgraph pairs.
-    let matches = matches(mol, enumerate_mode, canonize_mode);
+    let matches = matches(mol);
 
     // Create memoization cache.
     let mut cache = Cache::new(memoize_mode, canonize_mode);
@@ -520,8 +565,6 @@ pub fn index_search(
         kernel_mode,
     );
 
-    println!("{}", cache.count());
-
     (index as u32, matches.len() as u32, states_searched)
 }
 
diff --git a/src/bounds.rs b/src/bounds.rs
index d4e5aa39..e13b097d 100644
--- a/src/bounds.rs
+++ b/src/bounds.rs
@@ -126,7 +126,7 @@ fn log_bound(fragments: &[BitSet]) -> usize {
 }
 
 /// TODO
-fn int_bound(fragments: &[BitSet], m: usize) -> usize {
+fn _int_bound(fragments: &[BitSet], m: usize) -> usize {
     let mut max_s: usize = 0;
     let mut frag_sizes: Vec<usize> = Vec::new();
 
@@ -169,7 +169,7 @@ fn int_bound_new(masks: &Vec<Vec<BitSet>>) -> usize {
     max
 }
 
-fn int_bound_seet(masks: &Vec<Vec<BitSet>>, largest_remove: usize) -> usize {
+fn _int_bound_seet(masks: &Vec<Vec<BitSet>>, largest_remove: usize) -> usize {
     let mut dup_bonds_2 = 0;
     let mut dup_bonds_total;
     let mut max;

From fef2d1362ad36e23d46aaefb44e129b8e9c861d6 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 21 Aug 2025 16:04:54 -0700
Subject: [PATCH 21/51] Remove cache counter

---
 src/memoize.rs | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/memoize.rs b/src/memoize.rs
index c2029d5b..2df74e3c 100644
--- a/src/memoize.rs
+++ b/src/memoize.rs
@@ -6,8 +6,6 @@ use bit_set::BitSet;
 use clap::ValueEnum;
 use dashmap::DashMap;
 
-use std::sync::atomic::AtomicUsize;
-
 use crate::{
     canonize::{canonize, CanonizeMode, Labeling},
     molecule::Molecule,
@@ -49,8 +47,6 @@ pub struct Cache {
     /// A parallel-aware map from fragments to their canonical labelings; only
     /// used with [`MemoizeMode::CanonIndex`].
     fragment_labels: Arc<DashMap<BitSet, Labeling>>,
-
-    count: Arc<AtomicUsize>,
 }
 
 impl Cache {
@@ -61,7 +57,6 @@ impl Cache {
             canonize_mode,
             cache: Arc::new(DashMap::<CacheKey, (usize, Vec<usize>)>::new()),
             fragment_labels: Arc::new(DashMap::<BitSet, Labeling>::new()),
-            count: Arc::new(AtomicUsize::from(0)),
         }
     }
 
@@ -126,14 +121,9 @@ impl Cache {
                 .value()
                 .clone();
             if cached_index <= state_index && cached_order < *removal_order {
-                self.count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                 return true;
             }
         }
         false
     }
-
-    pub fn count(&mut self) -> usize {
-        self.count.load(std::sync::atomic::Ordering::Relaxed)
-    }
 }

From ccd5a7916703319660615e4fd909804d267436e4 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Tue, 26 Aug 2025 11:44:32 -0700
Subject: [PATCH 22/51] new memoize strategy

---
 src/assembly.rs | 52 +++++++++++++++++++++++-------
 src/main.rs     | 41 ++++++++++++++++++++++++
 src/memoize.rs  | 84 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 166 insertions(+), 11 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 17dd4e79..d6bda21d 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -35,7 +35,7 @@ use crate::{
     canonize::{canonize, CanonizeMode, Labeling},
     enumerate::{EnumerateMode},
     kernels::{KernelMode, deletion_kernel, inclusion_kernel},
-    memoize::{Cache, MemoizeMode},
+    memoize::{Cache, MemoizeMode, NewCache},
     molecule::Molecule,
     utils::{connected_components_under_edges, edge_neighbors},
     reductions::CompatGraph,
@@ -284,7 +284,7 @@ pub fn recurse_index_search(
     state_index: usize,
     best_index: Arc<AtomicUsize>,
     bounds: &[Bound],
-    cache: &mut Cache,
+    cache: &mut NewCache,
     parallel_mode: ParallelMode,
     kernel_mode: KernelMode,
 ) -> (usize, usize) {
@@ -310,17 +310,31 @@ pub fn recurse_index_search(
     /*while masks.iter().last().unwrap().len() == 0 {
         masks.pop();
     }*/
-    /*for (i, m) in masks[0].iter().enumerate() {
+    /*let mut state = Vec::new();
+    for (i, m) in masks[0].iter().enumerate() {
         state[i] = m.clone();
     }*/
-    /*let mut state = Vec::new();
-    for m in masks[0].iter() {
-        if m.len() != 0 {
-            state.extend(connected_components_under_edges(mol.graph(), m));
+    /*let mut temp = Vec::new();
+    let state = {
+        if removal_order.len() == 0 {
+            for m in masks[0].iter() {
+                if m.len() != 0 {
+                    temp.extend(connected_components_under_edges(mol.graph(), m));
+                }
+            }
+            temp.retain(|i| i.len() > 1);
+            &temp
         }
-    }
-    state.retain(|i| i.len() > 1);*/
+        else {
+            state
+        }
+    };*/
 
+    /*let mut masks: Vec<Vec<BitSet>> = vec![vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; largest_remove - 1];
+    for v in subgraph.iter() {
+        let m = &matches[v];
+        is_usable(&state, &m.0, &m.1, &mut masks);
+    }*/
 
     // If any bounds would prune this assembly state or if memoization is
     // enabled and this assembly state is preempted by the cached state, halt.
@@ -335,11 +349,16 @@ pub fn recurse_index_search(
         largest_remove,
         bounds,
         &masks,
-    ) || cache.memoize_state(mol, &state, state_index, &removal_order)
+    ) || cache.memoize_state(mol, &state, state_index)
     {
         return (state_index, 1);
     }
 
+    let mut state = Vec::new();
+    for (i, m) in masks[0].iter().enumerate() {
+        state.push(m.clone());
+    }
+
     // Apply kernels
     let mut must_include = matches.len();
     if kernel_mode != KernelMode::None {
@@ -442,6 +461,10 @@ pub fn recurse_index_search(
             .for_each(|v| recurse_on_match(*v));
     }
 
+    if removal_order.len() == 1 {
+        println!("{:?}, {}", removal_order, states_searched.load(Relaxed));
+    }
+
     (
         best_child_index.load(Relaxed),
         states_searched.load(Relaxed),
@@ -525,9 +548,13 @@ pub fn index_search(
 
     // Enumerate non-overlapping isomorphic subgraph pairs.
     let matches = matches(mol);
+    /*for (i, m) in matches.iter().enumerate() {
+        println!("{i}: {:?}", m);
+    }*/
 
     // Create memoization cache.
-    let mut cache = Cache::new(memoize_mode, canonize_mode);
+    //let mut cache = Cache::new(memoize_mode, canonize_mode);
+    let mut cache = NewCache::new();
 
     let graph = {
         if clique {
@@ -550,6 +577,7 @@ pub fn index_search(
     // Search for the shortest assembly pathway recursively.
     let edge_count = mol.graph().edge_count();
     let best_index = Arc::new(AtomicUsize::from(edge_count - 1));
+    //let best_index = Arc::new(AtomicUsize::from(22));
     let (index, states_searched) = recurse_index_search(
         mol,
         &matches,
@@ -565,6 +593,8 @@ pub fn index_search(
         kernel_mode,
     );
 
+    println!("Bounded: {}", cache.count());
+
     (index as u32, matches.len() as u32, states_searched)
 }
 
diff --git a/src/main.rs b/src/main.rs
index 8b30222a..e8f36260 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -122,6 +122,47 @@ fn main() -> Result<()> {
         }
     }
 
+
+    /*use std::ffi::{OsStr, OsString};
+    use std::path::Path;
+    use assembly_theory::molecule::Molecule;
+    use rayon::iter::{IntoParallelIterator, ParallelIterator, IntoParallelRefIterator, IndexedParallelIterator};
+
+    let paths = fs::read_dir(Path::new("data").join("plycyclic_hydrocarbons")).unwrap();
+    let mut mol_list: Vec<Molecule> = Vec::new();
+    let mut names: Vec<OsString> = Vec::new();
+
+    for path in paths {
+        let name = path.unwrap().path();
+        if name.extension().and_then(OsStr::to_str) != Some("mol") {
+            continue;
+        }
+        names.push(name.file_name().unwrap().to_os_string());
+        mol_list.push(
+            parse_molfile_str(
+                &fs::read_to_string(name.clone())
+                    .expect(&format!("Could not read file {name:?}")),
+            )
+            .expect(&format!("Failed to parse {name:?}")),
+        );
+    }
+
+    names.par_iter().zip(mol_list.par_iter()).for_each(|(n, mol)| {
+        let index = index_search(
+            &mol,
+            cli.enumerate,
+            cli.canonize,
+            cli.parallel,
+            cli.memoize,
+            cli.kernel,            
+            boundlist,
+            cli.clique,
+        );
+        println!("{:?}: MA: {} Space: {}, Searched: {}", n, index.0, index.1, index.2);
+    });
+    std::process::exit(1);*/
+
+
     // Call index calculation with all the various options.
     let (index, num_matches, states_searched) = index_search(
         &mol,
diff --git a/src/memoize.rs b/src/memoize.rs
index 2df74e3c..4593b4b3 100644
--- a/src/memoize.rs
+++ b/src/memoize.rs
@@ -5,6 +5,7 @@ use std::sync::Arc;
 use bit_set::BitSet;
 use clap::ValueEnum;
 use dashmap::DashMap;
+use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
 
 use crate::{
     canonize::{canonize, CanonizeMode, Labeling},
@@ -47,6 +48,8 @@ pub struct Cache {
     /// A parallel-aware map from fragments to their canonical labelings; only
     /// used with [`MemoizeMode::CanonIndex`].
     fragment_labels: Arc<DashMap<BitSet, Labeling>>,
+
+    counter: Arc<AtomicUsize>,
 }
 
 impl Cache {
@@ -57,6 +60,7 @@ impl Cache {
             canonize_mode,
             cache: Arc::new(DashMap::<CacheKey, (usize, Vec<usize>)>::new()),
             fragment_labels: Arc::new(DashMap::<BitSet, Labeling>::new()),
+            counter: Arc::new(AtomicUsize::from(0)),
         }
     }
 
@@ -121,9 +125,89 @@ impl Cache {
                 .value()
                 .clone();
             if cached_index <= state_index && cached_order < *removal_order {
+                self.counter.fetch_add(1, Relaxed);
+                //println!("{:?}", state);
                 return true;
             }
         }
         false
     }
+
+    pub fn count(&self) -> usize {
+        self.counter.load(Relaxed)
+    }
+}
+
+#[derive(Clone)]
+pub struct NewCache {
+    cache: Arc<DashMap<Vec<usize>, usize>>,
+    label_to_canon_id: Arc<DashMap<Labeling, usize>>,
+    frag_to_canon_id: Arc<DashMap<BitSet, usize>>,
+    next_id: Arc<AtomicUsize>,
+    counter: Arc<AtomicUsize>,
 }
+
+impl NewCache {
+    pub fn new() -> Self {
+        Self {
+            cache: Arc::new(DashMap::<Vec<usize>, usize>::new()),
+            label_to_canon_id: Arc::new(DashMap::<Labeling, usize>::new()),
+            frag_to_canon_id: Arc::new(DashMap::<BitSet, usize>::new()),
+            next_id: Arc::new(AtomicUsize::from(0)),
+            counter: Arc::new(AtomicUsize::from(0)),
+        }
+    }
+
+    pub fn memoize_state(&mut self, mol: &Molecule, state: &[BitSet], state_index: usize) -> bool {
+        let mut frag_ids = Vec::new();
+        for frag in state {
+            let id = self.get_canon_id(mol, frag);
+            frag_ids.push(id);
+        }
+        frag_ids.sort();
+
+        let mut stop = false;
+
+        self.cache
+            .entry(frag_ids)
+            .and_modify(|val| {
+                if *val > state_index {
+                    *val = state_index;
+                } else {
+                    self.counter.fetch_add(1, Relaxed);
+                    stop = true;
+                }
+            })
+            .or_insert(state_index);
+
+        stop
+    }
+
+    fn get_canon_id(&mut self, mol: &Molecule, frag: &BitSet) -> usize {
+        // If frag has id, use it
+        if let Some(x) = self.frag_to_canon_id.get(frag) {
+            *x
+        }
+        // Otherwise canonize to get labeling
+        else {
+            let canon = canonize(mol, frag, CanonizeMode::TreeNauty);
+            // If label has id, use it
+            if let Some(x) = self.label_to_canon_id.get(&canon) {
+                let id = *x;
+                self.frag_to_canon_id.insert(frag.clone(), id);
+                id
+            }
+            // Otherwise asign new id
+            else {
+                let id = self.next_id.fetch_add(1, Relaxed);
+                self.label_to_canon_id.insert(canon, id);
+                self.frag_to_canon_id.insert(frag.clone(), id);
+                id
+            }
+        }
+    }
+
+    pub fn count(&self) -> usize {
+        self.counter.load(Relaxed)
+    }
+}
\ No newline at end of file

From 4f0f7c0b78ce21b1ef1ce9ba5cf758cddcef1e02 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Tue, 26 Aug 2025 15:48:27 -0700
Subject: [PATCH 23/51] Modified int bound

---
 src/assembly.rs   | 53 +++++++++++++++++++++++++++++++++++++----------
 src/main.rs       |  2 +-
 src/memoize.rs    | 17 +++++++++------
 src/reductions.rs |  4 ++++
 4 files changed, 58 insertions(+), 18 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index d6bda21d..ec1bcdea 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -307,6 +307,13 @@ pub fn recurse_index_search(
         let m = &matches[v];
         is_usable(state, &m.0, &m.1, &mut masks);
     }
+
+    let mut state = Vec::new();
+    for (i, m) in masks[0].iter().enumerate() {
+        if m.len() != 0 {
+            state.extend(connected_components_under_edges(mol.graph(), m));
+        }
+    }
     /*while masks.iter().last().unwrap().len() == 0 {
         masks.pop();
     }*/
@@ -336,9 +343,34 @@ pub fn recurse_index_search(
         is_usable(&state, &m.0, &m.1, &mut masks);
     }*/
 
+    let mut max = Vec::new();
+    for (i, list) in masks.iter().enumerate() {
+        let mut val = 0;
+        let i = i + 2;
+        for (j, frag) in list.iter().enumerate() {
+            let mf = masks[0][j].len();
+            let mi = frag.len();
+            let x = mf + (mi % i) - mi;
+            val += mf - (mi / i) - x / (i-1) - (x % (i-1) != 0) as usize;
+        }
+        val -= (i as f32).log2().ceil() as usize;
+        if max.len() == 0 {
+            max.push(val);
+        }
+        else {
+            max.push(val.max(max[max.len() - 1]));
+        }
+    }
+
+    let best = best_index.load(Relaxed);
+    let mut idx = 2;
+    while (idx < max.len() + 2) && best + max[idx - 2] <= state_index {
+        idx += 1;
+    }
+
     // If any bounds would prune this assembly state or if memoization is
     // enabled and this assembly state is preempted by the cached state, halt.
-    if bound_exceeded(
+    /*if bound_exceeded(
         mol,
         matches,
         graph,
@@ -349,14 +381,13 @@ pub fn recurse_index_search(
         largest_remove,
         bounds,
         &masks,
-    ) || cache.memoize_state(mol, &state, state_index)
+    ) || cache.memoize_state(mol, &state, state_index, &removal_order)
     {
         return (state_index, 1);
-    }
+    }*/
 
-    let mut state = Vec::new();
-    for (i, m) in masks[0].iter().enumerate() {
-        state.push(m.clone());
+    if cache.memoize_state(mol, &state, state_index, &removal_order) {
+        return (state_index, 1);
     }
 
     // Apply kernels
@@ -450,7 +481,7 @@ pub fn recurse_index_search(
     if parallel_mode == ParallelMode::None {
         subgraph
             .iter()
-            .filter(|v| *v <= must_include)
+            .filter(|v| *v <= must_include && matches[*v].0.len() >= idx)
             .for_each(|v| recurse_on_match(v));
     } else {
         let _ = subgraph
@@ -461,9 +492,9 @@ pub fn recurse_index_search(
             .for_each(|v| recurse_on_match(*v));
     }
 
-    if removal_order.len() == 1 {
+    /*if removal_order.len() == 1 {
         println!("{:?}, {}", removal_order, states_searched.load(Relaxed));
-    }
+    }*/
 
     (
         best_child_index.load(Relaxed),
@@ -577,7 +608,7 @@ pub fn index_search(
     // Search for the shortest assembly pathway recursively.
     let edge_count = mol.graph().edge_count();
     let best_index = Arc::new(AtomicUsize::from(edge_count - 1));
-    //let best_index = Arc::new(AtomicUsize::from(22));
+    //let best_index = Arc::new(AtomicUsize::from(12));
     let (index, states_searched) = recurse_index_search(
         mol,
         &matches,
@@ -593,7 +624,7 @@ pub fn index_search(
         kernel_mode,
     );
 
-    println!("Bounded: {}", cache.count());
+    //println!("Bounded: {}", cache.count());
 
     (index as u32, matches.len() as u32, states_searched)
 }
diff --git a/src/main.rs b/src/main.rs
index e8f36260..36f2f937 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -128,7 +128,7 @@ fn main() -> Result<()> {
     use assembly_theory::molecule::Molecule;
     use rayon::iter::{IntoParallelIterator, ParallelIterator, IntoParallelRefIterator, IndexedParallelIterator};
 
-    let paths = fs::read_dir(Path::new("data").join("plycyclic_hydrocarbons")).unwrap();
+    let paths = fs::read_dir(Path::new("data").join("coconut_220")).unwrap();
     let mut mol_list: Vec<Molecule> = Vec::new();
     let mut names: Vec<OsString> = Vec::new();
 
diff --git a/src/memoize.rs b/src/memoize.rs
index 4593b4b3..80efa27b 100644
--- a/src/memoize.rs
+++ b/src/memoize.rs
@@ -140,7 +140,7 @@ impl Cache {
 
 #[derive(Clone)]
 pub struct NewCache {
-    cache: Arc<DashMap<Vec<usize>, usize>>,
+    cache: Arc<DashMap<Vec<usize>, (usize, Vec<usize>)>>,
     label_to_canon_id: Arc<DashMap<Labeling, usize>>,
     frag_to_canon_id: Arc<DashMap<BitSet, usize>>,
     next_id: Arc<AtomicUsize>,
@@ -150,7 +150,7 @@ pub struct NewCache {
 impl NewCache {
     pub fn new() -> Self {
         Self {
-            cache: Arc::new(DashMap::<Vec<usize>, usize>::new()),
+            cache: Arc::new(DashMap::<Vec<usize>, (usize, Vec<usize>)>::new()),
             label_to_canon_id: Arc::new(DashMap::<Labeling, usize>::new()),
             frag_to_canon_id: Arc::new(DashMap::<BitSet, usize>::new()),
             next_id: Arc::new(AtomicUsize::from(0)),
@@ -158,7 +158,7 @@ impl NewCache {
         }
     }
 
-    pub fn memoize_state(&mut self, mol: &Molecule, state: &[BitSet], state_index: usize) -> bool {
+    pub fn memoize_state(&mut self, mol: &Molecule, state: &[BitSet], state_index: usize, removal_order: &Vec<usize>) -> bool {
         let mut frag_ids = Vec::new();
         for frag in state {
             let id = self.get_canon_id(mol, frag);
@@ -171,14 +171,15 @@ impl NewCache {
         self.cache
             .entry(frag_ids)
             .and_modify(|val| {
-                if *val > state_index {
-                    *val = state_index;
+                if val.0 > state_index || val.1 > *removal_order {
+                    val.0 = state_index;
+                    val.1 = removal_order.clone();
                 } else {
                     self.counter.fetch_add(1, Relaxed);
                     stop = true;
                 }
             })
-            .or_insert(state_index);
+            .or_insert((state_index, removal_order.clone()));
 
         stop
     }
@@ -207,6 +208,10 @@ impl NewCache {
         }
     }
 
+    pub fn add_count(&mut self) {
+        self.counter.fetch_add(1, Relaxed);
+    }
+
     pub fn count(&self) -> usize {
         self.counter.load(Relaxed)
     }
diff --git a/src/reductions.rs b/src/reductions.rs
index fc0e82ce..5ac282c9 100644
--- a/src/reductions.rs
+++ b/src/reductions.rs
@@ -10,6 +10,7 @@
 //! edge directionality.
 
 use bit_set::BitSet;
+use std::time::Instant;
 
 /// Graph representation of the compatibility of duplicatable subgraph pairs.
 pub struct CompatGraph {
@@ -21,6 +22,7 @@ pub struct CompatGraph {
 impl CompatGraph {
     /// Constructs a compatibility graph given a set of matches.
     pub fn new(init_matches: &Vec<(BitSet, BitSet)>) -> Self {
+        let start = Instant::now();
         let size = init_matches.len();
 
         // Initialize weights and empty graph
@@ -48,6 +50,8 @@ impl CompatGraph {
             }
         }
 
+        println!("Graph Time: {:?}", start.elapsed());
+
         Self {
             graph: init_graph,
         }

From 7bd9fdd1360754c67386b1027f0f99ae528033f7 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Wed, 27 Aug 2025 16:39:00 -0700
Subject: [PATCH 24/51] New (unoptimized) method of calculating masks

---
 src/assembly.rs | 116 ++++++++++++++++++++++++------------------------
 src/memoize.rs  |   8 +++-
 2 files changed, 66 insertions(+), 58 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index ec1bcdea..568c8a30 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -128,6 +128,7 @@ pub fn matches(mol: &Molecule) -> Vec<(BitSet, BitSet)> {
         }
     }
 
+    // Generate subgraphs with more than two edges
     while subgraphs.len() > 0 {
         buckets.clear();
         // Generate buckets
@@ -175,6 +176,7 @@ pub fn matches(mol: &Molecule) -> Vec<(BitSet, BitSet)> {
         }
     }
 
+    // Sort matches
     matches.sort_by(|e1, e2| {
         let ord = [
             e2.0.len().cmp(&e1.0.len()), // Decreasing subgraph size.
@@ -291,8 +293,6 @@ pub fn recurse_index_search(
     //println!("{:?}", removal_order);
     //println!("{:?}", state);
 
-    // An upper bound on the size of fragments that can be
-    // removed from this or any descendant assembly state.
     let largest_remove = {
         if let Some(v) = subgraph.iter().next() {
             matches[v].0.len()
@@ -302,47 +302,63 @@ pub fn recurse_index_search(
         }
     };
 
+    // Prune edges of state
+    let mut subgraphs: HashSet<BitSet> = HashSet::new();
     let mut masks: Vec<Vec<BitSet>> = vec![vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; largest_remove - 1];
-    for v in subgraph.iter() {
-        let m = &matches[v];
-        is_usable(state, &m.0, &m.1, &mut masks);
-    }
+    subgraph = subgraph.iter().filter(|v| {
+        let h1 = &matches[*v].0;
+        let h2 = &matches[*v].1;
 
-    let mut state = Vec::new();
-    for (i, m) in masks[0].iter().enumerate() {
-        if m.len() != 0 {
-            state.extend(connected_components_under_edges(mol.graph(), m));
+        let pre_have_h1 = subgraphs.contains(h1);
+        let pre_have_h2 = subgraphs.contains(h2);
+
+        let mut have_h1 = pre_have_h1;
+        let mut have_h2 = pre_have_h2;
+
+        let mut idx1 = 0;
+        let mut idx2 = 0;
+
+        if !pre_have_h1 {
+            if let Some((idx, _)) = state.iter().enumerate().find(|(_, frag)| h1.is_subset(frag)) {
+                idx1 = idx;
+                have_h1 = true;
+            }
         }
-    }
-    /*while masks.iter().last().unwrap().len() == 0 {
-        masks.pop();
-    }*/
-    /*let mut state = Vec::new();
-    for (i, m) in masks[0].iter().enumerate() {
-        state[i] = m.clone();
-    }*/
-    /*let mut temp = Vec::new();
-    let state = {
-        if removal_order.len() == 0 {
-            for m in masks[0].iter() {
-                if m.len() != 0 {
-                    temp.extend(connected_components_under_edges(mol.graph(), m));
-                }
+        if !pre_have_h2 {
+            if let Some((idx, _)) = state.iter().enumerate().find(|(_, frag)| h2.is_subset(frag)) {
+                idx2 = idx;
+                have_h2 = true;
             }
-            temp.retain(|i| i.len() > 1);
-            &temp
         }
-        else {
-            state
+
+        if have_h1 && have_h2 {
+            if !pre_have_h1 {
+                subgraphs.insert(h1.clone());
+                masks[h1.len() - 2][idx1].union_with(h1);
+            }
+            if !pre_have_h2 {
+                subgraphs.insert(h2.clone());
+                masks[h1.len() - 2][idx2].union_with(h2);
+            }
         }
-    };*/
+
+        have_h1 && have_h2
+    }).collect();
 
     /*let mut masks: Vec<Vec<BitSet>> = vec![vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; largest_remove - 1];
-    for v in subgraph.iter() {
-        let m = &matches[v];
-        is_usable(&state, &m.0, &m.1, &mut masks);
-    }*/
+    let subgraph1 = subgraph.iter().filter(|v| {
+        let m = &matches[*v];
+        is_usable(state, &m.0, &m.1, &mut masks)
+    }).collect::<BitSet>();*/
+
+    let mut state = Vec::new();
+    for m in masks[0].iter() {
+        if m.len() != 0 {
+            state.extend(connected_components_under_edges(mol.graph(), m));
+        }
+    }
 
+    // Int bound
     let mut max = Vec::new();
     for (i, list) in masks.iter().enumerate() {
         let mut val = 0;
@@ -368,24 +384,7 @@ pub fn recurse_index_search(
         idx += 1;
     }
 
-    // If any bounds would prune this assembly state or if memoization is
-    // enabled and this assembly state is preempted by the cached state, halt.
-    /*if bound_exceeded(
-        mol,
-        matches,
-        graph,
-        &state,
-        &subgraph,
-        state_index,
-        best_index.load(Relaxed),
-        largest_remove,
-        bounds,
-        &masks,
-    ) || cache.memoize_state(mol, &state, state_index, &removal_order)
-    {
-        return (state_index, 1);
-    }*/
-
+    // Memoization
     if cache.memoize_state(mol, &state, state_index, &removal_order) {
         return (state_index, 1);
     }
@@ -414,6 +413,7 @@ pub fn recurse_index_search(
     let recurse_on_match = |v: usize| {
         let (h1, h2) = &matches[v];
         if let Some(fragments) = fragments(mol, &state, h1, h2) {
+
             // If using depth-one parallelism, all descendant states should be
             // computed serially.
             let new_parallel = if parallel_mode == ParallelMode::DepthOne {
@@ -442,8 +442,10 @@ pub fn recurse_index_search(
             }
             else {
                 sub_clone = BitSet::with_capacity(matches.len());
-                for j in (v+1)..matches.len() {
-                    sub_clone.insert(j);
+                for j in subgraph.iter() {
+                    if j > v {
+                        sub_clone.insert(j);
+                    }
                 }
             }
 
@@ -486,7 +488,7 @@ pub fn recurse_index_search(
     } else {
         let _ = subgraph
             .iter()
-            .filter(|v| *v <= must_include)
+            .filter(|v| *v <= must_include /*&& matches[*v].0.len() >= idx*/)
             .collect::<Vec<usize>>()
             .par_iter()
             .for_each(|v| recurse_on_match(*v));
@@ -585,7 +587,7 @@ pub fn index_search(
 
     // Create memoization cache.
     //let mut cache = Cache::new(memoize_mode, canonize_mode);
-    let mut cache = NewCache::new();
+    let mut cache = NewCache::new(memoize_mode);
 
     let graph = {
         if clique {
@@ -624,7 +626,7 @@ pub fn index_search(
         kernel_mode,
     );
 
-    //println!("Bounded: {}", cache.count());
+    println!("Bounded: {}", cache.count());
 
     (index as u32, matches.len() as u32, states_searched)
 }
diff --git a/src/memoize.rs b/src/memoize.rs
index 80efa27b..a251639e 100644
--- a/src/memoize.rs
+++ b/src/memoize.rs
@@ -140,6 +140,7 @@ impl Cache {
 
 #[derive(Clone)]
 pub struct NewCache {
+    memoize_mode: MemoizeMode,
     cache: Arc<DashMap<Vec<usize>, (usize, Vec<usize>)>>,
     label_to_canon_id: Arc<DashMap<Labeling, usize>>,
     frag_to_canon_id: Arc<DashMap<BitSet, usize>>,
@@ -148,8 +149,9 @@ pub struct NewCache {
 }
 
 impl NewCache {
-    pub fn new() -> Self {
+    pub fn new(mode: MemoizeMode) -> Self {
         Self {
+            memoize_mode: mode,
             cache: Arc::new(DashMap::<Vec<usize>, (usize, Vec<usize>)>::new()),
             label_to_canon_id: Arc::new(DashMap::<Labeling, usize>::new()),
             frag_to_canon_id: Arc::new(DashMap::<BitSet, usize>::new()),
@@ -159,6 +161,10 @@ impl NewCache {
     }
 
     pub fn memoize_state(&mut self, mol: &Molecule, state: &[BitSet], state_index: usize, removal_order: &Vec<usize>) -> bool {
+        if self.memoize_mode == MemoizeMode::None {
+            return false;
+        }
+        
         let mut frag_ids = Vec::new();
         for frag in state {
             let id = self.get_canon_id(mol, frag);

From 66256cd67a59af98590a53c4c7f678888fbceb8a Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 28 Aug 2025 14:30:43 -0700
Subject: [PATCH 25/51] Optimize masks computation

---
 Cargo.lock      | 92 +++++++++++++++++++++++++++++++++++++++++++++++++
 Cargo.toml      |  2 ++
 src/assembly.rs | 71 ++++++++++++++++++++++++--------------
 3 files changed, 139 insertions(+), 26 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 372210a9..bc179a87 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,6 +2,19 @@
 # It is not intended for manual editing.
 version = 4
 
+[[package]]
+name = "ahash"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+dependencies = [
+ "cfg-if",
+ "getrandom",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
 [[package]]
 name = "aho-corasick"
 version = "1.1.3"
@@ -71,12 +84,14 @@ checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
 name = "assembly-theory"
 version = "0.4.0"
 dependencies = [
+ "ahash",
  "anyhow",
  "bit-set",
  "clap 4.5.41",
  "criterion",
  "csv",
  "dashmap",
+ "fxhash",
  "graph-canon",
  "petgraph",
  "pyo3",
@@ -168,6 +183,12 @@ version = "3.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
 
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
 [[package]]
 name = "cast"
 version = "0.3.0"
@@ -433,6 +454,27 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
 
+[[package]]
+name = "fxhash"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
+dependencies = [
+ "byteorder",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasi",
+]
+
 [[package]]
 name = "glob"
 version = "0.3.2"
@@ -839,6 +881,12 @@ dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
 [[package]]
 name = "radium"
 version = "0.7.0"
@@ -1096,6 +1144,12 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
+[[package]]
+name = "version_check"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+
 [[package]]
 name = "walkdir"
 version = "2.5.0"
@@ -1106,6 +1160,15 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "wasi"
+version = "0.14.2+wasi-0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
+dependencies = [
+ "wit-bindgen-rt",
+]
+
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.100"
@@ -1363,6 +1426,15 @@ version = "0.53.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
 
+[[package]]
+name = "wit-bindgen-rt"
+version = "0.39.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
+dependencies = [
+ "bitflags 2.9.1",
+]
+
 [[package]]
 name = "wyz"
 version = "0.5.1"
@@ -1371,3 +1443,23 @@ checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
 dependencies = [
  "tap",
 ]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
diff --git a/Cargo.toml b/Cargo.toml
index f056c248..bbd2c373 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,8 @@ petgraph = "0.6.5"
 pyo3 = { version = "0.24.1", features = ["abi3-py310", "extension-module"]}
 rayon = "1.10.0"
 dashmap = "6.1.0"
+ahash = "0.8.12"
+fxhash = "0.2.1"
 
 [dev-dependencies]
 criterion = "0.3"
diff --git a/src/assembly.rs b/src/assembly.rs
index 568c8a30..dfac7fd8 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -29,6 +29,7 @@ use bit_set::BitSet;
 use clap::ValueEnum;
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use petgraph::graph::EdgeIndex;
+use ahash::{AHasher, RandomState};
 
 use crate::{
     bounds::{bound_exceeded, Bound},
@@ -290,7 +291,7 @@ pub fn recurse_index_search(
     parallel_mode: ParallelMode,
     kernel_mode: KernelMode,
 ) -> (usize, usize) {
-    //println!("{:?}", removal_order);
+    println!("{:?}", removal_order);
     //println!("{:?}", state);
 
     let largest_remove = {
@@ -303,42 +304,60 @@ pub fn recurse_index_search(
     };
 
     // Prune edges of state
-    let mut subgraphs: HashSet<BitSet> = HashSet::new();
+    let mut subgraphs: HashMap<BitSet, (bool, bool), RandomState> = HashMap::with_capacity_and_hasher(256, RandomState::new());
     let mut masks: Vec<Vec<BitSet>> = vec![vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; largest_remove - 1];
     subgraph = subgraph.iter().filter(|v| {
         let h1 = &matches[*v].0;
         let h2 = &matches[*v].1;
 
-        let pre_have_h1 = subgraphs.contains(h1);
-        let pre_have_h2 = subgraphs.contains(h2);
-
-        let mut have_h1 = pre_have_h1;
-        let mut have_h2 = pre_have_h2;
+        let x1 = subgraphs.get(h1).to_owned().cloned();
+        let x2 = subgraphs.get(h2).to_owned().cloned();
 
-        let mut idx1 = 0;
-        let mut idx2 = 0;
+        let mut have_h1 = false;
+        let mut have_h2 = false;
+        let mut h1_match = false;
+        let mut h2_match = false;
 
-        if !pre_have_h1 {
-            if let Some((idx, _)) = state.iter().enumerate().find(|(_, frag)| h1.is_subset(frag)) {
-                idx1 = idx;
-                have_h1 = true;
+        if let Some ((i1, i2)) = x1 {
+            if i1 == false {
+                return false;
             }
+            have_h1 = i1;
+            h1_match = i2;
+        }
+        else if let Some((idx, _)) = state.iter().enumerate().find(|(_, frag)| h1.is_subset(frag)) {
+            have_h1 = true;
+            subgraphs.insert(h1.clone(), (true, false));
+        }
+        else {
+            subgraphs.insert(h1.clone(), (false, false));
         }
-        if !pre_have_h2 {
-            if let Some((idx, _)) = state.iter().enumerate().find(|(_, frag)| h2.is_subset(frag)) {
-                idx2 = idx;
-                have_h2 = true;
+
+        if let Some ((i1, i2)) = x2 {
+            if i1 == false {
+                return false;
             }
+            have_h2 = i1;
+            h2_match = i2;
+        }
+        else if let Some((idx, _)) = state.iter().enumerate().find(|(_, frag)| h2.is_subset(frag)) {
+            have_h2 = true;
+            subgraphs.insert(h2.clone(), (true, false));
+        }
+        else {
+            subgraphs.insert(h2.clone(), (false, false));
         }
 
         if have_h1 && have_h2 {
-            if !pre_have_h1 {
-                subgraphs.insert(h1.clone());
-                masks[h1.len() - 2][idx1].union_with(h1);
+            if h1_match == false {
+                let (idx, _) = state.iter().enumerate().find(|(_, frag)| h1.is_subset(frag)).unwrap();
+                masks[h1.len() - 2][idx].union_with(h1);
+                subgraphs.get_mut(h1).unwrap().1 = true;
             }
-            if !pre_have_h2 {
-                subgraphs.insert(h2.clone());
-                masks[h1.len() - 2][idx2].union_with(h2);
+            if h2_match == false {
+                let (idx, _) = state.iter().enumerate().find(|(_, frag)| h2.is_subset(frag)).unwrap();
+                masks[h2.len() - 2][idx].union_with(h2);
+                subgraphs.get_mut(h2).unwrap().1 = true;
             }
         }
 
@@ -346,14 +365,14 @@ pub fn recurse_index_search(
     }).collect();
 
     /*let mut masks: Vec<Vec<BitSet>> = vec![vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; largest_remove - 1];
-    let subgraph1 = subgraph.iter().filter(|v| {
+    subgraph = subgraph.iter().filter(|v| {
         let m = &matches[*v];
         is_usable(state, &m.0, &m.1, &mut masks)
-    }).collect::<BitSet>();*/
+    }).collect();*/
 
     let mut state = Vec::new();
     for m in masks[0].iter() {
-        if m.len() != 0 {
+        if m.len() >= 2 {
             state.extend(connected_components_under_edges(mol.graph(), m));
         }
     }

From 87428445f05fa3f2429b45ba8afcb257dd8e311e Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 28 Aug 2025 15:53:14 -0700
Subject: [PATCH 26/51] Build dag

---
 src/assembly.rs | 124 ++++++++++++++++++++++++++++++------------------
 1 file changed, 77 insertions(+), 47 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index dfac7fd8..f3d04666 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -29,14 +29,14 @@ use bit_set::BitSet;
 use clap::ValueEnum;
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use petgraph::graph::EdgeIndex;
-use ahash::{AHasher, RandomState};
+use ahash::RandomState;
 
 use crate::{
     bounds::{bound_exceeded, Bound},
     canonize::{canonize, CanonizeMode, Labeling},
     enumerate::{EnumerateMode},
     kernels::{KernelMode, deletion_kernel, inclusion_kernel},
-    memoize::{Cache, MemoizeMode, NewCache},
+    memoize::{MemoizeMode, NewCache},
     molecule::Molecule,
     utils::{connected_components_under_edges, edge_neighbors},
     reductions::CompatGraph,
@@ -100,22 +100,28 @@ pub fn depth(mol: &Molecule) -> u32 {
 ///
 /// Return all pairs of non-overlapping, isomorphic subgraphs in the molecule,
 /// sorted to guarantee deterministic iteration.
-pub fn matches(mol: &Molecule) -> Vec<(BitSet, BitSet)> {
+pub fn matches(mol: &Molecule) -> (Vec<(BitSet, BitSet)>, HashMap<usize, (BitSet, Vec<usize>)>) {
     let num_edges = mol.graph().edge_count();
-    let mut subgraphs: HashSet<BitSet> = HashSet::new();
-    let mut size_one_subgraphs: HashSet<BitSet> = HashSet::new();
+    let mut subgraphs: HashMap<BitSet, usize> = HashMap::new();
+    let mut size_one_subgraphs: HashMap<BitSet, usize> = HashMap::new();
     let mut matches: Vec<(BitSet, BitSet)> = Vec::new();
-    let mut buckets: HashMap<Labeling, Vec<BitSet>> = HashMap::new();
+    let mut buckets: HashMap<Labeling, Vec<(BitSet, usize)>> = HashMap::new();
+
+    let mut dag: HashMap<usize, (BitSet, Vec<usize>)> = HashMap::new();
+    let mut next_id = 0;
 
     // Generate subgraphs with one edge
     for i in 0..num_edges {
         let mut new_bitset = BitSet::with_capacity(num_edges);
         new_bitset.insert(i);
-        size_one_subgraphs.insert(new_bitset);
+        size_one_subgraphs.insert(new_bitset, i);
+
+        dag.insert(next_id, (BitSet::new(), Vec::new()));
+        next_id += 1;
     }
 
     // Generate subgraphs with two edges
-    for subgraph in size_one_subgraphs.iter() {
+    for (subgraph, idx) in size_one_subgraphs.iter() {
         let mut neighborhood = BitSet::with_capacity(num_edges);
         for e in subgraph {
             neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
@@ -125,7 +131,8 @@ pub fn matches(mol: &Molecule) -> Vec<(BitSet, BitSet)> {
         for e in neighborhood.iter() {
             let mut new_subgraph = subgraph.clone();
             new_subgraph.insert(e);
-            subgraphs.insert(new_subgraph);
+            subgraphs.entry(new_subgraph)
+                .or_insert(*idx);
         }
     }
 
@@ -133,47 +140,70 @@ pub fn matches(mol: &Molecule) -> Vec<(BitSet, BitSet)> {
     while subgraphs.len() > 0 {
         buckets.clear();
         // Generate buckets
-        for g in subgraphs.iter() {
-            let canon = canonize(mol, g, CanonizeMode::TreeNauty);
+        for (g, parent) in subgraphs.iter() {
+            let canon = canonize(mol, g, CanonizeMode::TreeNauty);            
             buckets.entry(canon)
-                .and_modify(|v| v.push(g.clone()))
-                .or_insert(vec![g.clone()]);
+                .and_modify(|v| v.push((g.clone(), *parent)))
+                .or_insert(vec![(g.clone(), *parent)]);
         }
 
         subgraphs.clear();
         for (_, bucket) in buckets.iter() {
             // Generate matches
             let mut has_match = BitSet::with_capacity(bucket.len());
-            for (i, first) in bucket.iter().enumerate() {
-                for (j, second) in bucket[i+1..].iter().enumerate() {
+            for (i, (first, first_parent)) in bucket.iter().enumerate() {
+                for (j, (second, second_parent)) in bucket[i+1..].iter().enumerate() {
                     if first.intersection(second).count() == 0 {
                         if first > second {
                         matches.push((first.clone(), second.clone()));
                         } else {
                             matches.push((second.clone(), first.clone()));
                         }
+
+                        if !has_match.contains(i) {
+                            dag.insert(next_id, (first.clone(), Vec::new()));
+                            dag.entry(*first_parent)
+                                .and_modify(|(_, children)| children.push(next_id));
+
+                            let mut neighborhood = BitSet::with_capacity(num_edges);
+                            for e in first {
+                                neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
+                            }
+                            neighborhood.difference_with(first);
+
+                            for e in neighborhood.iter() {
+                                let mut new_subgraph = first.clone();
+                                new_subgraph.insert(e);
+                                subgraphs.insert(new_subgraph, next_id);
+                            }
+
+                            next_id += 1;
+                        }
+                        if !has_match.contains(i + 1 + j) {
+                            dag.insert(next_id, (second.clone(), Vec::new()));
+                            dag.entry(*second_parent)
+                                .and_modify(|(_, children)| children.push(next_id));
+
+                            let mut neighborhood = BitSet::with_capacity(num_edges);
+                            for e in second {
+                                neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
+                            }
+                            neighborhood.difference_with(second);
+
+                            for e in neighborhood.iter() {
+                                let mut new_subgraph = second.clone();
+                                new_subgraph.insert(e);
+                                subgraphs.insert(new_subgraph, next_id);
+                            }
+
+                            next_id += 1;
+                        }
+
                         has_match.insert(i);
                         has_match.insert(i + 1 + j);
                     }
                 }
             }
-
-            // Generate new subgraphs
-            // Only extend if subgraph has a match
-            for i in has_match.iter() {
-                let subgraph = &bucket[i];
-                let mut neighborhood = BitSet::with_capacity(num_edges);
-                for e in subgraph {
-                    neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
-                }
-                neighborhood.difference_with(subgraph);
-
-                for e in neighborhood.iter() {
-                    let mut new_subgraph = subgraph.clone();
-                    new_subgraph.insert(e);
-                    subgraphs.insert(new_subgraph);
-                }
-            }
         }
     }
 
@@ -191,7 +221,7 @@ pub fn matches(mol: &Molecule) -> Vec<(BitSet, BitSet)> {
         ord[i]
     });
 
-    matches
+    (matches, dag)
 }
 
 /// Determine the fragments produced from the given assembly state by removing
@@ -291,7 +321,7 @@ pub fn recurse_index_search(
     parallel_mode: ParallelMode,
     kernel_mode: KernelMode,
 ) -> (usize, usize) {
-    println!("{:?}", removal_order);
+    //println!("{:?}", removal_order);
     //println!("{:?}", state);
 
     let largest_remove = {
@@ -304,14 +334,14 @@ pub fn recurse_index_search(
     };
 
     // Prune edges of state
-    let mut subgraphs: HashMap<BitSet, (bool, bool), RandomState> = HashMap::with_capacity_and_hasher(256, RandomState::new());
+    /*let mut subgraphs: HashMap<&BitSet, (bool, bool), RandomState> = HashMap::with_capacity_and_hasher(256, RandomState::new());
     let mut masks: Vec<Vec<BitSet>> = vec![vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; largest_remove - 1];
     subgraph = subgraph.iter().filter(|v| {
         let h1 = &matches[*v].0;
         let h2 = &matches[*v].1;
 
-        let x1 = subgraphs.get(h1).to_owned().cloned();
-        let x2 = subgraphs.get(h2).to_owned().cloned();
+        let x1 = subgraphs.get(h1).cloned();
+        let x2 = subgraphs.get(h2).cloned();
 
         let mut have_h1 = false;
         let mut have_h2 = false;
@@ -325,12 +355,12 @@ pub fn recurse_index_search(
             have_h1 = i1;
             h1_match = i2;
         }
-        else if let Some((idx, _)) = state.iter().enumerate().find(|(_, frag)| h1.is_subset(frag)) {
+        else if let Some(_) = state.iter().enumerate().find(|(_, frag)| h1.is_subset(frag)) {
             have_h1 = true;
-            subgraphs.insert(h1.clone(), (true, false));
+            subgraphs.insert(h1, (true, false));
         }
         else {
-            subgraphs.insert(h1.clone(), (false, false));
+            subgraphs.insert(h1, (false, false));
         }
 
         if let Some ((i1, i2)) = x2 {
@@ -340,12 +370,12 @@ pub fn recurse_index_search(
             have_h2 = i1;
             h2_match = i2;
         }
-        else if let Some((idx, _)) = state.iter().enumerate().find(|(_, frag)| h2.is_subset(frag)) {
+        else if let Some(_) = state.iter().enumerate().find(|(_, frag)| h2.is_subset(frag)) {
             have_h2 = true;
-            subgraphs.insert(h2.clone(), (true, false));
+            subgraphs.insert(h2, (true, false));
         }
         else {
-            subgraphs.insert(h2.clone(), (false, false));
+            subgraphs.insert(h2, (false, false));
         }
 
         if have_h1 && have_h2 {
@@ -362,13 +392,13 @@ pub fn recurse_index_search(
         }
 
         have_h1 && have_h2
-    }).collect();
+    }).collect();*/
 
-    /*let mut masks: Vec<Vec<BitSet>> = vec![vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; largest_remove - 1];
+    let mut masks: Vec<Vec<BitSet>> = vec![vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; largest_remove - 1];
     subgraph = subgraph.iter().filter(|v| {
         let m = &matches[*v];
         is_usable(state, &m.0, &m.1, &mut masks)
-    }).collect();*/
+    }).collect();
 
     let mut state = Vec::new();
     for m in masks[0].iter() {
@@ -599,7 +629,7 @@ pub fn index_search(
     // Catch not-yet-implemented modes.
 
     // Enumerate non-overlapping isomorphic subgraph pairs.
-    let matches = matches(mol);
+    let (matches, _dag) = matches(mol);
     /*for (i, m) in matches.iter().enumerate() {
         println!("{i}: {:?}", m);
     }*/

From aaca9c8398f7d11ea0e0f4b58d7cf5d8daf52cbd Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Tue, 2 Sep 2025 16:23:11 -0700
Subject: [PATCH 27/51] New matches generation with DAG

---
 Cargo.lock        |  14 +-
 Cargo.toml        |   1 +
 src/assembly.rs   | 337 +++++++++++++++++++++++++++++++++-------------
 src/reductions.rs |   7 +-
 4 files changed, 265 insertions(+), 94 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index bc179a87..660f0f07 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -93,6 +93,7 @@ dependencies = [
  "dashmap",
  "fxhash",
  "graph-canon",
+ "itertools 0.14.0",
  "petgraph",
  "pyo3",
  "rayon",
@@ -322,7 +323,7 @@ dependencies = [
  "clap 2.34.0",
  "criterion-plot",
  "csv",
- "itertools",
+ "itertools 0.10.5",
  "lazy_static",
  "num-traits",
  "oorandom",
@@ -344,7 +345,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876"
 dependencies = [
  "cast",
- "itertools",
+ "itertools 0.10.5",
 ]
 
 [[package]]
@@ -586,6 +587,15 @@ dependencies = [
  "either",
 ]
 
+[[package]]
+name = "itertools"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.15"
diff --git a/Cargo.toml b/Cargo.toml
index bbd2c373..84277549 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,6 +20,7 @@ rayon = "1.10.0"
 dashmap = "6.1.0"
 ahash = "0.8.12"
 fxhash = "0.2.1"
+itertools = "0.14.0"
 
 [dev-dependencies]
 criterion = "0.3"
diff --git a/src/assembly.rs b/src/assembly.rs
index f3d04666..ef0ce223 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -29,7 +29,7 @@ use bit_set::BitSet;
 use clap::ValueEnum;
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use petgraph::graph::EdgeIndex;
-use ahash::RandomState;
+use itertools::Itertools;
 
 use crate::{
     bounds::{bound_exceeded, Bound},
@@ -96,19 +96,212 @@ pub fn depth(mol: &Molecule) -> u32 {
     ix
 }
 
+#[derive(Debug)]
+pub struct DagNode {
+    fragment: BitSet,
+    canon_id: usize,
+    // added_edge: usize,
+    children: Vec<usize>,
+}
+
+impl DagNode {
+    pub fn new(fragment: BitSet, canon_id: usize) -> Self {
+        Self {
+            fragment,
+            canon_id,
+            children: Vec::new(),
+        }
+    }
+}
+
+
+pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (HashMap<(usize, usize), usize>, Vec<DagNode>) {
+    let num_edges = mol.graph().edge_count();
+
+    let mut subgraphs: Vec<usize> = Vec::new();
+    let mut constructed_frags: HashSet<BitSet> = HashSet::new();
+    let mut frag_to_id: HashMap<BitSet, usize> = HashMap::new();
+
+    let mut dag: Vec<DagNode> = Vec::with_capacity(num_edges);
+    let mut matches: Vec<(usize, usize)> = Vec::new();
+
+    let mut next_canon_id = 0;
+
+    // Generate subgraphs with one edge
+    for i in 0..num_edges {
+        let mut new_bitset = BitSet::with_capacity(num_edges);
+        new_bitset.insert(i);
+        
+        let node = DagNode::new(new_bitset, 0);
+
+        dag.push(node);
+        subgraphs.push(i);
+    }
+
+    next_canon_id += 1;
+
+    while subgraphs.len() > 0 {
+        // Initialize new buckets and subgraph list
+        let mut buckets: HashMap<Labeling, Vec<(BitSet, usize)>> = HashMap::new();
+        let mut child_subgraphs: Vec<usize> = Vec::new();
+
+        // Extend and bucket new subgraphs
+        for parent_idx in subgraphs.iter() {
+            // Get fragment to be extended
+            let fragment = &dag[*parent_idx].fragment;
+
+            // Construct edge neighborhood of fragment
+            let mut neighborhood = BitSet::with_capacity(num_edges);
+            for e in fragment {
+                neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
+            }
+            neighborhood.difference_with(fragment);
+
+            // Extend fragment with edges from neighborhood
+            for e in neighborhood.iter() {
+                // Create extended fragment
+                let mut new_fragment = fragment.clone();
+                new_fragment.insert(e);
+
+                // Check if fragment has already been created
+                // If yes, continue
+                if constructed_frags.contains(&new_fragment) {
+                    continue;
+                }
+
+                // Bucket subgraph
+                // Gives ownership of new_fragment to buckets
+                let canon = canonize(mol, &new_fragment, canonize_mode);
+                buckets.entry(canon)
+                    .and_modify(|bucket| bucket.push((new_fragment.clone(), *parent_idx)))
+                    .or_insert(vec![(new_fragment.clone(), *parent_idx)]);
+            
+                // Add to constructed_frags
+                constructed_frags.insert(new_fragment);
+            }
+        }
+
+        // Go through buckets to create matches
+        for fragments in buckets.values() {
+            // Variables to track if a match has been found
+            let mut frag_has_match = BitSet::with_capacity(fragments.len());
+
+            // Loop through pairs of bitsets to find matches
+            for pair in fragments.iter().enumerate().combinations(2) {
+                // Extract pair of fragments
+                let frag1 = &pair[0].1.0;
+                let frag2 = &pair[1].1.0;
+
+                // Check for disjointness
+                // If the fragments are disjoint, they form a match
+                if frag1.is_disjoint(&frag2) {
+                    // Extract indices of fragments
+                    let idx1 = pair[0].0;
+                    let idx2 = pair[1].0;
+
+                    // If this is the first match for a fragment, create a dag node
+                    if !frag_has_match.contains(idx1) {
+                        // Create new dag node
+                        let new_dag_node = DagNode::new(frag1.clone(), next_canon_id);
+                        let child_idx = dag.len();
+
+                        // Add child to dag list
+                        dag.push(new_dag_node);
+
+                        // Add index to parent's child list
+                        let parent_idx = pair[0].1.1;
+                        let mut parent = &mut dag[parent_idx];
+                        parent.children.push(child_idx);
+
+                        // Add dag node to next subgraphs list
+                        child_subgraphs.push(child_idx);
+
+                        // Add to frag_to_id map
+                        frag_to_id.insert(frag1.clone(), child_idx);
+                    }
+                    if !frag_has_match.contains(idx2) {
+                        // Create new dag node
+                        let new_dag_node = DagNode::new(frag2.clone(), next_canon_id);
+                        let child_idx = dag.len();
+
+                        // Add child to dag list
+                        dag.push(new_dag_node);
+
+                        // Add index to parent's child list
+                        let parent_idx = pair[1].1.1;
+                        let mut parent = &mut dag[parent_idx];
+                        parent.children.push(child_idx);
+
+                        // Add dag node to next subgraphs list
+                        child_subgraphs.push(child_idx);
+
+                        // Add to frag_to_id map
+                        frag_to_id.insert(frag2.clone(), child_idx);
+                    }
+
+                    // Get fragment ids and add to matches
+                    let frag1_id = frag_to_id.get(&frag1).unwrap();
+                    let frag2_id = frag_to_id.get(&frag2).unwrap();
+
+                    if frag1_id > frag2_id {
+                        matches.push((*frag1_id, *frag2_id));
+                    }
+                    else {
+                        matches.push((*frag2_id, *frag1_id));
+                    }
+
+                    // Mark that these fragments have matches
+                    frag_has_match.insert(idx1);
+                    frag_has_match.insert(idx2);
+                }
+            }
+
+            // If there was a match, increment canon_id counter
+            if !frag_has_match.is_empty() {
+                next_canon_id += 1;
+            }
+        }
+
+        // Update to the new subgraphs
+        subgraphs = child_subgraphs;
+    }
+
+    // Sort matches
+    // Larger frag_ids get a smaller match_id
+    // Thus fragments with many edges have small ids
+    matches.sort();
+    matches.reverse();
+
+    // give matches ids
+    let mut next_match_id = 0;
+    let mut match_to_id: HashMap<(usize, usize), usize> = HashMap::new();
+    for m in matches {
+        match_to_id.insert(m, next_match_id);
+        next_match_id += 1;
+    }
+
+    (match_to_id, dag)
+}
+
+
 /// Helper function for [`index_search`]; only public for benchmarking.
 ///
 /// Return all pairs of non-overlapping, isomorphic subgraphs in the molecule,
 /// sorted to guarantee deterministic iteration.
-pub fn matches(mol: &Molecule) -> (Vec<(BitSet, BitSet)>, HashMap<usize, (BitSet, Vec<usize>)>) {
+/*pub fn _matches(mol: &Molecule) -> (Vec<(usize, usize)>, DagNode) {
     let num_edges = mol.graph().edge_count();
+
     let mut subgraphs: HashMap<BitSet, usize> = HashMap::new();
     let mut size_one_subgraphs: HashMap<BitSet, usize> = HashMap::new();
-    let mut matches: Vec<(BitSet, BitSet)> = Vec::new();
-    let mut buckets: HashMap<Labeling, Vec<(BitSet, usize)>> = HashMap::new();
+    let mut buckets: HashMap<Labeling, Vec<DagNode>> = HashMap::new();
+
+    let mut dag: Vec<DagNode> = Vec::with_capacity(num_edges);
+    let mut canon_ids: HashMap<Labeling, usize> = HashMap::new();
+    let mut matches: HashMap<(usize, usize), usize> = Hashmap::new();
 
-    let mut dag: HashMap<usize, (BitSet, Vec<usize>)> = HashMap::new();
-    let mut next_id = 0;
+    let mut next_frag_id = 1;
+    let mut next_match_id = 0;
+    let mut next_canon_id = 1;
 
     // Generate subgraphs with one edge
     for i in 0..num_edges {
@@ -116,8 +309,7 @@ pub fn matches(mol: &Molecule) -> (Vec<(BitSet, BitSet)>, HashMap<usize, (BitSet
         new_bitset.insert(i);
         size_one_subgraphs.insert(new_bitset, i);
 
-        dag.insert(next_id, (BitSet::new(), Vec::new()));
-        next_id += 1;
+        dag.push(DagNode::new(new_bitset, 0, 0));
     }
 
     // Generate subgraphs with two edges
@@ -148,19 +340,16 @@ pub fn matches(mol: &Molecule) -> (Vec<(BitSet, BitSet)>, HashMap<usize, (BitSet
         }
 
         subgraphs.clear();
+        let mut frag_to_id: HashMap<BitSet, usize> = HashMap::new();
         for (_, bucket) in buckets.iter() {
             // Generate matches
             let mut has_match = BitSet::with_capacity(bucket.len());
             for (i, (first, first_parent)) in bucket.iter().enumerate() {
                 for (j, (second, second_parent)) in bucket[i+1..].iter().enumerate() {
                     if first.intersection(second).count() == 0 {
-                        if first > second {
-                        matches.push((first.clone(), second.clone()));
-                        } else {
-                            matches.push((second.clone(), first.clone()));
-                        }
-
                         if !has_match.contains(i) {
+                            frag_to_id.insert(first.clone(), next_id);
+
                             dag.insert(next_id, (first.clone(), Vec::new()));
                             dag.entry(*first_parent)
                                 .and_modify(|(_, children)| children.push(next_id));
@@ -180,6 +369,8 @@ pub fn matches(mol: &Molecule) -> (Vec<(BitSet, BitSet)>, HashMap<usize, (BitSet
                             next_id += 1;
                         }
                         if !has_match.contains(i + 1 + j) {
+                            frag_to_id.insert(second.clone(), next_id);
+
                             dag.insert(next_id, (second.clone(), Vec::new()));
                             dag.entry(*second_parent)
                                 .and_modify(|(_, children)| children.push(next_id));
@@ -199,6 +390,14 @@ pub fn matches(mol: &Molecule) -> (Vec<(BitSet, BitSet)>, HashMap<usize, (BitSet
                             next_id += 1;
                         }
 
+                        let id1 = frag_to_id.get(first).unwrap();
+                        let id2 = frag_to_id.get(second).unwrap();
+                        if first > second {
+                            matches.push((*id1, *id2));
+                        } else {
+                            matches.push((*id1, *id2));
+                        }
+
                         has_match.insert(i);
                         has_match.insert(i + 1 + j);
                     }
@@ -222,7 +421,7 @@ pub fn matches(mol: &Molecule) -> (Vec<(BitSet, BitSet)>, HashMap<usize, (BitSet
     });
 
     (matches, dag)
-}
+}*/
 
 /// Determine the fragments produced from the given assembly state by removing
 /// the given pair of non-overlapping, isomorphic subgraphs and then adding one
@@ -307,9 +506,10 @@ fn is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<Bit
 ///   upper bound returned is unchanged.)
 /// - `usize`: The number of assembly states searched.
 #[allow(clippy::too_many_arguments)]
-pub fn recurse_index_search(
+/*pub fn recurse_index_search(
     mol: &Molecule,
-    matches: &Vec<(BitSet, BitSet)>,
+    matches: &Vec<(usize, usize)>,
+    dag: &HashMap<usize, (BitSet, Vec<usize>)>,
     graph: &Option<CompatGraph>,
     mut subgraph: BitSet,
     removal_order: Vec<usize>,
@@ -326,79 +526,18 @@ pub fn recurse_index_search(
 
     let largest_remove = {
         if let Some(v) = subgraph.iter().next() {
-            matches[v].0.len()
+            dag.get(matches[v].0).unwrap().0.len();
         }
         else {
             return (state_index, 1);
         }
     };
 
-    // Prune edges of state
-    /*let mut subgraphs: HashMap<&BitSet, (bool, bool), RandomState> = HashMap::with_capacity_and_hasher(256, RandomState::new());
-    let mut masks: Vec<Vec<BitSet>> = vec![vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; largest_remove - 1];
-    subgraph = subgraph.iter().filter(|v| {
-        let h1 = &matches[*v].0;
-        let h2 = &matches[*v].1;
-
-        let x1 = subgraphs.get(h1).cloned();
-        let x2 = subgraphs.get(h2).cloned();
-
-        let mut have_h1 = false;
-        let mut have_h2 = false;
-        let mut h1_match = false;
-        let mut h2_match = false;
-
-        if let Some ((i1, i2)) = x1 {
-            if i1 == false {
-                return false;
-            }
-            have_h1 = i1;
-            h1_match = i2;
-        }
-        else if let Some(_) = state.iter().enumerate().find(|(_, frag)| h1.is_subset(frag)) {
-            have_h1 = true;
-            subgraphs.insert(h1, (true, false));
-        }
-        else {
-            subgraphs.insert(h1, (false, false));
-        }
-
-        if let Some ((i1, i2)) = x2 {
-            if i1 == false {
-                return false;
-            }
-            have_h2 = i1;
-            h2_match = i2;
-        }
-        else if let Some(_) = state.iter().enumerate().find(|(_, frag)| h2.is_subset(frag)) {
-            have_h2 = true;
-            subgraphs.insert(h2, (true, false));
-        }
-        else {
-            subgraphs.insert(h2, (false, false));
-        }
+    let mut masks: Vec<Vec<BitSet>> = vec![
+        vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; 
+        largest_remove - 1
+    ];
 
-        if have_h1 && have_h2 {
-            if h1_match == false {
-                let (idx, _) = state.iter().enumerate().find(|(_, frag)| h1.is_subset(frag)).unwrap();
-                masks[h1.len() - 2][idx].union_with(h1);
-                subgraphs.get_mut(h1).unwrap().1 = true;
-            }
-            if h2_match == false {
-                let (idx, _) = state.iter().enumerate().find(|(_, frag)| h2.is_subset(frag)).unwrap();
-                masks[h2.len() - 2][idx].union_with(h2);
-                subgraphs.get_mut(h2).unwrap().1 = true;
-            }
-        }
-
-        have_h1 && have_h2
-    }).collect();*/
-
-    let mut masks: Vec<Vec<BitSet>> = vec![vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; largest_remove - 1];
-    subgraph = subgraph.iter().filter(|v| {
-        let m = &matches[*v];
-        is_usable(state, &m.0, &m.1, &mut masks)
-    }).collect();
 
     let mut state = Vec::new();
     for m in masks[0].iter() {
@@ -502,6 +641,7 @@ pub fn recurse_index_search(
             let (child_index, child_states_searched) = recurse_index_search(
                 mol,
                 matches,
+                dag,
                 graph,
                 sub_clone,
                 {
@@ -551,7 +691,7 @@ pub fn recurse_index_search(
         best_child_index.load(Relaxed),
         states_searched.load(Relaxed),
     )
-}
+}*/
 
 /// Compute a molecule's assembly index and related information using a
 /// top-down recursive algorithm, parameterized by the specified options.
@@ -629,18 +769,31 @@ pub fn index_search(
     // Catch not-yet-implemented modes.
 
     // Enumerate non-overlapping isomorphic subgraph pairs.
-    let (matches, _dag) = matches(mol);
-    /*for (i, m) in matches.iter().enumerate() {
-        println!("{i}: {:?}", m);
-    }*/
+    let (matches, dag) = matches(mol, canonize_mode);
+    /*let mut match_list: Vec<((usize, usize), usize)> = Vec::new();
+    for m in matches.iter() {
+        match_list.push((*m.0, *m.1));
+    }
+
+    for (idx, d) in dag.iter().enumerate() {
+        println!("{}: {:?}", idx, d);
+    }
+
+    match_list.sort_by_key(|m| m.1);
+    for m in match_list {
+        println!("{:?}", m);
+    }
+    println!("{}", matches.len());*/
+
+    std::process::exit(1);
 
     // Create memoization cache.
     //let mut cache = Cache::new(memoize_mode, canonize_mode);
-    let mut cache = NewCache::new(memoize_mode);
+    /*let mut cache = NewCache::new(memoize_mode);
 
     let graph = {
         if clique {
-            Some(CompatGraph::new(&matches))
+            Some(CompatGraph::new(&matches, &dag))
         }
         else {
             None
@@ -663,6 +816,7 @@ pub fn index_search(
     let (index, states_searched) = recurse_index_search(
         mol,
         &matches,
+        &dag,
         &graph,
         subgraph,
         Vec::new(),
@@ -677,7 +831,8 @@ pub fn index_search(
 
     println!("Bounded: {}", cache.count());
 
-    (index as u32, matches.len() as u32, states_searched)
+    (index as u32, matches.len() as u32, states_searched)*/
+    (0, 0, 0)
 }
 
 /// Compute a molecule's assembly index using an efficient default strategy.
diff --git a/src/reductions.rs b/src/reductions.rs
index 5ac282c9..f7e699c6 100644
--- a/src/reductions.rs
+++ b/src/reductions.rs
@@ -11,6 +11,7 @@
 
 use bit_set::BitSet;
 use std::time::Instant;
+use std::collections::HashMap;
 
 /// Graph representation of the compatibility of duplicatable subgraph pairs.
 pub struct CompatGraph {
@@ -21,7 +22,7 @@ pub struct CompatGraph {
 
 impl CompatGraph {
     /// Constructs a compatibility graph given a set of matches.
-    pub fn new(init_matches: &Vec<(BitSet, BitSet)>) -> Self {
+    pub fn new(init_matches: &Vec<(usize, usize)>, dag: &HashMap<usize, (BitSet, Vec<usize>)>) -> Self {
         let start = Instant::now();
         let size = init_matches.len();
 
@@ -35,6 +36,10 @@ impl CompatGraph {
         for (idx1, (h1, h2)) in init_matches.iter().enumerate() {
             for (idx2, (h1p, h2p)) in init_matches[idx1 + 1..].iter().enumerate() {
                 let idx2 = idx2 + idx1 + 1;
+                let h1 = &dag.get(h1).unwrap().0;
+                let h1p = &dag.get(h1p).unwrap().0;
+                let h2 = &dag.get(h2).unwrap().0;
+                let h2p = &dag.get(h2p).unwrap().0;
 
                 let forward_compatible = {
                     h2.is_disjoint(h1p) && 

From e700ed68cc8ce363d52ad2f85878364ac1ba4e5f Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Wed, 3 Sep 2025 13:52:38 -0700
Subject: [PATCH 28/51] Enumerate matches using dag

---
 src/assembly.rs | 246 ++++++++++++++++++++++++++----------------------
 1 file changed, 132 insertions(+), 114 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index ef0ce223..3c52ef12 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -19,7 +19,7 @@
 //! ```
 
 use std::{
-    collections::{HashMap, HashSet}, sync::{
+    collections::{BTreeMap, HashMap, HashSet}, sync::{
         atomic::{AtomicUsize, Ordering::Relaxed},
         Arc,
     }
@@ -27,9 +27,11 @@ use std::{
 
 use bit_set::BitSet;
 use clap::ValueEnum;
+use graph_canon::canon;
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use petgraph::graph::EdgeIndex;
 use itertools::Itertools;
+use std::cmp::{max, min};
 
 use crate::{
     bounds::{bound_exceeded, Bound},
@@ -142,7 +144,7 @@ pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (HashMap<(usize,
 
     while subgraphs.len() > 0 {
         // Initialize new buckets and subgraph list
-        let mut buckets: HashMap<Labeling, Vec<(BitSet, usize)>> = HashMap::new();
+        let mut buckets: BTreeMap<Labeling, Vec<(BitSet, usize)>> = BTreeMap::new();
         let mut child_subgraphs: Vec<usize> = Vec::new();
 
         // Extend and bucket new subgraphs
@@ -506,11 +508,10 @@ fn is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<Bit
 ///   upper bound returned is unchanged.)
 /// - `usize`: The number of assembly states searched.
 #[allow(clippy::too_many_arguments)]
-/*pub fn recurse_index_search(
+pub fn recurse_index_search(
     mol: &Molecule,
-    matches: &Vec<(usize, usize)>,
-    dag: &HashMap<usize, (BitSet, Vec<usize>)>,
-    graph: &Option<CompatGraph>,
+    matches: &HashMap<(usize, usize), usize>,
+    dag: &Vec<DagNode>,
     mut subgraph: BitSet,
     removal_order: Vec<usize>,
     state: &[BitSet],
@@ -523,85 +524,144 @@ fn is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<Bit
 ) -> (usize, usize) {
     //println!("{:?}", removal_order);
     //println!("{:?}", state);
+    
+    let num_edges = mol.graph().edge_count();
+    let mut enum_matches: Vec<(usize, usize)> = Vec::new();
+    
+    // MATCHES METHOD 1
+    // EXTEND ALWAYS AND DO MATCHING AT END
+    let mut subgraphs: Vec<usize> = Vec::new();
+    let mut buckets: HashMap<usize, Vec<usize>> = HashMap::new();
 
-    let largest_remove = {
-        if let Some(v) = subgraph.iter().next() {
-            dag.get(matches[v].0).unwrap().0.len();
+    // create subgraphs of size 1
+    for fragment in state.iter() {
+        for edge_idx in fragment.iter() {
+            subgraphs.push(edge_idx);
         }
-        else {
-            return (state_index, 1);
+    }
+
+    while subgraphs.len() > 0 {
+        // Subgraphs to extend in next loop
+        let mut new_subgraphs = Vec::new();
+
+        // Extend each subgraph and bucket
+        for frag_id in subgraphs {
+            // Get ids of extensions of this subgraph
+            let children_ids = &dag[frag_id].children;
+
+            for child_id in children_ids {
+                // Check if this extension is valid
+                // TODO: save fragment id
+                // TODO: save edge id in DAG
+                let child_frag = &dag[*child_id].fragment;
+                let valid = state.iter().any(|frag| child_frag.is_subset(frag));
+
+                if valid {
+                    let canon_id = &dag[*child_id].canon_id;
+                    
+                    // Add fragment to bucket
+                    buckets.entry(*canon_id)
+                        .and_modify(|bucket| bucket.push(*child_id))
+                        .or_insert(vec![*child_id]);
+                    
+                    // Add fragment to new subgraphs
+                    new_subgraphs.push(*child_id);
+                }
+            }
         }
-    };
 
-    let mut masks: Vec<Vec<BitSet>> = vec![
-        vec![BitSet::with_capacity(mol.graph().edge_count()); state.len()]; 
+        // Update to new subgraphs
+        subgraphs = new_subgraphs;
+    }
+
+    // Generate matches from buckets of ismorphic fragments
+    for fragments in buckets.values() {
+        // Loop over pairs of fragments
+        for pair in fragments.iter().combinations(2) {
+            let frag1 = *max(pair[0], pair[1]);
+            let frag2 = *min(pair[0], pair[1]);
+
+            // Check for valid match
+            // If valid, add to matches to iterate over
+            if let Some(x) = matches.get(&(frag1, frag2)) {
+                enum_matches.push((frag1, frag2));
+            }
+        }
+    }
+
+    // If there are no matches, return
+    if enum_matches.len() == 0 {
+        return (state_index, 1);
+    }
+
+    enum_matches.sort();
+    enum_matches.reverse();
+
+    // Modify mask
+    // TODO: try storing fragment so search can be reduced
+    // TODO: try with a set for frag_ids to only do intersection once per frag
+    let largest_remove = dag[enum_matches[0].0].fragment.len();
+    let mut masks = vec![
+        vec![BitSet::with_capacity(num_edges); state.len()];
         largest_remove - 1
     ];
 
+    for m in enum_matches.iter() {
+        // Extract fragments
+        let frag1 = &dag[m.0].fragment;
+        let frag2 = &dag[m.1].fragment;
+        let len = frag1.len() - 2;
+
+        // Find indices of fragments that contain frag1 and frag2
+        let (frag1_idx, _) = state.iter()
+            .enumerate()   
+            .find(|(_, state_frag)| frag1.is_subset(state_frag))
+            .unwrap();
+        let (frag2_idx, _) = state.iter()
+            .enumerate()
+            .find(|(_, state_frag)| frag2.is_subset(state_frag))
+            .unwrap();
+
+        // Union with masks to indicate that these edges are used
+        masks[len][frag1_idx].union_with(frag1);
+        masks[len][frag2_idx].union_with(frag2);
+    }
 
+    // Let new state be only the edges which can be matched
     let mut state = Vec::new();
-    for m in masks[0].iter() {
-        if m.len() >= 2 {
-            state.extend(connected_components_under_edges(mol.graph(), m));
-        }
-    }
 
-    // Int bound
-    let mut max = Vec::new();
-    for (i, list) in masks.iter().enumerate() {
-        let mut val = 0;
-        let i = i + 2;
-        for (j, frag) in list.iter().enumerate() {
-            let mf = masks[0][j].len();
-            let mi = frag.len();
-            let x = mf + (mi % i) - mi;
-            val += mf - (mi / i) - x / (i-1) - (x % (i-1) != 0) as usize;
-        }
-        val -= (i as f32).log2().ceil() as usize;
-        if max.len() == 0 {
-            max.push(val);
-        }
-        else {
-            max.push(val.max(max[max.len() - 1]));
-        }
+    for frag in masks[0].iter() {
+        // Split into connected components
+        let connected_frags = connected_components_under_edges(mol.graph(), &frag);
+        
+        // Add connected components to new state
+        state.extend(connected_frags);
     }
+                
+
+    // MATCHES METHOD 2
+    // ONLY EXTEND IF A SUBGRAPH HAS A MATCH
 
-    let best = best_index.load(Relaxed);
-    let mut idx = 2;
-    while (idx < max.len() + 2) && best + max[idx - 2] <= state_index {
-        idx += 1;
-    }
 
     // Memoization
     if cache.memoize_state(mol, &state, state_index, &removal_order) {
         return (state_index, 1);
     }
 
-    // Apply kernels
-    let mut must_include = matches.len();
-    if kernel_mode != KernelMode::None {
-        let g = graph.as_ref().unwrap();
-
-        // Deletion kernel
-        // Returns a subgraph without nodes that never occur in an optimal solution.
-        subgraph = deletion_kernel(matches, g, subgraph);
-        
-        // Inclusion kernel.
-        // must_include is the first match in the matches list that will be included in an optimal solution.
-        must_include = inclusion_kernel(matches, g, &subgraph);
-    }
-
     // Keep track of the best assembly index found in any of this assembly
     // state's children and the number of states searched, including this one.
     let best_child_index = AtomicUsize::from(state_index);
     let states_searched = AtomicUsize::from(1);
-
+    
     // Define a closure that handles recursing to a new assembly state based on
     // the given (enumerated) pair of non-overlapping isomorphic subgraphs.
-    let recurse_on_match = |v: usize| {
-        let (h1, h2) = &matches[v];
+    let  recurse_on_match = |v: (usize, usize)| {
+        //let (h1, h2) = &matches[v];
+        let h1 = &dag[v.0].fragment;
+        let h2 = &dag[v.1].fragment;
+        
+        // TODO: try keeping track of fragment to elimate some some search
         if let Some(fragments) = fragments(mol, &state, h1, h2) {
-
             // If using depth-one parallelism, all descendant states should be
             // computed serially.
             let new_parallel = if parallel_mode == ParallelMode::DepthOne {
@@ -610,43 +670,15 @@ fn is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<Bit
                 parallel_mode
             };
 
-            // If kernelizing once, do not kernelize again. If kernelizing at depth-one,
-            // kernelize once more at the beginning of each descendent.
-            let new_kernel = if kernel_mode == KernelMode::Once {
-                KernelMode::None
-            } else if kernel_mode == KernelMode::DepthOne {
-                KernelMode::Once
-            } else {
-                kernel_mode
-            };
-
-            // Update subgraph
-            // If using CompatGraph, only consider the neighborhood of the node removed.
-            // Otherwise, only consider the matches that occur after the removed match in the list.
-            let mut sub_clone;
-            if let Some(g) = graph {
-                sub_clone = subgraph.clone();
-                sub_clone.intersect_with(&g.forward_neighbors(v, &subgraph));
-            }
-            else {
-                sub_clone = BitSet::with_capacity(matches.len());
-                for j in subgraph.iter() {
-                    if j > v {
-                        sub_clone.insert(j);
-                    }
-                }
-            }
-
             // Recurse using the remaining matches and updated fragments.
             let (child_index, child_states_searched) = recurse_index_search(
                 mol,
                 matches,
                 dag,
-                graph,
-                sub_clone,
+                BitSet::new(),
                 {
                     let mut clone = removal_order.clone();
-                    clone.push(v);
+                    clone.push(*matches.get(&v).unwrap());
                     clone
                 },
                 &fragments,
@@ -655,7 +687,7 @@ fn is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<Bit
                 bounds,
                 &mut cache.clone(),
                 new_parallel,
-                new_kernel,
+                kernel_mode,
             );
 
             // Update the best assembly indices (across children states and
@@ -667,31 +699,21 @@ fn is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<Bit
     };
 
     // Use the iterator type corresponding to the specified parallelism mode.
-    // Only search on nodes with v <= must_include since any searches after that would
-    // not use must_include.
     if parallel_mode == ParallelMode::None {
-        subgraph
+        enum_matches
             .iter()
-            .filter(|v| *v <= must_include && matches[*v].0.len() >= idx)
-            .for_each(|v| recurse_on_match(v));
+            .for_each(|v| recurse_on_match(*v));
     } else {
-        let _ = subgraph
-            .iter()
-            .filter(|v| *v <= must_include /*&& matches[*v].0.len() >= idx*/)
-            .collect::<Vec<usize>>()
+        enum_matches
             .par_iter()
             .for_each(|v| recurse_on_match(*v));
     }
 
-    /*if removal_order.len() == 1 {
-        println!("{:?}, {}", removal_order, states_searched.load(Relaxed));
-    }*/
-
     (
         best_child_index.load(Relaxed),
         states_searched.load(Relaxed),
     )
-}*/
+}
 
 /// Compute a molecule's assembly index and related information using a
 /// top-down recursive algorithm, parameterized by the specified options.
@@ -770,6 +792,7 @@ pub fn index_search(
 
     // Enumerate non-overlapping isomorphic subgraph pairs.
     let (matches, dag) = matches(mol, canonize_mode);
+
     /*let mut match_list: Vec<((usize, usize), usize)> = Vec::new();
     for m in matches.iter() {
         match_list.push((*m.0, *m.1));
@@ -785,20 +808,18 @@ pub fn index_search(
     }
     println!("{}", matches.len());*/
 
-    std::process::exit(1);
-
     // Create memoization cache.
     //let mut cache = Cache::new(memoize_mode, canonize_mode);
-    /*let mut cache = NewCache::new(memoize_mode);
+    let mut cache = NewCache::new(memoize_mode);
 
-    let graph = {
+    /*let graph = {
         if clique {
             Some(CompatGraph::new(&matches, &dag))
         }
         else {
             None
         }
-    };
+    };*/
 
     // Initialize the first fragment as the entire graph.
     let mut init = BitSet::new();
@@ -812,12 +833,10 @@ pub fn index_search(
     // Search for the shortest assembly pathway recursively.
     let edge_count = mol.graph().edge_count();
     let best_index = Arc::new(AtomicUsize::from(edge_count - 1));
-    //let best_index = Arc::new(AtomicUsize::from(12));
     let (index, states_searched) = recurse_index_search(
         mol,
         &matches,
         &dag,
-        &graph,
         subgraph,
         Vec::new(),
         &[init],
@@ -831,8 +850,7 @@ pub fn index_search(
 
     println!("Bounded: {}", cache.count());
 
-    (index as u32, matches.len() as u32, states_searched)*/
-    (0, 0, 0)
+    (index as u32, matches.len() as u32, states_searched)
 }
 
 /// Compute a molecule's assembly index using an efficient default strategy.

From 0997b005d2b95b26ea08578ef53eae3e483e3121 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Wed, 3 Sep 2025 15:25:50 -0700
Subject: [PATCH 29/51] Create masks with frag_id to state_id map

---
 src/assembly.rs | 86 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 57 insertions(+), 29 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 3c52ef12..9510b44f 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -28,10 +28,11 @@ use std::{
 use bit_set::BitSet;
 use clap::ValueEnum;
 use graph_canon::canon;
-use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
+use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator, IndexedParallelIterator};
 use petgraph::graph::EdgeIndex;
 use itertools::Itertools;
 use std::cmp::{max, min};
+use ahash::RandomState;
 
 use crate::{
     bounds::{bound_exceeded, Bound},
@@ -102,7 +103,6 @@ pub fn depth(mol: &Molecule) -> u32 {
 pub struct DagNode {
     fragment: BitSet,
     canon_id: usize,
-    // added_edge: usize,
     children: Vec<usize>,
 }
 
@@ -512,6 +512,7 @@ pub fn recurse_index_search(
     mol: &Molecule,
     matches: &HashMap<(usize, usize), usize>,
     dag: &Vec<DagNode>,
+    last_removed: usize,
     mut subgraph: BitSet,
     removal_order: Vec<usize>,
     state: &[BitSet],
@@ -525,18 +526,18 @@ pub fn recurse_index_search(
     //println!("{:?}", removal_order);
     //println!("{:?}", state);
     
-    let num_edges = mol.graph().edge_count();
-    let mut enum_matches: Vec<(usize, usize)> = Vec::new();
-    
     // MATCHES METHOD 1
     // EXTEND ALWAYS AND DO MATCHING AT END
-    let mut subgraphs: Vec<usize> = Vec::new();
-    let mut buckets: HashMap<usize, Vec<usize>> = HashMap::new();
+    let num_edges = mol.graph().edge_count();
+
+    let mut enum_matches: Vec<(usize, usize)> = Vec::new();
+    let mut subgraphs: Vec<(usize, usize)> = Vec::new();
+    let mut buckets: HashMap<usize, Vec<(usize, usize)>> = HashMap::new();
 
     // create subgraphs of size 1
-    for fragment in state.iter() {
+    for (state_id, fragment) in state.iter().enumerate() {
         for edge_idx in fragment.iter() {
-            subgraphs.push(edge_idx);
+            subgraphs.push((edge_idx, state_id));
         }
     }
 
@@ -545,27 +546,29 @@ pub fn recurse_index_search(
         let mut new_subgraphs = Vec::new();
 
         // Extend each subgraph and bucket
-        for frag_id in subgraphs {
+        for (frag_id, state_id) in subgraphs {
+            // Get state frament that this subgraph is contained in
+            let state_frag = &state[state_id];
+
             // Get ids of extensions of this subgraph
             let children_ids = &dag[frag_id].children;
 
             for child_id in children_ids {
                 // Check if this extension is valid
-                // TODO: save fragment id
-                // TODO: save edge id in DAG
+                // i.e. the extended subgraph is contained in a fragment of state
                 let child_frag = &dag[*child_id].fragment;
-                let valid = state.iter().any(|frag| child_frag.is_subset(frag));
+                let valid = child_frag.is_subset(state_frag);
 
                 if valid {
                     let canon_id = &dag[*child_id].canon_id;
                     
                     // Add fragment to bucket
                     buckets.entry(*canon_id)
-                        .and_modify(|bucket| bucket.push(*child_id))
-                        .or_insert(vec![*child_id]);
+                        .and_modify(|bucket| bucket.push((*child_id, state_id)))
+                        .or_insert(vec![(*child_id, state_id)]);
                     
                     // Add fragment to new subgraphs
-                    new_subgraphs.push(*child_id);
+                    new_subgraphs.push((*child_id, state_id));
                 }
             }
         }
@@ -574,6 +577,8 @@ pub fn recurse_index_search(
         subgraphs = new_subgraphs;
     }
 
+    let mut frag_id_to_state_idx: HashMap<usize, usize, RandomState> = HashMap::with_hasher(RandomState::new());
+
     // Generate matches from buckets of ismorphic fragments
     for fragments in buckets.values() {
         // Loop over pairs of fragments
@@ -581,10 +586,23 @@ pub fn recurse_index_search(
             let frag1 = *max(pair[0], pair[1]);
             let frag2 = *min(pair[0], pair[1]);
 
+            let (frag1_id, frag1_state_idx) = frag1;
+            let (frag2_id, frag2_state_idx) = frag2;
+
             // Check for valid match
             // If valid, add to matches to iterate over
-            if let Some(x) = matches.get(&(frag1, frag2)) {
-                enum_matches.push((frag1, frag2));
+            if let Some(x) = matches.get(&(frag1_id, frag2_id)) {
+                // Only add match if it occurs later in the ordering than
+                // the last removed match
+                if *x >= last_removed {
+                    enum_matches.push((frag1_id, frag2_id));
+
+                    // Store which state fragments the matched subgraphs are contained in
+                    frag_id_to_state_idx.entry(frag1_id)
+                        .or_insert(frag1_state_idx);
+                    frag_id_to_state_idx.entry(frag2_id)
+                        .or_insert(frag2_state_idx);
+                }
             }
         }
     }
@@ -598,15 +616,13 @@ pub fn recurse_index_search(
     enum_matches.reverse();
 
     // Modify mask
-    // TODO: try storing fragment so search can be reduced
-    // TODO: try with a set for frag_ids to only do intersection once per frag
     let largest_remove = dag[enum_matches[0].0].fragment.len();
     let mut masks = vec![
         vec![BitSet::with_capacity(num_edges); state.len()];
         largest_remove - 1
     ];
 
-    for m in enum_matches.iter() {
+    /*for m in enum_matches.iter() {
         // Extract fragments
         let frag1 = &dag[m.0].fragment;
         let frag2 = &dag[m.1].fragment;
@@ -625,18 +641,25 @@ pub fn recurse_index_search(
         // Union with masks to indicate that these edges are used
         masks[len][frag1_idx].union_with(frag1);
         masks[len][frag2_idx].union_with(frag2);
+    }*/
+
+    for (frag_id, state_idx) in frag_id_to_state_idx.iter() {
+        let frag = &dag[*frag_id].fragment;
+        let len = frag.len() - 2;
+
+        masks[len][*state_idx].union_with(frag);
     }
 
     // Let new state be only the edges which can be matched
     let mut state = Vec::new();
 
     for frag in masks[0].iter() {
-        // Split into connected components
-        let connected_frags = connected_components_under_edges(mol.graph(), &frag);
-        
         // Add connected components to new state
-        state.extend(connected_frags);
+        state.extend(connected_components_under_edges(mol.graph(), &frag));
     }
+
+    // Remove frags of size 1 or less
+    state.retain(|frag| frag.len() >= 2);
                 
 
     // MATCHES METHOD 2
@@ -655,7 +678,7 @@ pub fn recurse_index_search(
     
     // Define a closure that handles recursing to a new assembly state based on
     // the given (enumerated) pair of non-overlapping isomorphic subgraphs.
-    let  recurse_on_match = |v: (usize, usize)| {
+    let  recurse_on_match = |i: usize, v: (usize, usize)| {
         //let (h1, h2) = &matches[v];
         let h1 = &dag[v.0].fragment;
         let h2 = &dag[v.1].fragment;
@@ -675,10 +698,12 @@ pub fn recurse_index_search(
                 mol,
                 matches,
                 dag,
+                *matches.get(&v).unwrap(),
                 BitSet::new(),
                 {
                     let mut clone = removal_order.clone();
-                    clone.push(*matches.get(&v).unwrap());
+                    // clone.push(*matches.get(&v).unwrap());
+                    clone.push(i);
                     clone
                 },
                 &fragments,
@@ -702,11 +727,13 @@ pub fn recurse_index_search(
     if parallel_mode == ParallelMode::None {
         enum_matches
             .iter()
-            .for_each(|v| recurse_on_match(*v));
+            .enumerate()
+            .for_each(|(i, v)| recurse_on_match(i, *v));
     } else {
         enum_matches
             .par_iter()
-            .for_each(|v| recurse_on_match(*v));
+            .enumerate()
+            .for_each(|(i, v)| recurse_on_match(i, *v));
     }
 
     (
@@ -837,6 +864,7 @@ pub fn index_search(
         mol,
         &matches,
         &dag,
+        0,
         subgraph,
         Vec::new(),
         &[init],

From 87ce547d331a706c08e69ddfae2adda641e58a45 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Wed, 3 Sep 2025 15:27:56 -0700
Subject: [PATCH 30/51] Create masks by searching through state

---
 src/assembly.rs | 30 ++++++------------------------
 1 file changed, 6 insertions(+), 24 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 9510b44f..d1b0ef73 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -532,7 +532,7 @@ pub fn recurse_index_search(
 
     let mut enum_matches: Vec<(usize, usize)> = Vec::new();
     let mut subgraphs: Vec<(usize, usize)> = Vec::new();
-    let mut buckets: HashMap<usize, Vec<(usize, usize)>> = HashMap::new();
+    let mut buckets: HashMap<usize, Vec<usize>> = HashMap::new();
 
     // create subgraphs of size 1
     for (state_id, fragment) in state.iter().enumerate() {
@@ -564,8 +564,8 @@ pub fn recurse_index_search(
                     
                     // Add fragment to bucket
                     buckets.entry(*canon_id)
-                        .and_modify(|bucket| bucket.push((*child_id, state_id)))
-                        .or_insert(vec![(*child_id, state_id)]);
+                        .and_modify(|bucket| bucket.push(*child_id))
+                        .or_insert(vec![*child_id]);
                     
                     // Add fragment to new subgraphs
                     new_subgraphs.push((*child_id, state_id));
@@ -577,8 +577,6 @@ pub fn recurse_index_search(
         subgraphs = new_subgraphs;
     }
 
-    let mut frag_id_to_state_idx: HashMap<usize, usize, RandomState> = HashMap::with_hasher(RandomState::new());
-
     // Generate matches from buckets of ismorphic fragments
     for fragments in buckets.values() {
         // Loop over pairs of fragments
@@ -586,22 +584,13 @@ pub fn recurse_index_search(
             let frag1 = *max(pair[0], pair[1]);
             let frag2 = *min(pair[0], pair[1]);
 
-            let (frag1_id, frag1_state_idx) = frag1;
-            let (frag2_id, frag2_state_idx) = frag2;
-
             // Check for valid match
             // If valid, add to matches to iterate over
-            if let Some(x) = matches.get(&(frag1_id, frag2_id)) {
+            if let Some(x) = matches.get(&(frag1, frag2)) {
                 // Only add match if it occurs later in the ordering than
                 // the last removed match
                 if *x >= last_removed {
-                    enum_matches.push((frag1_id, frag2_id));
-
-                    // Store which state fragments the matched subgraphs are contained in
-                    frag_id_to_state_idx.entry(frag1_id)
-                        .or_insert(frag1_state_idx);
-                    frag_id_to_state_idx.entry(frag2_id)
-                        .or_insert(frag2_state_idx);
+                    enum_matches.push((frag1, frag2));
                 }
             }
         }
@@ -622,7 +611,7 @@ pub fn recurse_index_search(
         largest_remove - 1
     ];
 
-    /*for m in enum_matches.iter() {
+    for m in enum_matches.iter() {
         // Extract fragments
         let frag1 = &dag[m.0].fragment;
         let frag2 = &dag[m.1].fragment;
@@ -641,13 +630,6 @@ pub fn recurse_index_search(
         // Union with masks to indicate that these edges are used
         masks[len][frag1_idx].union_with(frag1);
         masks[len][frag2_idx].union_with(frag2);
-    }*/
-
-    for (frag_id, state_idx) in frag_id_to_state_idx.iter() {
-        let frag = &dag[*frag_id].fragment;
-        let len = frag.len() - 2;
-
-        masks[len][*state_idx].union_with(frag);
     }
 
     // Let new state be only the edges which can be matched

From e66d91143d12fa4c3eedf9737f0f8380d1bd659e Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Wed, 3 Sep 2025 16:03:26 -0700
Subject: [PATCH 31/51] Add int bound

---
 src/assembly.rs | 54 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index d1b0ef73..be089906 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -642,6 +642,45 @@ pub fn recurse_index_search(
 
     // Remove frags of size 1 or less
     state.retain(|frag| frag.len() >= 2);
+
+
+    let best = best_index.load(Relaxed);
+    let mut best_bound = 0;
+    let mut largest_length = 2;
+
+    // Use bounding strategy to find the largest length match
+    // to try removing from this state.
+    for (i, list) in masks.iter().enumerate() {
+        let mut bound = 0;
+        let i = i + 2;
+        for (j, frag) in list.iter().enumerate() {
+            let mf = masks[0][j].len();
+            let mi = frag.len();
+            let x = mf + (mi % i) - mi;
+            bound += mf - (mi / i) - x / (i-1) - (x % (i-1) != 0) as usize;
+        }
+        bound -= (i as f32).log2().ceil() as usize;
+
+        best_bound = max(best_bound, bound);
+
+        // Check if removing at this length can give a more optimal answer
+        // If yes, stop and return the largest_length to remove
+        if state_index - best_bound < best {
+            break;
+        }
+
+        largest_length += 1;
+    }
+
+    // Remove from enum_matches any matches that have size smaller
+    // than largest_length, and thus their removal does not need to be tested
+    if largest_length > 2 {
+        let mut final_idx = 0;
+        while dag[enum_matches[final_idx].0].fragment.len() >= largest_length {
+            final_idx += 1;
+        }
+        enum_matches.truncate(final_idx);
+    }
                 
 
     // MATCHES METHOD 2
@@ -802,21 +841,6 @@ pub fn index_search(
     // Enumerate non-overlapping isomorphic subgraph pairs.
     let (matches, dag) = matches(mol, canonize_mode);
 
-    /*let mut match_list: Vec<((usize, usize), usize)> = Vec::new();
-    for m in matches.iter() {
-        match_list.push((*m.0, *m.1));
-    }
-
-    for (idx, d) in dag.iter().enumerate() {
-        println!("{}: {:?}", idx, d);
-    }
-
-    match_list.sort_by_key(|m| m.1);
-    for m in match_list {
-        println!("{:?}", m);
-    }
-    println!("{}", matches.len());*/
-
     // Create memoization cache.
     //let mut cache = Cache::new(memoize_mode, canonize_mode);
     let mut cache = NewCache::new(memoize_mode);

From a14e9ed6f8d4a914b619dd65c4b609fa4e44dde9 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Wed, 3 Sep 2025 16:09:13 -0700
Subject: [PATCH 32/51] Cleanup some code

---
 src/assembly.rs | 178 +++++-------------------------------------------
 1 file changed, 18 insertions(+), 160 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index be089906..c5d1bed2 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -116,7 +116,10 @@ impl DagNode {
     }
 }
 
-
+/// Helper function for [`index_search`]; only public for benchmarking.
+///
+/// Return all pairs of non-overlapping, isomorphic subgraphs in the molecule,
+/// sorted to guarantee deterministic iteration.
 pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (HashMap<(usize, usize), usize>, Vec<DagNode>) {
     let num_edges = mol.graph().edge_count();
 
@@ -285,146 +288,6 @@ pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (HashMap<(usize,
     (match_to_id, dag)
 }
 
-
-/// Helper function for [`index_search`]; only public for benchmarking.
-///
-/// Return all pairs of non-overlapping, isomorphic subgraphs in the molecule,
-/// sorted to guarantee deterministic iteration.
-/*pub fn _matches(mol: &Molecule) -> (Vec<(usize, usize)>, DagNode) {
-    let num_edges = mol.graph().edge_count();
-
-    let mut subgraphs: HashMap<BitSet, usize> = HashMap::new();
-    let mut size_one_subgraphs: HashMap<BitSet, usize> = HashMap::new();
-    let mut buckets: HashMap<Labeling, Vec<DagNode>> = HashMap::new();
-
-    let mut dag: Vec<DagNode> = Vec::with_capacity(num_edges);
-    let mut canon_ids: HashMap<Labeling, usize> = HashMap::new();
-    let mut matches: HashMap<(usize, usize), usize> = Hashmap::new();
-
-    let mut next_frag_id = 1;
-    let mut next_match_id = 0;
-    let mut next_canon_id = 1;
-
-    // Generate subgraphs with one edge
-    for i in 0..num_edges {
-        let mut new_bitset = BitSet::with_capacity(num_edges);
-        new_bitset.insert(i);
-        size_one_subgraphs.insert(new_bitset, i);
-
-        dag.push(DagNode::new(new_bitset, 0, 0));
-    }
-
-    // Generate subgraphs with two edges
-    for (subgraph, idx) in size_one_subgraphs.iter() {
-        let mut neighborhood = BitSet::with_capacity(num_edges);
-        for e in subgraph {
-            neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
-        }
-        neighborhood.difference_with(subgraph);
-
-        for e in neighborhood.iter() {
-            let mut new_subgraph = subgraph.clone();
-            new_subgraph.insert(e);
-            subgraphs.entry(new_subgraph)
-                .or_insert(*idx);
-        }
-    }
-
-    // Generate subgraphs with more than two edges
-    while subgraphs.len() > 0 {
-        buckets.clear();
-        // Generate buckets
-        for (g, parent) in subgraphs.iter() {
-            let canon = canonize(mol, g, CanonizeMode::TreeNauty);            
-            buckets.entry(canon)
-                .and_modify(|v| v.push((g.clone(), *parent)))
-                .or_insert(vec![(g.clone(), *parent)]);
-        }
-
-        subgraphs.clear();
-        let mut frag_to_id: HashMap<BitSet, usize> = HashMap::new();
-        for (_, bucket) in buckets.iter() {
-            // Generate matches
-            let mut has_match = BitSet::with_capacity(bucket.len());
-            for (i, (first, first_parent)) in bucket.iter().enumerate() {
-                for (j, (second, second_parent)) in bucket[i+1..].iter().enumerate() {
-                    if first.intersection(second).count() == 0 {
-                        if !has_match.contains(i) {
-                            frag_to_id.insert(first.clone(), next_id);
-
-                            dag.insert(next_id, (first.clone(), Vec::new()));
-                            dag.entry(*first_parent)
-                                .and_modify(|(_, children)| children.push(next_id));
-
-                            let mut neighborhood = BitSet::with_capacity(num_edges);
-                            for e in first {
-                                neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
-                            }
-                            neighborhood.difference_with(first);
-
-                            for e in neighborhood.iter() {
-                                let mut new_subgraph = first.clone();
-                                new_subgraph.insert(e);
-                                subgraphs.insert(new_subgraph, next_id);
-                            }
-
-                            next_id += 1;
-                        }
-                        if !has_match.contains(i + 1 + j) {
-                            frag_to_id.insert(second.clone(), next_id);
-
-                            dag.insert(next_id, (second.clone(), Vec::new()));
-                            dag.entry(*second_parent)
-                                .and_modify(|(_, children)| children.push(next_id));
-
-                            let mut neighborhood = BitSet::with_capacity(num_edges);
-                            for e in second {
-                                neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
-                            }
-                            neighborhood.difference_with(second);
-
-                            for e in neighborhood.iter() {
-                                let mut new_subgraph = second.clone();
-                                new_subgraph.insert(e);
-                                subgraphs.insert(new_subgraph, next_id);
-                            }
-
-                            next_id += 1;
-                        }
-
-                        let id1 = frag_to_id.get(first).unwrap();
-                        let id2 = frag_to_id.get(second).unwrap();
-                        if first > second {
-                            matches.push((*id1, *id2));
-                        } else {
-                            matches.push((*id1, *id2));
-                        }
-
-                        has_match.insert(i);
-                        has_match.insert(i + 1 + j);
-                    }
-                }
-            }
-        }
-    }
-
-    // Sort matches
-    matches.sort_by(|e1, e2| {
-        let ord = [
-            e2.0.len().cmp(&e1.0.len()), // Decreasing subgraph size.
-            e1.0.cmp(&e2.0),             // First subgraph lexicographical.
-            e1.1.cmp(&e2.1),             // Second subgraph lexicographical.
-        ];
-        let mut i = 0;
-        while ord[i] == std::cmp::Ordering::Equal {
-            i += 1;
-        }
-        ord[i]
-    });
-
-    (matches, dag)
-}*/
-
 /// Determine the fragments produced from the given assembly state by removing
 /// the given pair of non-overlapping, isomorphic subgraphs and then adding one
 /// back; return `None` if not possible.
@@ -526,15 +389,14 @@ pub fn recurse_index_search(
     //println!("{:?}", removal_order);
     //println!("{:?}", state);
     
-    // MATCHES METHOD 1
-    // EXTEND ALWAYS AND DO MATCHING AT END
+    // Generate matches
     let num_edges = mol.graph().edge_count();
 
-    let mut enum_matches: Vec<(usize, usize)> = Vec::new();
+    let mut valid_matches: Vec<(usize, usize)> = Vec::new();
     let mut subgraphs: Vec<(usize, usize)> = Vec::new();
     let mut buckets: HashMap<usize, Vec<usize>> = HashMap::new();
 
-    // create subgraphs of size 1
+    // Create subgraphs of size 1
     for (state_id, fragment) in state.iter().enumerate() {
         for edge_idx in fragment.iter() {
             subgraphs.push((edge_idx, state_id));
@@ -590,28 +452,28 @@ pub fn recurse_index_search(
                 // Only add match if it occurs later in the ordering than
                 // the last removed match
                 if *x >= last_removed {
-                    enum_matches.push((frag1, frag2));
+                    valid_matches.push((frag1, frag2));
                 }
             }
         }
     }
 
     // If there are no matches, return
-    if enum_matches.len() == 0 {
+    if valid_matches.len() == 0 {
         return (state_index, 1);
     }
 
-    enum_matches.sort();
-    enum_matches.reverse();
+    valid_matches.sort();
+    valid_matches.reverse();
 
     // Modify mask
-    let largest_remove = dag[enum_matches[0].0].fragment.len();
+    let largest_remove = dag[valid_matches[0].0].fragment.len();
     let mut masks = vec![
         vec![BitSet::with_capacity(num_edges); state.len()];
         largest_remove - 1
     ];
 
-    for m in enum_matches.iter() {
+    for m in valid_matches.iter() {
         // Extract fragments
         let frag1 = &dag[m.0].fragment;
         let frag2 = &dag[m.1].fragment;
@@ -672,19 +534,15 @@ pub fn recurse_index_search(
         largest_length += 1;
     }
 
-    // Remove from enum_matches any matches that have size smaller
+    // Remove from valid_matches any matches that have size smaller
     // than largest_length, and thus their removal does not need to be tested
     if largest_length > 2 {
         let mut final_idx = 0;
-        while dag[enum_matches[final_idx].0].fragment.len() >= largest_length {
+        while dag[valid_matches[final_idx].0].fragment.len() >= largest_length {
             final_idx += 1;
         }
-        enum_matches.truncate(final_idx);
+        valid_matches.truncate(final_idx);
     }
-                
-
-    // MATCHES METHOD 2
-    // ONLY EXTEND IF A SUBGRAPH HAS A MATCH
 
 
     // Memoization
@@ -746,12 +604,12 @@ pub fn recurse_index_search(
 
     // Use the iterator type corresponding to the specified parallelism mode.
     if parallel_mode == ParallelMode::None {
-        enum_matches
+        valid_matches
             .iter()
             .enumerate()
             .for_each(|(i, v)| recurse_on_match(i, *v));
     } else {
-        enum_matches
+        valid_matches
             .par_iter()
             .enumerate()
             .for_each(|(i, v)| recurse_on_match(i, *v));

From b2689d8c85638ee319ad475480e4bad113b12305 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Wed, 3 Sep 2025 16:47:23 -0700
Subject: [PATCH 33/51] Don't extend subgraphs without a match

---
 src/assembly.rs | 72 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 23 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index c5d1bed2..edc630f1 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -394,7 +394,6 @@ pub fn recurse_index_search(
 
     let mut valid_matches: Vec<(usize, usize)> = Vec::new();
     let mut subgraphs: Vec<(usize, usize)> = Vec::new();
-    let mut buckets: HashMap<usize, Vec<usize>> = HashMap::new();
 
     // Create subgraphs of size 1
     for (state_id, fragment) in state.iter().enumerate() {
@@ -405,6 +404,7 @@ pub fn recurse_index_search(
 
     while subgraphs.len() > 0 {
         // Subgraphs to extend in next loop
+        let mut buckets: HashMap<usize, Vec<(usize, usize)>> = HashMap::new();
         let mut new_subgraphs = Vec::new();
 
         // Extend each subgraph and bucket
@@ -426,36 +426,62 @@ pub fn recurse_index_search(
                     
                     // Add fragment to bucket
                     buckets.entry(*canon_id)
-                        .and_modify(|bucket| bucket.push(*child_id))
-                        .or_insert(vec![*child_id]);
-                    
-                    // Add fragment to new subgraphs
-                    new_subgraphs.push((*child_id, state_id));
+                        .and_modify(|bucket| bucket.push((*child_id, state_id)))
+                        .or_insert(vec![(*child_id, state_id)]);
                 }
             }
         }
 
-        // Update to new subgraphs
-        subgraphs = new_subgraphs;
-    }
+        // Search through buckets and create matches
+        for fragments in buckets.values() {
+            let mut has_match = BitSet::with_capacity(fragments.len());
+            // Loop over pairs of fragments
+            for pair in fragments.iter().enumerate().combinations(2) {
+                let mut frag1;
+                let mut frag2;
 
-    // Generate matches from buckets of ismorphic fragments
-    for fragments in buckets.values() {
-        // Loop over pairs of fragments
-        for pair in fragments.iter().combinations(2) {
-            let frag1 = *max(pair[0], pair[1]);
-            let frag2 = *min(pair[0], pair[1]);
-
-            // Check for valid match
-            // If valid, add to matches to iterate over
-            if let Some(x) = matches.get(&(frag1, frag2)) {
-                // Only add match if it occurs later in the ordering than
-                // the last removed match
-                if *x >= last_removed {
-                    valid_matches.push((frag1, frag2));
+                // Order fragments
+                if pair[0].1.0 > pair[1].1.0 {
+                    frag1 = pair[0];
+                    frag2 = pair[1];
+                }
+                else {
+                    frag1 = pair[1];
+                    frag2 = pair[0];
+                }
+
+                let frag1_bucket_idx = frag1.0;
+                let frag1_id = (*frag1.1).0;
+                let frag1_state_id = (*frag1.1).1;
+
+                let frag2_bucket_idx = frag2.0;
+                let frag2_id = (*frag2.1).0;
+                let frag2_state_id = (*frag2.1).1;
+
+                // Check for valid match
+                if let Some(x) = matches.get(&(frag1_id, frag2_id)) {
+                    // Only add match if it occurs later in the ordering than
+                    // the last removed match
+                    if *x >= last_removed {
+                        valid_matches.push((frag1_id, frag2_id));
+
+                        // If this is the first time seeing that this frag has a match,
+                        // add it to the new_subgraphs list
+                        if !has_match.contains(frag1_bucket_idx) {
+                            new_subgraphs.push((frag1_id, frag1_state_id));
+                            has_match.insert(frag1_bucket_idx);
+                        }
+                        if !has_match.contains(frag2_bucket_idx) {
+                            new_subgraphs.push((frag2_id, frag2_state_id));
+                            has_match.insert(frag2_bucket_idx);
+                        }
+                    }
                 }
             }
         }
+
+        // Update to new subgraphs
+        subgraphs = new_subgraphs;
     }
 
     // If there are no matches, return

From fc86feea86ccbed4c8219bfef93719b7bf47bade Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 4 Sep 2025 12:30:21 -0700
Subject: [PATCH 34/51] Add timers

---
 src/assembly.rs | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index edc630f1..211d126e 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -22,7 +22,7 @@ use std::{
     collections::{BTreeMap, HashMap, HashSet}, sync::{
         atomic::{AtomicUsize, Ordering::Relaxed},
         Arc,
-    }
+    }, time::{Duration, Instant}
 };
 
 use bit_set::BitSet;
@@ -33,6 +33,7 @@ use petgraph::graph::EdgeIndex;
 use itertools::Itertools;
 use std::cmp::{max, min};
 use ahash::RandomState;
+use fxhash::FxHashMap;
 
 use crate::{
     bounds::{bound_exceeded, Bound},
@@ -120,7 +121,7 @@ impl DagNode {
 ///
 /// Return all pairs of non-overlapping, isomorphic subgraphs in the molecule,
 /// sorted to guarantee deterministic iteration.
-pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (HashMap<(usize, usize), usize>, Vec<DagNode>) {
+pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (FxHashMap<(usize, usize), usize>, Vec<DagNode>) {
     let num_edges = mol.graph().edge_count();
 
     let mut subgraphs: Vec<usize> = Vec::new();
@@ -279,7 +280,7 @@ pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (HashMap<(usize,
 
     // give matches ids
     let mut next_match_id = 0;
-    let mut match_to_id: HashMap<(usize, usize), usize> = HashMap::new();
+    let mut match_to_id: FxHashMap<(usize, usize), usize> = FxHashMap::default();
     for m in matches {
         match_to_id.insert(m, next_match_id);
         next_match_id += 1;
@@ -373,7 +374,7 @@ fn is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<Bit
 #[allow(clippy::too_many_arguments)]
 pub fn recurse_index_search(
     mol: &Molecule,
-    matches: &HashMap<(usize, usize), usize>,
+    matches: &FxHashMap<(usize, usize), usize>,
     dag: &Vec<DagNode>,
     last_removed: usize,
     mut subgraph: BitSet,
@@ -388,6 +389,12 @@ pub fn recurse_index_search(
 ) -> (usize, usize) {
     //println!("{:?}", removal_order);
     //println!("{:?}", state);
+
+    let mut bucket_time = Duration::new(0, 0);
+    let mut sort_time = Duration::new(0, 0);
+    let mut create_matches_time = Duration::new(0, 0);
+    let mut mask_time = Duration::new(0, 0);
+    let mut bound_time = Duration::new(0, 0);
     
     // Generate matches
     let num_edges = mol.graph().edge_count();
@@ -421,6 +428,7 @@ pub fn recurse_index_search(
                 let child_frag = &dag[*child_id].fragment;
                 let valid = child_frag.is_subset(state_frag);
 
+                let start = Instant::now();
                 if valid {
                     let canon_id = &dag[*child_id].canon_id;
                     
@@ -429,9 +437,11 @@ pub fn recurse_index_search(
                         .and_modify(|bucket| bucket.push((*child_id, state_id)))
                         .or_insert(vec![(*child_id, state_id)]);
                 }
+                bucket_time += start.elapsed();
             }
         }
 
+        let start = Instant::now();
         // Search through buckets and create matches
         for fragments in buckets.values() {
             let mut has_match = BitSet::with_capacity(fragments.len());
@@ -459,7 +469,9 @@ pub fn recurse_index_search(
                 let frag2_state_id = (*frag2.1).1;
 
                 // Check for valid match
-                if let Some(x) = matches.get(&(frag1_id, frag2_id)) {
+                // if let Some(x) = matches.get(&(frag1_id, frag2_id)) {
+                if dag[frag1_id].fragment.is_disjoint(&dag[frag2_id].fragment) {
+                    let x = matches.get(&(frag1_id, frag2_id)).unwrap();
                     // Only add match if it occurs later in the ordering than
                     // the last removed match
                     if *x >= last_removed {
@@ -479,6 +491,8 @@ pub fn recurse_index_search(
                 }
             }
         }
+        create_matches_time += start.elapsed();
+
 
         // Update to new subgraphs
         subgraphs = new_subgraphs;
@@ -489,10 +503,13 @@ pub fn recurse_index_search(
         return (state_index, 1);
     }
 
+    let start = Instant::now();
     valid_matches.sort();
     valid_matches.reverse();
+    sort_time += start.elapsed();
 
     // Modify mask
+    let start = Instant::now();
     let largest_remove = dag[valid_matches[0].0].fragment.len();
     let mut masks = vec![
         vec![BitSet::with_capacity(num_edges); state.len()];
@@ -530,8 +547,9 @@ pub fn recurse_index_search(
 
     // Remove frags of size 1 or less
     state.retain(|frag| frag.len() >= 2);
+    mask_time += start.elapsed();
 
-
+    let start = Instant::now();
     let best = best_index.load(Relaxed);
     let mut best_bound = 0;
     let mut largest_length = 2;
@@ -569,6 +587,9 @@ pub fn recurse_index_search(
         }
         valid_matches.truncate(final_idx);
     }
+    bound_time += start.elapsed();
+
+    // println!("bucket: {} match: {} sort: {} mask: {} bound: {}", bucket_time.as_nanos(), create_matches_time.as_nanos(), sort_time.as_nanos(), mask_time.as_nanos(), bound_time.as_nanos());
 
 
     // Memoization

From 266cdca547acbd2d135cf24d2028fc2c976c3a95 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 4 Sep 2025 13:39:55 -0700
Subject: [PATCH 35/51] Remove itertools.combinations from search

---
 src/assembly.rs | 95 ++++++++++++++++++++-----------------------------
 1 file changed, 38 insertions(+), 57 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 211d126e..0bc523d4 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -389,12 +389,6 @@ pub fn recurse_index_search(
 ) -> (usize, usize) {
     //println!("{:?}", removal_order);
     //println!("{:?}", state);
-
-    let mut bucket_time = Duration::new(0, 0);
-    let mut sort_time = Duration::new(0, 0);
-    let mut create_matches_time = Duration::new(0, 0);
-    let mut mask_time = Duration::new(0, 0);
-    let mut bound_time = Duration::new(0, 0);
     
     // Generate matches
     let num_edges = mol.graph().edge_count();
@@ -428,7 +422,6 @@ pub fn recurse_index_search(
                 let child_frag = &dag[*child_id].fragment;
                 let valid = child_frag.is_subset(state_frag);
 
-                let start = Instant::now();
                 if valid {
                     let canon_id = &dag[*child_id].canon_id;
                     
@@ -437,61 +430,57 @@ pub fn recurse_index_search(
                         .and_modify(|bucket| bucket.push((*child_id, state_id)))
                         .or_insert(vec![(*child_id, state_id)]);
                 }
-                bucket_time += start.elapsed();
             }
         }
 
-        let start = Instant::now();
         // Search through buckets and create matches
         for fragments in buckets.values() {
             let mut has_match = BitSet::with_capacity(fragments.len());
             // Loop over pairs of fragments
-            for pair in fragments.iter().enumerate().combinations(2) {
-                let mut frag1;
-                let mut frag2;
-
-                // Order fragments
-                if pair[0].1.0 > pair[1].1.0 {
-                    frag1 = pair[0];
-                    frag2 = pair[1];
-                }
-                else {
-                    frag1 = pair[1];
-                    frag2 = pair[0];
-                }
+            for i in 0..fragments.len() {
+                for j in i+1..fragments.len() {
+                    let mut frag1 = (i, fragments[i]);
+                    let mut frag2 = (j, fragments[j]);
+
+                    // Order fragments
+                    if frag1.1.0 < frag2.1.0 {
+                        let temp = frag1;
+                        frag1 = frag2;
+                        frag2 = temp;
+                    }
 
-                let frag1_bucket_idx = frag1.0;
-                let frag1_id = (*frag1.1).0;
-                let frag1_state_id = (*frag1.1).1;
-
-                let frag2_bucket_idx = frag2.0;
-                let frag2_id = (*frag2.1).0;
-                let frag2_state_id = (*frag2.1).1;
-
-                // Check for valid match
-                // if let Some(x) = matches.get(&(frag1_id, frag2_id)) {
-                if dag[frag1_id].fragment.is_disjoint(&dag[frag2_id].fragment) {
-                    let x = matches.get(&(frag1_id, frag2_id)).unwrap();
-                    // Only add match if it occurs later in the ordering than
-                    // the last removed match
-                    if *x >= last_removed {
-                        valid_matches.push((frag1_id, frag2_id));
-
-                        // If this is the first time seeing that this frag has a match,
-                        // add it to the new_subgraphs list
-                        if !has_match.contains(frag1_bucket_idx) {
-                            new_subgraphs.push((frag1_id, frag1_state_id));
-                            has_match.insert(frag1_bucket_idx);
-                        }
-                        if !has_match.contains(frag2_bucket_idx) {
-                            new_subgraphs.push((frag2_id, frag2_state_id));
-                            has_match.insert(frag2_bucket_idx);
+                    let frag1_bucket_idx = frag1.0;
+                    let frag1_id = frag1.1.0;
+                    let frag1_state_id = frag1.1.1;
+
+                    let frag2_bucket_idx = frag2.0;
+                    let frag2_id = frag2.1.0;
+                    let frag2_state_id = frag2.1.1;
+
+                    // Check for valid match
+                    // if let Some(x) = matches.get(&(frag1_id, frag2_id)) {
+                    if dag[frag1_id].fragment.is_disjoint(&dag[frag2_id].fragment) {
+                        let x = matches.get(&(frag1_id, frag2_id)).unwrap();
+                        // Only add match if it occurs later in the ordering than
+                        // the last removed match
+                        if *x >= last_removed {
+                            valid_matches.push((frag1_id, frag2_id));
+
+                            // If this is the first time seeing that this frag has a match,
+                            // add it to the new_subgraphs list
+                            if !has_match.contains(frag1_bucket_idx) {
+                                new_subgraphs.push((frag1_id, frag1_state_id));
+                                has_match.insert(frag1_bucket_idx);
+                            }
+                            if !has_match.contains(frag2_bucket_idx) {
+                                new_subgraphs.push((frag2_id, frag2_state_id));
+                                has_match.insert(frag2_bucket_idx);
+                            }
                         }
                     }
                 }
             }
         }
-        create_matches_time += start.elapsed();
 
 
         // Update to new subgraphs
@@ -503,13 +492,10 @@ pub fn recurse_index_search(
         return (state_index, 1);
     }
 
-    let start = Instant::now();
     valid_matches.sort();
     valid_matches.reverse();
-    sort_time += start.elapsed();
 
     // Modify mask
-    let start = Instant::now();
     let largest_remove = dag[valid_matches[0].0].fragment.len();
     let mut masks = vec![
         vec![BitSet::with_capacity(num_edges); state.len()];
@@ -547,9 +533,7 @@ pub fn recurse_index_search(
 
     // Remove frags of size 1 or less
     state.retain(|frag| frag.len() >= 2);
-    mask_time += start.elapsed();
 
-    let start = Instant::now();
     let best = best_index.load(Relaxed);
     let mut best_bound = 0;
     let mut largest_length = 2;
@@ -587,9 +571,6 @@ pub fn recurse_index_search(
         }
         valid_matches.truncate(final_idx);
     }
-    bound_time += start.elapsed();
-
-    // println!("bucket: {} match: {} sort: {} mask: {} bound: {}", bucket_time.as_nanos(), create_matches_time.as_nanos(), sort_time.as_nanos(), mask_time.as_nanos(), bound_time.as_nanos());
 
 
     // Memoization

From f7b15f3e3783a7e905e68f30c7a90679c2d1d59a Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 4 Sep 2025 14:23:38 -0700
Subject: [PATCH 36/51] Move mask creation into match enumeration

---
 src/assembly.rs | 47 ++++++++++++-----------------------------------
 1 file changed, 12 insertions(+), 35 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 0bc523d4..9efc5b61 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -28,7 +28,7 @@ use std::{
 use bit_set::BitSet;
 use clap::ValueEnum;
 use graph_canon::canon;
-use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator, IndexedParallelIterator};
+use rayon::iter::{IntoParallelRefIterator, ParallelIterator, IndexedParallelIterator};
 use petgraph::graph::EdgeIndex;
 use itertools::Itertools;
 use std::cmp::{max, min};
@@ -216,7 +216,7 @@ pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (FxHashMap<(usize
 
                         // Add index to parent's child list
                         let parent_idx = pair[0].1.1;
-                        let mut parent = &mut dag[parent_idx];
+                        let parent = &mut dag[parent_idx];
                         parent.children.push(child_idx);
 
                         // Add dag node to next subgraphs list
@@ -235,7 +235,7 @@ pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (FxHashMap<(usize
 
                         // Add index to parent's child list
                         let parent_idx = pair[1].1.1;
-                        let mut parent = &mut dag[parent_idx];
+                        let parent = &mut dag[parent_idx];
                         parent.children.push(child_idx);
 
                         // Add dag node to next subgraphs list
@@ -395,6 +395,7 @@ pub fn recurse_index_search(
 
     let mut valid_matches: Vec<(usize, usize)> = Vec::new();
     let mut subgraphs: Vec<(usize, usize)> = Vec::new();
+    let mut masks: Vec<Vec<BitSet>> = Vec::new();
 
     // Create subgraphs of size 1
     for (state_id, fragment) in state.iter().enumerate() {
@@ -407,6 +408,7 @@ pub fn recurse_index_search(
         // Subgraphs to extend in next loop
         let mut buckets: HashMap<usize, Vec<(usize, usize)>> = HashMap::new();
         let mut new_subgraphs = Vec::new();
+        let mut used_edge_mask = vec![BitSet::with_capacity(num_edges); state.len()];
 
         // Extend each subgraph and bucket
         for (frag_id, state_id) in subgraphs {
@@ -458,22 +460,23 @@ pub fn recurse_index_search(
                     let frag2_state_id = frag2.1.1;
 
                     // Check for valid match
-                    // if let Some(x) = matches.get(&(frag1_id, frag2_id)) {
-                    if dag[frag1_id].fragment.is_disjoint(&dag[frag2_id].fragment) {
-                        let x = matches.get(&(frag1_id, frag2_id)).unwrap();
+                    // TODO: try alternatives to this
+                    if let Some(x) = matches.get(&(frag1_id, frag2_id)) {
                         // Only add match if it occurs later in the ordering than
                         // the last removed match
                         if *x >= last_removed {
                             valid_matches.push((frag1_id, frag2_id));
 
                             // If this is the first time seeing that this frag has a match,
-                            // add it to the new_subgraphs list
+                            // add it to the new_subgraphs list and modify mask
                             if !has_match.contains(frag1_bucket_idx) {
                                 new_subgraphs.push((frag1_id, frag1_state_id));
+                                used_edge_mask[frag1_state_id].union_with(&dag[frag1_id].fragment);
                                 has_match.insert(frag1_bucket_idx);
                             }
                             if !has_match.contains(frag2_bucket_idx) {
                                 new_subgraphs.push((frag2_id, frag2_state_id));
+                                used_edge_mask[frag2_state_id].union_with(&dag[frag2_id].fragment);
                                 has_match.insert(frag2_bucket_idx);
                             }
                         }
@@ -482,6 +485,8 @@ pub fn recurse_index_search(
             }
         }
 
+        // Add mask to list
+        masks.push(used_edge_mask);
 
         // Update to new subgraphs
         subgraphs = new_subgraphs;
@@ -495,34 +500,6 @@ pub fn recurse_index_search(
     valid_matches.sort();
     valid_matches.reverse();
 
-    // Modify mask
-    let largest_remove = dag[valid_matches[0].0].fragment.len();
-    let mut masks = vec![
-        vec![BitSet::with_capacity(num_edges); state.len()];
-        largest_remove - 1
-    ];
-
-    for m in valid_matches.iter() {
-        // Extract fragments
-        let frag1 = &dag[m.0].fragment;
-        let frag2 = &dag[m.1].fragment;
-        let len = frag1.len() - 2;
-
-        // Find indices of fragments that contain frag1 and frag2
-        let (frag1_idx, _) = state.iter()
-            .enumerate()   
-            .find(|(_, state_frag)| frag1.is_subset(state_frag))
-            .unwrap();
-        let (frag2_idx, _) = state.iter()
-            .enumerate()
-            .find(|(_, state_frag)| frag2.is_subset(state_frag))
-            .unwrap();
-
-        // Union with masks to indicate that these edges are used
-        masks[len][frag1_idx].union_with(frag1);
-        masks[len][frag2_idx].union_with(frag2);
-    }
-
     // Let new state be only the edges which can be matched
     let mut state = Vec::new();
 

From 0b90e2edc439a99e0c3162c2e43a6cc083d7b376 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 4 Sep 2025 15:22:25 -0700
Subject: [PATCH 37/51] Build matches after bucketing

---
 src/assembly.rs | 70 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 23 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 9efc5b61..c0a11180 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -396,6 +396,7 @@ pub fn recurse_index_search(
     let mut valid_matches: Vec<(usize, usize)> = Vec::new();
     let mut subgraphs: Vec<(usize, usize)> = Vec::new();
     let mut masks: Vec<Vec<BitSet>> = Vec::new();
+    let mut buckets_by_len: Vec<HashMap<usize, Vec<(usize, usize)>>> = Vec::new();
 
     // Create subgraphs of size 1
     for (state_id, fragment) in state.iter().enumerate() {
@@ -436,11 +437,15 @@ pub fn recurse_index_search(
         }
 
         // Search through buckets and create matches
-        for fragments in buckets.values() {
+        for fragments in buckets.values_mut() {
             let mut has_match = BitSet::with_capacity(fragments.len());
             // Loop over pairs of fragments
             for i in 0..fragments.len() {
                 for j in i+1..fragments.len() {
+                    if has_match.contains(i) && has_match.contains(j) {
+                        continue;
+                    }
+
                     let mut frag1 = (i, fragments[i]);
                     let mut frag2 = (j, fragments[j]);
 
@@ -460,12 +465,11 @@ pub fn recurse_index_search(
                     let frag2_state_id = frag2.1.1;
 
                     // Check for valid match
-                    // TODO: try alternatives to this
                     if let Some(x) = matches.get(&(frag1_id, frag2_id)) {
                         // Only add match if it occurs later in the ordering than
                         // the last removed match
                         if *x >= last_removed {
-                            valid_matches.push((frag1_id, frag2_id));
+                            // valid_matches.push((frag1_id, frag2_id));
 
                             // If this is the first time seeing that this frag has a match,
                             // add it to the new_subgraphs list and modify mask
@@ -483,8 +487,19 @@ pub fn recurse_index_search(
                     }
                 }
             }
+
+            // Remove any matchless fragments from bucket
+            let len = fragments.len();
+            for i in (0..len).rev() {
+                if !has_match.contains(i) {
+                    fragments.remove(i);
+                }
+            }     
         }
 
+        // Add buckets to list
+        buckets_by_len.push(buckets);
+
         // Add mask to list
         masks.push(used_edge_mask);
 
@@ -492,14 +507,6 @@ pub fn recurse_index_search(
         subgraphs = new_subgraphs;
     }
 
-    // If there are no matches, return
-    if valid_matches.len() == 0 {
-        return (state_index, 1);
-    }
-
-    valid_matches.sort();
-    valid_matches.reverse();
-
     // Let new state be only the edges which can be matched
     let mut state = Vec::new();
 
@@ -511,6 +518,11 @@ pub fn recurse_index_search(
     // Remove frags of size 1 or less
     state.retain(|frag| frag.len() >= 2);
 
+    // Memoization
+    if cache.memoize_state(mol, &state, state_index, &removal_order) {
+        return (state_index, 1);
+    }
+
     let best = best_index.load(Relaxed);
     let mut best_bound = 0;
     let mut largest_length = 2;
@@ -539,22 +551,34 @@ pub fn recurse_index_search(
         largest_length += 1;
     }
 
-    // Remove from valid_matches any matches that have size smaller
-    // than largest_length, and thus their removal does not need to be tested
-    if largest_length > 2 {
-        let mut final_idx = 0;
-        while dag[valid_matches[final_idx].0].fragment.len() >= largest_length {
-            final_idx += 1;
-        }
-        valid_matches.truncate(final_idx);
-    }
+    // Create matches
+    for bucket in buckets_by_len[largest_length-2..].iter().rev() {
+        for fragments in bucket.values() {
+            for i in 0..fragments.len() {
+                for j in i+1..fragments.len() {
+                    let mut frag1_id = fragments[i].0;
+                    let mut frag2_id = fragments[j].0;
 
+                    if frag1_id < frag2_id {
+                        let temp = frag1_id;
+                        frag1_id = frag2_id;
+                        frag2_id = temp;
+                    }
 
-    // Memoization
-    if cache.memoize_state(mol, &state, state_index, &removal_order) {
-        return (state_index, 1);
+                    if let Some(x) = matches.get(&(frag1_id, frag2_id)){
+                        if *x >= last_removed {
+                            valid_matches.push((frag1_id, frag2_id));
+                        }
+                    }
+                }
+            }
+        }
     }
 
+    // Sort matches
+    valid_matches.sort();
+    valid_matches.reverse();
+
     // Keep track of the best assembly index found in any of this assembly
     // state's children and the number of states searched, including this one.
     let best_child_index = AtomicUsize::from(state_index);

From 9e7e1f57f3a5f5c4149ee2cb32668b462c5e0dbb Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 4 Sep 2025 15:34:15 -0700
Subject: [PATCH 38/51] Move when memoization happens

---
 src/assembly.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index c0a11180..8a46a5f7 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -389,6 +389,11 @@ pub fn recurse_index_search(
 ) -> (usize, usize) {
     //println!("{:?}", removal_order);
     //println!("{:?}", state);
+
+    // Memoization
+    if cache.memoize_state(mol, &state, state_index, &removal_order) {
+        return (state_index, 1);
+    }
     
     // Generate matches
     let num_edges = mol.graph().edge_count();
@@ -518,10 +523,6 @@ pub fn recurse_index_search(
     // Remove frags of size 1 or less
     state.retain(|frag| frag.len() >= 2);
 
-    // Memoization
-    if cache.memoize_state(mol, &state, state_index, &removal_order) {
-        return (state_index, 1);
-    }
 
     let best = best_index.load(Relaxed);
     let mut best_bound = 0;

From afb46b32752aab903b21a27af6bd3fc9dc77f429 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Fri, 5 Sep 2025 14:06:08 -0700
Subject: [PATCH 39/51] Add simple vector bound

---
 src/assembly.rs | 115 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 105 insertions(+), 10 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 8a46a5f7..48a5dde8 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -32,7 +32,6 @@ use rayon::iter::{IntoParallelRefIterator, ParallelIterator, IndexedParallelIter
 use petgraph::graph::EdgeIndex;
 use itertools::Itertools;
 use std::cmp::{max, min};
-use ahash::RandomState;
 use fxhash::FxHashMap;
 
 use crate::{
@@ -41,7 +40,7 @@ use crate::{
     enumerate::{EnumerateMode},
     kernels::{KernelMode, deletion_kernel, inclusion_kernel},
     memoize::{MemoizeMode, NewCache},
-    molecule::Molecule,
+    molecule::{Bond, Element, Molecule},
     utils::{connected_components_under_edges, edge_neighbors},
     reductions::CompatGraph,
 };
@@ -117,11 +116,17 @@ impl DagNode {
     }
 }
 
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+struct EdgeType {
+    bond: Bond,
+    ends: (Element, Element),
+}
+
 /// Helper function for [`index_search`]; only public for benchmarking.
 ///
 /// Return all pairs of non-overlapping, isomorphic subgraphs in the molecule,
 /// sorted to guarantee deterministic iteration.
-pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (FxHashMap<(usize, usize), usize>, Vec<DagNode>) {
+pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (FxHashMap<(usize, usize), usize>, Vec<DagNode>, Vec<usize>) {
     let num_edges = mol.graph().edge_count();
 
     let mut subgraphs: Vec<usize> = Vec::new();
@@ -286,7 +291,48 @@ pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (FxHashMap<(usize
         next_match_id += 1;
     }
 
-    (match_to_id, dag)
+    // Create edge type ids
+    let graph = mol.graph();
+    let mut edgetype_to_id: HashMap<EdgeType, usize> = HashMap::new();
+    let mut edgetypes: Vec<usize> = vec![0; num_edges];
+    let mut next_id = 0;
+
+    // Create a list of the elements of the nodes
+    // The list can be indexed by NodeIndex-es
+    let mut node_elements: Vec<Element> = Vec::new();
+    for v in graph.node_weights() {
+        node_elements.push(v.element());
+    }
+
+    // Extract bond types
+    let weights: Vec<Bond> = graph.edge_weights().copied().collect();
+
+    // types will hold an element for every unique edge type in fragment
+    for e in graph.edge_indices() {
+        // Extract bond type and endpoint atom types
+        let bond = weights[e.index()];
+        let (end1, end2) = graph.edge_endpoints(e).expect("Edge Endpoint Error");
+
+        let atom1 = node_elements[end1.index()];
+        let atom2 = node_elements[end2.index()];
+
+        // Create edgetype
+        let ends = if atom1 < atom2 {(atom1, atom2)} else {(atom2, atom1)};
+        let edgetype = EdgeType{bond, ends};
+
+        // Find or create id for this edgetype
+        // and store the edgetype id for this edge
+        if let Some(id) = edgetype_to_id.get(&edgetype) {
+            edgetypes[e.index()] = *id;
+        }
+        else {
+            edgetypes[e.index()] = next_id;
+            edgetype_to_id.insert(edgetype, next_id);
+            next_id += 1;
+        }
+    }
+
+    (match_to_id, dag, edgetypes)
 }
 
 /// Determine the fragments produced from the given assembly state by removing
@@ -376,6 +422,7 @@ pub fn recurse_index_search(
     mol: &Molecule,
     matches: &FxHashMap<(usize, usize), usize>,
     dag: &Vec<DagNode>,
+    edgetypes: &Vec<usize>,
     last_removed: usize,
     mut subgraph: BitSet,
     removal_order: Vec<usize>,
@@ -387,8 +434,16 @@ pub fn recurse_index_search(
     parallel_mode: ParallelMode,
     kernel_mode: KernelMode,
 ) -> (usize, usize) {
-    //println!("{:?}", removal_order);
-    //println!("{:?}", state);
+    // println!("{:?}", removal_order);
+    // println!("{:?}", state);
+
+    // let mut memoize_time = Duration::new(0, 0);
+    // let mut bucket_time = Duration::new(0, 0);
+    // let mut component_time = Duration::new(0, 0);
+    // let mut int_time = Duration::new(0, 0);
+    // let mut vec_time = Duration::new(0, 0);
+    // let mut matches_time = Duration::new(0, 0);
+    // let mut sort_time = Duration::new(0, 0);
 
     // Memoization
     if cache.memoize_state(mol, &state, state_index, &removal_order) {
@@ -552,6 +607,36 @@ pub fn recurse_index_search(
         largest_length += 1;
     }
 
+    let mut largest_length2 = 2;
+    let mut best_bound = 0;
+
+    // Vector bounding strategy
+    // Uses edge types of the fragments.
+    for (i, list) in masks.iter().enumerate() {
+        let i = i + 2;
+
+        let mut s: usize = list.iter().map(|frag| frag.len()).sum();
+
+        let mut set = HashSet::new();
+        for frag in list.iter() {
+            for edge in frag.iter() {
+                set.insert(edgetypes[edge]);
+            }
+        }
+        let z = set.len();
+
+        let bound = (s - z) - ((s - z) as f32 / i as f32).ceil() as usize;
+        best_bound = max(best_bound, bound);
+
+        if state_index - best_bound < best {
+            break;
+        }
+
+        largest_length2 += 1;
+    }
+
+    largest_length = max(largest_length, largest_length2);
+
     // Create matches
     for bucket in buckets_by_len[largest_length-2..].iter().rev() {
         for fragments in bucket.values() {
@@ -580,6 +665,14 @@ pub fn recurse_index_search(
     valid_matches.sort();
     valid_matches.reverse();
 
+    // print!("Mem: {} ", memoize_time.as_nanos());
+    // print!("Buk: {} ", bucket_time.as_nanos());
+    // print!("Comp: {} ", component_time.as_nanos());
+    // print!("Int: {} ", int_time.as_nanos());
+    // print!("Vec: {} ", vec_time.as_nanos());
+    // print!("Mat: {} ", matches_time.as_nanos());
+    // print!("Sort: {}\n", sort_time.as_nanos());
+
     // Keep track of the best assembly index found in any of this assembly
     // state's children and the number of states searched, including this one.
     let best_child_index = AtomicUsize::from(state_index);
@@ -587,7 +680,7 @@ pub fn recurse_index_search(
     
     // Define a closure that handles recursing to a new assembly state based on
     // the given (enumerated) pair of non-overlapping isomorphic subgraphs.
-    let  recurse_on_match = |i: usize, v: (usize, usize)| {
+    let recurse_on_match = |i: usize, v: (usize, usize)| {
         //let (h1, h2) = &matches[v];
         let h1 = &dag[v.0].fragment;
         let h2 = &dag[v.1].fragment;
@@ -607,12 +700,13 @@ pub fn recurse_index_search(
                 mol,
                 matches,
                 dag,
+                edgetypes,
                 *matches.get(&v).unwrap(),
                 BitSet::new(),
                 {
                     let mut clone = removal_order.clone();
-                    // clone.push(*matches.get(&v).unwrap());
-                    clone.push(i);
+                    clone.push(*matches.get(&v).unwrap());
+                    // clone.push(i);
                     clone
                 },
                 &fragments,
@@ -727,7 +821,7 @@ pub fn index_search(
     // Catch not-yet-implemented modes.
 
     // Enumerate non-overlapping isomorphic subgraph pairs.
-    let (matches, dag) = matches(mol, canonize_mode);
+    let (matches, dag, edgetypes) = matches(mol, canonize_mode);
 
     // Create memoization cache.
     //let mut cache = Cache::new(memoize_mode, canonize_mode);
@@ -758,6 +852,7 @@ pub fn index_search(
         mol,
         &matches,
         &dag,
+        &edgetypes,
         0,
         subgraph,
         Vec::new(),

From e1650d96bd6c9cb177df714fd7464a0afb7a9543 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Fri, 5 Sep 2025 14:30:35 -0700
Subject: [PATCH 40/51] Move vector bound

---
 src/assembly.rs | 46 ++++++++++++++--------------------------------
 1 file changed, 14 insertions(+), 32 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 48a5dde8..8c8cdc5f 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -580,7 +580,6 @@ pub fn recurse_index_search(
 
 
     let best = best_index.load(Relaxed);
-    let mut best_bound = 0;
     let mut largest_length = 2;
 
     // Use bounding strategy to find the largest length match
@@ -596,46 +595,29 @@ pub fn recurse_index_search(
         }
         bound -= (i as f32).log2().ceil() as usize;
 
-        best_bound = max(best_bound, bound);
-
         // Check if removing at this length can give a more optimal answer
         // If yes, stop and return the largest_length to remove
-        if state_index - best_bound < best {
-            break;
-        }
-
-        largest_length += 1;
-    }
-
-    let mut largest_length2 = 2;
-    let mut best_bound = 0;
-
-    // Vector bounding strategy
-    // Uses edge types of the fragments.
-    for (i, list) in masks.iter().enumerate() {
-        let i = i + 2;
-
-        let mut s: usize = list.iter().map(|frag| frag.len()).sum();
+        if state_index - bound < best {
+            let s: usize = list.iter().map(|frag| frag.len()).sum();
 
-        let mut set = HashSet::new();
-        for frag in list.iter() {
-            for edge in frag.iter() {
-                set.insert(edgetypes[edge]);
+            let mut set = HashSet::new();
+            for frag in list.iter() {
+                for edge in frag.iter() {
+                    set.insert(edgetypes[edge]);
+                }
             }
-        }
-        let z = set.len();
+            let z = set.len();
 
-        let bound = (s - z) - ((s - z) as f32 / i as f32).ceil() as usize;
-        best_bound = max(best_bound, bound);
+            let bound = (s - z) - ((s - z) as f32 / i as f32).ceil() as usize;
 
-        if state_index - best_bound < best {
-            break;
+            if state_index - bound < best {
+                break;
+            }
         }
 
-        largest_length2 += 1;
+        largest_length += 1;
     }
-
-    largest_length = max(largest_length, largest_length2);
+    
 
     // Create matches
     for bucket in buckets_by_len[largest_length-2..].iter().rev() {

From 883444f33512f5d9dd99fd995a2f9468d7790b87 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Tue, 9 Sep 2025 11:16:33 -0700
Subject: [PATCH 41/51] Replace bugged vector bound

---
 src/assembly.rs | 85 ++++++++++++++++++++++++++++++++-----------------
 src/main.rs     | 76 +++++++++++++++++++++----------------------
 2 files changed, 93 insertions(+), 68 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 8c8cdc5f..bbf0bdb8 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -434,7 +434,7 @@ pub fn recurse_index_search(
     parallel_mode: ParallelMode,
     kernel_mode: KernelMode,
 ) -> (usize, usize) {
-    // println!("{:?}", removal_order);
+    // if removal_order.len() == 1 {println!("{:?}", removal_order);}
     // println!("{:?}", state);
 
     // let mut memoize_time = Duration::new(0, 0);
@@ -579,45 +579,70 @@ pub fn recurse_index_search(
     state.retain(|frag| frag.len() >= 2);
 
 
+    // Use bounding strategy to find the largest length match
+    // to try removing from this state.
     let best = best_index.load(Relaxed);
     let mut largest_length = 2;
 
-    // Use bounding strategy to find the largest length match
-    // to try removing from this state.
     for (i, list) in masks.iter().enumerate() {
-        let mut bound = 0;
-        let i = i + 2;
-        for (j, frag) in list.iter().enumerate() {
-            let mf = masks[0][j].len();
-            let mi = frag.len();
-            let x = mf + (mi % i) - mi;
-            bound += mf - (mi / i) - x / (i-1) - (x % (i-1) != 0) as usize;
-        }
-        bound -= (i as f32).log2().ceil() as usize;
+        let mut stop = false;
+        for bound_type in bounds {
+            if stop {
+                break;
+            }
 
-        // Check if removing at this length can give a more optimal answer
-        // If yes, stop and return the largest_length to remove
-        if state_index - bound < best {
-            let s: usize = list.iter().map(|frag| frag.len()).sum();
+            match bound_type {
+                Bound::Int => {
+                    let mut bound = 0;
+                    let i = i + 2;
+                    for (j, frag) in list.iter().enumerate() {
+                        let mf = masks[0][j].len();
+                        let mi = frag.len();
+                        let x = mf + (mi % i) - mi;
+                        bound += mf - (mi / i) - (x+i-2) / (i-1);
+                    }
+                    bound -= (i as f32).log2().ceil() as usize;
 
-            let mut set = HashSet::new();
-            for frag in list.iter() {
-                for edge in frag.iter() {
-                    set.insert(edgetypes[edge]);
+                    if state_index - bound >= best {
+                        stop = true;
+                    }
                 }
-            }
-            let z = set.len();
+                Bound::VecSimple => {
+                    let mut set = HashSet::new();
+                    for frag in state.iter() {
+                        for edge in frag.iter() {
+                            set.insert(edgetypes[edge]);
+                        }
+                    }
+                    let z = set.len();
 
-            let bound = (s - z) - ((s - z) as f32 / i as f32).ceil() as usize;
+                    let i = i + 2;
+                    let s: usize = state.iter().map(|frag| frag.len()).sum();
 
-            if state_index - bound < best {
-                break;
+                    let bound = if s == 0 {
+                        0
+                    }
+                    else {
+                        (s - z) - ((s - z) as f32 / i as f32).ceil() as usize
+                    };
+
+                    
+                    if state_index - bound >= best {
+                        stop = true;
+                    }                    
+                }
+                _ => ()
             }
         }
 
-        largest_length += 1;
+        if stop {
+            largest_length += 1;
+        }
+        else {
+            break;
+        }
     }
-    
+
 
     // Create matches
     for bucket in buckets_by_len[largest_length-2..].iter().rev() {
@@ -687,8 +712,8 @@ pub fn recurse_index_search(
                 BitSet::new(),
                 {
                     let mut clone = removal_order.clone();
-                    clone.push(*matches.get(&v).unwrap());
-                    // clone.push(i);
+                    // clone.push(*matches.get(&v).unwrap());
+                    clone.push(i);
                     clone
                 },
                 &fragments,
@@ -847,7 +872,7 @@ pub fn index_search(
         kernel_mode,
     );
 
-    println!("Bounded: {}", cache.count());
+    // println!("Bounded: {}", cache.count());
 
     (index as u32, matches.len() as u32, states_searched)
 }
diff --git a/src/main.rs b/src/main.rs
index 36f2f937..84b0df29 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -123,44 +123,44 @@ fn main() -> Result<()> {
     }
 
 
-    /*use std::ffi::{OsStr, OsString};
-    use std::path::Path;
-    use assembly_theory::molecule::Molecule;
-    use rayon::iter::{IntoParallelIterator, ParallelIterator, IntoParallelRefIterator, IndexedParallelIterator};
-
-    let paths = fs::read_dir(Path::new("data").join("coconut_220")).unwrap();
-    let mut mol_list: Vec<Molecule> = Vec::new();
-    let mut names: Vec<OsString> = Vec::new();
-
-    for path in paths {
-        let name = path.unwrap().path();
-        if name.extension().and_then(OsStr::to_str) != Some("mol") {
-            continue;
-        }
-        names.push(name.file_name().unwrap().to_os_string());
-        mol_list.push(
-            parse_molfile_str(
-                &fs::read_to_string(name.clone())
-                    .expect(&format!("Could not read file {name:?}")),
-            )
-            .expect(&format!("Failed to parse {name:?}")),
-        );
-    }
-
-    names.par_iter().zip(mol_list.par_iter()).for_each(|(n, mol)| {
-        let index = index_search(
-            &mol,
-            cli.enumerate,
-            cli.canonize,
-            cli.parallel,
-            cli.memoize,
-            cli.kernel,            
-            boundlist,
-            cli.clique,
-        );
-        println!("{:?}: MA: {} Space: {}, Searched: {}", n, index.0, index.1, index.2);
-    });
-    std::process::exit(1);*/
+    // use std::ffi::{OsStr, OsString};
+    // use std::path::Path;
+    // use assembly_theory::molecule::Molecule;
+    // use rayon::iter::{IntoParallelIterator, ParallelIterator, IntoParallelRefIterator, IndexedParallelIterator};
+
+    // let paths = fs::read_dir(Path::new("data").join("coconut_220")).unwrap();
+    // let mut mol_list: Vec<Molecule> = Vec::new();
+    // let mut names: Vec<OsString> = Vec::new();
+
+    // for path in paths {
+    //     let name = path.unwrap().path();
+    //     if name.extension().and_then(OsStr::to_str) != Some("mol") {
+    //         continue;
+    //     }
+    //     names.push(name.file_name().unwrap().to_os_string());
+    //     mol_list.push(
+    //         parse_molfile_str(
+    //             &fs::read_to_string(name.clone())
+    //                 .expect(&format!("Could not read file {name:?}")),
+    //         )
+    //         .expect(&format!("Failed to parse {name:?}")),
+    //     );
+    // }
+
+    // names.par_iter().zip(mol_list.par_iter()).for_each(|(n, mol)| {
+    //     let index = index_search(
+    //         &mol,
+    //         cli.enumerate,
+    //         cli.canonize,
+    //         cli.parallel,
+    //         cli.memoize,
+    //         cli.kernel,            
+    //         boundlist,
+    //         cli.clique,
+    //     );
+    //     println!("{:?}: MA: {} Space: {}, Searched: {}", n, index.0, index.1, index.2);
+    // });
+    // std::process::exit(1);
 
 
     // Call index calculation with all the various options.

From 09c2cf51220de5369dc589d425d601a8048e7759 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Tue, 9 Sep 2025 11:19:42 -0700
Subject: [PATCH 42/51] Update cargo.toml

---
 Cargo.lock | 76 ------------------------------------------------------
 Cargo.toml |  1 -
 2 files changed, 77 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 660f0f07..d1c16b66 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,19 +2,6 @@
 # It is not intended for manual editing.
 version = 4
 
-[[package]]
-name = "ahash"
-version = "0.8.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
-dependencies = [
- "cfg-if",
- "getrandom",
- "once_cell",
- "version_check",
- "zerocopy",
-]
-
 [[package]]
 name = "aho-corasick"
 version = "1.1.3"
@@ -84,7 +71,6 @@ checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
 name = "assembly-theory"
 version = "0.4.0"
 dependencies = [
- "ahash",
  "anyhow",
  "bit-set",
  "clap 4.5.41",
@@ -464,18 +450,6 @@ dependencies = [
  "byteorder",
 ]
 
-[[package]]
-name = "getrandom"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
-dependencies = [
- "cfg-if",
- "libc",
- "r-efi",
- "wasi",
-]
-
 [[package]]
 name = "glob"
 version = "0.3.2"
@@ -891,12 +865,6 @@ dependencies = [
  "proc-macro2",
 ]
 
-[[package]]
-name = "r-efi"
-version = "5.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
-
 [[package]]
 name = "radium"
 version = "0.7.0"
@@ -1154,12 +1122,6 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
-[[package]]
-name = "version_check"
-version = "0.9.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
-
 [[package]]
 name = "walkdir"
 version = "2.5.0"
@@ -1170,15 +1132,6 @@ dependencies = [
  "winapi-util",
 ]
 
-[[package]]
-name = "wasi"
-version = "0.14.2+wasi-0.2.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
-dependencies = [
- "wit-bindgen-rt",
-]
-
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.100"
@@ -1436,15 +1389,6 @@ version = "0.53.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
 
-[[package]]
-name = "wit-bindgen-rt"
-version = "0.39.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
-dependencies = [
- "bitflags 2.9.1",
-]
-
 [[package]]
 name = "wyz"
 version = "0.5.1"
@@ -1453,23 +1397,3 @@ checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
 dependencies = [
  "tap",
 ]
-
-[[package]]
-name = "zerocopy"
-version = "0.8.26"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
-dependencies = [
- "zerocopy-derive",
-]
-
-[[package]]
-name = "zerocopy-derive"
-version = "0.8.26"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
diff --git a/Cargo.toml b/Cargo.toml
index 84277549..c24dc767 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,7 +18,6 @@ petgraph = "0.6.5"
 pyo3 = { version = "0.24.1", features = ["abi3-py310", "extension-module"]}
 rayon = "1.10.0"
 dashmap = "6.1.0"
-ahash = "0.8.12"
 fxhash = "0.2.1"
 itertools = "0.14.0"
 

From 535c25174aebfb99953f19100c2a9ee653160df4 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Fri, 12 Sep 2025 14:12:24 -0700
Subject: [PATCH 43/51] Change vector bound

---
 src/assembly.rs | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index bbf0bdb8..4453f1eb 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -585,6 +585,8 @@ pub fn recurse_index_search(
     let mut largest_length = 2;
 
     for (i, list) in masks.iter().enumerate() {
+        let mut b1 = 0;
+
         let mut stop = false;
         for bound_type in bounds {
             if stop {
@@ -608,25 +610,41 @@ pub fn recurse_index_search(
                     }
                 }
                 Bound::VecSimple => {
-                    let mut set = HashSet::new();
-                    for frag in state.iter() {
-                        for edge in frag.iter() {
-                            set.insert(edgetypes[edge]);
+                    let size_two_list = &masks[0];
+                    let mut total_set = HashSet::new();
+                    let mut small_set = HashSet::new();
+                    
+                    for frag in size_two_list.iter() {
+                        for edge in frag {
+                            total_set.insert(edgetypes[edge]);
+                        }
+                    }
+                    for frag in list {
+                        for edge in frag {
+                            small_set.insert(edgetypes[edge]);
                         }
                     }
-                    let z = set.len();
 
+                    let z = total_set.len();
+                    let zi = small_set.len();
                     let i = i + 2;
-                    let s: usize = state.iter().map(|frag| frag.len()).sum();
+                    let s: usize = size_two_list.iter().map(|frag| frag.len()).sum();
+                    let mi: usize = list.iter().map(|frag| frag.len()).sum();
+                    let mf = s - mi;
 
-                    let bound = if s == 0 {
-                        0
-                    }
-                    else {
-                        (s - z) - ((s - z) as f32 / i as f32).ceil() as usize
+                    let bound = s - {
+                        if z < i {
+                            z-1 + (i as f32 / z as f32).log2().ceil() as usize + (mi/i) + ((mi%i)+mf - (z - zi) + i-2)/(i-1)
+                        }
+                        else {
+                            z + (mi-zi)/i + ((mi-zi)%i + mf - (z - zi) + i-2)/(i-1)
+                        }
                     };
 
-                    
+                    if mi == 0 {
+                        break;
+                    }
+
                     if state_index - bound >= best {
                         stop = true;
                     }                    

From d820b0f9883749e9ad095110b59193c76c2cc4f7 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 18 Sep 2025 14:14:17 -0700
Subject: [PATCH 44/51] New matches struct

---
 src/assembly.rs | 528 +-----------------------------------------------
 src/lib.rs      |   1 +
 src/main.rs     |   4 +-
 src/matches.rs  | 512 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 525 insertions(+), 520 deletions(-)
 create mode 100644 src/matches.rs

diff --git a/src/assembly.rs b/src/assembly.rs
index 4453f1eb..958deaa3 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -19,7 +19,7 @@
 //! ```
 
 use std::{
-    collections::{BTreeMap, HashMap, HashSet}, sync::{
+    sync::{
         atomic::{AtomicUsize, Ordering::Relaxed},
         Arc,
     }, time::{Duration, Instant}
@@ -27,22 +27,10 @@ use std::{
 
 use bit_set::BitSet;
 use clap::ValueEnum;
-use graph_canon::canon;
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator, IndexedParallelIterator};
-use petgraph::graph::EdgeIndex;
-use itertools::Itertools;
-use std::cmp::{max, min};
-use fxhash::FxHashMap;
 
 use crate::{
-    bounds::{bound_exceeded, Bound},
-    canonize::{canonize, CanonizeMode, Labeling},
-    enumerate::{EnumerateMode},
-    kernels::{KernelMode, deletion_kernel, inclusion_kernel},
-    memoize::{MemoizeMode, NewCache},
-    molecule::{Bond, Element, Molecule},
-    utils::{connected_components_under_edges, edge_neighbors},
-    reductions::CompatGraph,
+    bounds::Bound, canonize::CanonizeMode, enumerate::EnumerateMode, kernels::KernelMode, matches::Matches, memoize::{MemoizeMode, NewCache}, molecule:: Molecule, utils::{connected_components_under_edges}
 };
 
 /// Parallelization strategy for the recursive search phase.
@@ -99,242 +87,6 @@ pub fn depth(mol: &Molecule) -> u32 {
     ix
 }
 
-#[derive(Debug)]
-pub struct DagNode {
-    fragment: BitSet,
-    canon_id: usize,
-    children: Vec<usize>,
-}
-
-impl DagNode {
-    pub fn new(fragment: BitSet, canon_id: usize) -> Self {
-        Self {
-            fragment,
-            canon_id,
-            children: Vec::new(),
-        }
-    }
-}
-
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-struct EdgeType {
-    bond: Bond,
-    ends: (Element, Element),
-}
-
-/// Helper function for [`index_search`]; only public for benchmarking.
-///
-/// Return all pairs of non-overlapping, isomorphic subgraphs in the molecule,
-/// sorted to guarantee deterministic iteration.
-pub fn matches(mol: &Molecule, canonize_mode: CanonizeMode) -> (FxHashMap<(usize, usize), usize>, Vec<DagNode>, Vec<usize>) {
-    let num_edges = mol.graph().edge_count();
-
-    let mut subgraphs: Vec<usize> = Vec::new();
-    let mut constructed_frags: HashSet<BitSet> = HashSet::new();
-    let mut frag_to_id: HashMap<BitSet, usize> = HashMap::new();
-
-    let mut dag: Vec<DagNode> = Vec::with_capacity(num_edges);
-    let mut matches: Vec<(usize, usize)> = Vec::new();
-
-    let mut next_canon_id = 0;
-
-    // Generate subgraphs with one edge
-    for i in 0..num_edges {
-        let mut new_bitset = BitSet::with_capacity(num_edges);
-        new_bitset.insert(i);
-        
-        let node = DagNode::new(new_bitset, 0);
-
-        dag.push(node);
-        subgraphs.push(i);
-    }
-
-    next_canon_id += 1;
-
-    while subgraphs.len() > 0 {
-        // Initialize new buckets and subgraph list
-        let mut buckets: BTreeMap<Labeling, Vec<(BitSet, usize)>> = BTreeMap::new();
-        let mut child_subgraphs: Vec<usize> = Vec::new();
-
-        // Extend and bucket new subgraphs
-        for parent_idx in subgraphs.iter() {
-            // Get fragment to be extended
-            let fragment = &dag[*parent_idx].fragment;
-
-            // Construct edge neighborhood of fragment
-            let mut neighborhood = BitSet::with_capacity(num_edges);
-            for e in fragment {
-                neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
-            }
-            neighborhood.difference_with(fragment);
-
-            // Extend fragment with edges from neighborhood
-            for e in neighborhood.iter() {
-                // Create extended fragment
-                let mut new_fragment = fragment.clone();
-                new_fragment.insert(e);
-
-                // Check if fragment has already been created
-                // If yes, continue
-                if constructed_frags.contains(&new_fragment) {
-                    continue;
-                }
-
-                // Bucket subgraph
-                // Gives ownership of new_fragment to buckets
-                let canon = canonize(mol, &new_fragment, canonize_mode);
-                buckets.entry(canon)
-                    .and_modify(|bucket| bucket.push((new_fragment.clone(), *parent_idx)))
-                    .or_insert(vec![(new_fragment.clone(), *parent_idx)]);
-            
-                // Add to constructed_frags
-                constructed_frags.insert(new_fragment);
-            }
-        }
-
-        // Go through buckets to create matches
-        for fragments in buckets.values() {
-            // Variables to track if a match has been found
-            let mut frag_has_match = BitSet::with_capacity(fragments.len());
-
-            // Loop through pairs of bitsets to find matches
-            for pair in fragments.iter().enumerate().combinations(2) {
-                // Extract pair of fragments
-                let frag1 = &pair[0].1.0;
-                let frag2 = &pair[1].1.0;
-
-                // Check for disjointness
-                // If the fragments are disjoint, they form a match
-                if frag1.is_disjoint(&frag2) {
-                    // Extract indices of fragments
-                    let idx1 = pair[0].0;
-                    let idx2 = pair[1].0;
-
-                    // If this is the first match for a fragment, create a dag node
-                    if !frag_has_match.contains(idx1) {
-                        // Create new dag node
-                        let new_dag_node = DagNode::new(frag1.clone(), next_canon_id);
-                        let child_idx = dag.len();
-
-                        // Add child to dag list
-                        dag.push(new_dag_node);
-
-                        // Add index to parent's child list
-                        let parent_idx = pair[0].1.1;
-                        let parent = &mut dag[parent_idx];
-                        parent.children.push(child_idx);
-
-                        // Add dag node to next subgraphs list
-                        child_subgraphs.push(child_idx);
-
-                        // Add to frag_to_id map
-                        frag_to_id.insert(frag1.clone(), child_idx);
-                    }
-                    if !frag_has_match.contains(idx2) {
-                        // Create new dag node
-                        let new_dag_node = DagNode::new(frag2.clone(), next_canon_id);
-                        let child_idx = dag.len();
-
-                        // Add child to dag list
-                        dag.push(new_dag_node);
-
-                        // Add index to parent's child list
-                        let parent_idx = pair[1].1.1;
-                        let parent = &mut dag[parent_idx];
-                        parent.children.push(child_idx);
-
-                        // Add dag node to next subgraphs list
-                        child_subgraphs.push(child_idx);
-
-                        // Add to frag_to_id map
-                        frag_to_id.insert(frag2.clone(), child_idx);
-                    }
-
-                    // Get fragment ids and add to matches
-                    let frag1_id = frag_to_id.get(&frag1).unwrap();
-                    let frag2_id = frag_to_id.get(&frag2).unwrap();
-
-                    if frag1_id > frag2_id {
-                        matches.push((*frag1_id, *frag2_id));
-                    }
-                    else {
-                        matches.push((*frag2_id, *frag1_id));
-                    }
-
-                    // Mark that these fragments have matches
-                    frag_has_match.insert(idx1);
-                    frag_has_match.insert(idx2);
-                }
-            }
-
-            // If there was a match, increment canon_id counter
-            if !frag_has_match.is_empty() {
-                next_canon_id += 1;
-            }
-        }
-
-        // Update to the new subgraphs
-        subgraphs = child_subgraphs;
-    }
-
-    // Sort matches
-    // Larger frag_ids get a smaller match_id
-    // Thus fragments with many edges have small ids
-    matches.sort();
-    matches.reverse();
-
-    // give matches ids
-    let mut next_match_id = 0;
-    let mut match_to_id: FxHashMap<(usize, usize), usize> = FxHashMap::default();
-    for m in matches {
-        match_to_id.insert(m, next_match_id);
-        next_match_id += 1;
-    }
-
-    // Create edge type ids
-    let graph = mol.graph();
-    let mut edgetype_to_id: HashMap<EdgeType, usize> = HashMap::new();
-    let mut edgetypes: Vec<usize> = vec![0; num_edges];
-    let mut next_id = 0;
-
-    // Create a list of the elements of the nodes
-    // The list can be indexed by NodeIndex-es
-    let mut node_elements: Vec<Element> = Vec::new();
-    for v in graph.node_weights() {
-        node_elements.push(v.element());
-    }
-
-    // Extract bond types
-    let weights: Vec<Bond> = graph.edge_weights().copied().collect();
-
-    // types will hold an element for every unique edge type in fragment
-    for e in graph.edge_indices() {
-        // Extract bond type and endpoint atom types
-        let bond = weights[e.index()];
-        let (end1, end2) = graph.edge_endpoints(e).expect("Edge Endpoint Error");
-
-        let atom1 = node_elements[end1.index()];
-        let atom2 = node_elements[end2.index()];
-
-        // Create edgetype
-        let ends = if atom1 < atom2 {(atom1, atom2)} else {(atom2, atom1)};
-        let edgetype = EdgeType{bond, ends};
-
-        // Find or create id for this edgetype
-        // and store the edgetype id for this edge
-        if let Some(id) = edgetype_to_id.get(&edgetype) {
-            edgetypes[e.index()] = *id;
-        }
-        else {
-            edgetypes[e.index()] = next_id;
-            edgetype_to_id.insert(edgetype, next_id);
-            next_id += 1;
-        }
-    }
-
-    (match_to_id, dag, edgetypes)
-}
-
 /// Determine the fragments produced from the given assembly state by removing
 /// the given pair of non-overlapping, isomorphic subgraphs and then adding one
 /// back; return `None` if not possible.
@@ -382,7 +134,7 @@ fn fragments(mol: &Molecule, state: &[BitSet], h1: &BitSet, h2: &BitSet) -> Opti
     Some(fragments)
 }
 
-fn is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<BitSet>>) -> bool {
+fn _is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<BitSet>>) -> bool {
     let f1 = state.iter().enumerate().find(|(_, c)| h1.is_subset(c));
     let f2 = state.iter().enumerate().find(|(_, c)| h2.is_subset(c));
     
@@ -420,11 +172,8 @@ fn is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<Bit
 #[allow(clippy::too_many_arguments)]
 pub fn recurse_index_search(
     mol: &Molecule,
-    matches: &FxHashMap<(usize, usize), usize>,
-    dag: &Vec<DagNode>,
-    edgetypes: &Vec<usize>,
+    matches: &Matches,
     last_removed: usize,
-    mut subgraph: BitSet,
     removal_order: Vec<usize>,
     state: &[BitSet],
     state_index: usize,
@@ -449,254 +198,8 @@ pub fn recurse_index_search(
     if cache.memoize_state(mol, &state, state_index, &removal_order) {
         return (state_index, 1);
     }
-    
-    // Generate matches
-    let num_edges = mol.graph().edge_count();
-
-    let mut valid_matches: Vec<(usize, usize)> = Vec::new();
-    let mut subgraphs: Vec<(usize, usize)> = Vec::new();
-    let mut masks: Vec<Vec<BitSet>> = Vec::new();
-    let mut buckets_by_len: Vec<HashMap<usize, Vec<(usize, usize)>>> = Vec::new();
-
-    // Create subgraphs of size 1
-    for (state_id, fragment) in state.iter().enumerate() {
-        for edge_idx in fragment.iter() {
-            subgraphs.push((edge_idx, state_id));
-        }
-    }
 
-    while subgraphs.len() > 0 {
-        // Subgraphs to extend in next loop
-        let mut buckets: HashMap<usize, Vec<(usize, usize)>> = HashMap::new();
-        let mut new_subgraphs = Vec::new();
-        let mut used_edge_mask = vec![BitSet::with_capacity(num_edges); state.len()];
-
-        // Extend each subgraph and bucket
-        for (frag_id, state_id) in subgraphs {
-            // Get state frament that this subgraph is contained in
-            let state_frag = &state[state_id];
-
-            // Get ids of extensions of this subgraph
-            let children_ids = &dag[frag_id].children;
-
-            for child_id in children_ids {
-                // Check if this extension is valid
-                // i.e. the extended subgraph is contained in a fragment of state
-                let child_frag = &dag[*child_id].fragment;
-                let valid = child_frag.is_subset(state_frag);
-
-                if valid {
-                    let canon_id = &dag[*child_id].canon_id;
-                    
-                    // Add fragment to bucket
-                    buckets.entry(*canon_id)
-                        .and_modify(|bucket| bucket.push((*child_id, state_id)))
-                        .or_insert(vec![(*child_id, state_id)]);
-                }
-            }
-        }
-
-        // Search through buckets and create matches
-        for fragments in buckets.values_mut() {
-            let mut has_match = BitSet::with_capacity(fragments.len());
-            // Loop over pairs of fragments
-            for i in 0..fragments.len() {
-                for j in i+1..fragments.len() {
-                    if has_match.contains(i) && has_match.contains(j) {
-                        continue;
-                    }
-
-                    let mut frag1 = (i, fragments[i]);
-                    let mut frag2 = (j, fragments[j]);
-
-                    // Order fragments
-                    if frag1.1.0 < frag2.1.0 {
-                        let temp = frag1;
-                        frag1 = frag2;
-                        frag2 = temp;
-                    }
-
-                    let frag1_bucket_idx = frag1.0;
-                    let frag1_id = frag1.1.0;
-                    let frag1_state_id = frag1.1.1;
-
-                    let frag2_bucket_idx = frag2.0;
-                    let frag2_id = frag2.1.0;
-                    let frag2_state_id = frag2.1.1;
-
-                    // Check for valid match
-                    if let Some(x) = matches.get(&(frag1_id, frag2_id)) {
-                        // Only add match if it occurs later in the ordering than
-                        // the last removed match
-                        if *x >= last_removed {
-                            // valid_matches.push((frag1_id, frag2_id));
-
-                            // If this is the first time seeing that this frag has a match,
-                            // add it to the new_subgraphs list and modify mask
-                            if !has_match.contains(frag1_bucket_idx) {
-                                new_subgraphs.push((frag1_id, frag1_state_id));
-                                used_edge_mask[frag1_state_id].union_with(&dag[frag1_id].fragment);
-                                has_match.insert(frag1_bucket_idx);
-                            }
-                            if !has_match.contains(frag2_bucket_idx) {
-                                new_subgraphs.push((frag2_id, frag2_state_id));
-                                used_edge_mask[frag2_state_id].union_with(&dag[frag2_id].fragment);
-                                has_match.insert(frag2_bucket_idx);
-                            }
-                        }
-                    }
-                }
-            }
-
-            // Remove any matchless fragments from bucket
-            let len = fragments.len();
-            for i in (0..len).rev() {
-                if !has_match.contains(i) {
-                    fragments.remove(i);
-                }
-            }     
-        }
-
-        // Add buckets to list
-        buckets_by_len.push(buckets);
-
-        // Add mask to list
-        masks.push(used_edge_mask);
-
-        // Update to new subgraphs
-        subgraphs = new_subgraphs;
-    }
-
-    // Let new state be only the edges which can be matched
-    let mut state = Vec::new();
-
-    for frag in masks[0].iter() {
-        // Add connected components to new state
-        state.extend(connected_components_under_edges(mol.graph(), &frag));
-    }
-
-    // Remove frags of size 1 or less
-    state.retain(|frag| frag.len() >= 2);
-
-
-    // Use bounding strategy to find the largest length match
-    // to try removing from this state.
-    let best = best_index.load(Relaxed);
-    let mut largest_length = 2;
-
-    for (i, list) in masks.iter().enumerate() {
-        let mut b1 = 0;
-
-        let mut stop = false;
-        for bound_type in bounds {
-            if stop {
-                break;
-            }
-
-            match bound_type {
-                Bound::Int => {
-                    let mut bound = 0;
-                    let i = i + 2;
-                    for (j, frag) in list.iter().enumerate() {
-                        let mf = masks[0][j].len();
-                        let mi = frag.len();
-                        let x = mf + (mi % i) - mi;
-                        bound += mf - (mi / i) - (x+i-2) / (i-1);
-                    }
-                    bound -= (i as f32).log2().ceil() as usize;
-
-                    if state_index - bound >= best {
-                        stop = true;
-                    }
-                }
-                Bound::VecSimple => {
-                    let size_two_list = &masks[0];
-                    let mut total_set = HashSet::new();
-                    let mut small_set = HashSet::new();
-                    
-                    for frag in size_two_list.iter() {
-                        for edge in frag {
-                            total_set.insert(edgetypes[edge]);
-                        }
-                    }
-                    for frag in list {
-                        for edge in frag {
-                            small_set.insert(edgetypes[edge]);
-                        }
-                    }
-
-                    let z = total_set.len();
-                    let zi = small_set.len();
-                    let i = i + 2;
-                    let s: usize = size_two_list.iter().map(|frag| frag.len()).sum();
-                    let mi: usize = list.iter().map(|frag| frag.len()).sum();
-                    let mf = s - mi;
-
-                    let bound = s - {
-                        if z < i {
-                            z-1 + (i as f32 / z as f32).log2().ceil() as usize + (mi/i) + ((mi%i)+mf - (z - zi) + i-2)/(i-1)
-                        }
-                        else {
-                            z + (mi-zi)/i + ((mi-zi)%i + mf - (z - zi) + i-2)/(i-1)
-                        }
-                    };
-
-                    if mi == 0 {
-                        break;
-                    }
-
-                    if state_index - bound >= best {
-                        stop = true;
-                    }                    
-                }
-                _ => ()
-            }
-        }
-
-        if stop {
-            largest_length += 1;
-        }
-        else {
-            break;
-        }
-    }
-
-
-    // Create matches
-    for bucket in buckets_by_len[largest_length-2..].iter().rev() {
-        for fragments in bucket.values() {
-            for i in 0..fragments.len() {
-                for j in i+1..fragments.len() {
-                    let mut frag1_id = fragments[i].0;
-                    let mut frag2_id = fragments[j].0;
-
-                    if frag1_id < frag2_id {
-                        let temp = frag1_id;
-                        frag1_id = frag2_id;
-                        frag2_id = temp;
-                    }
-
-                    if let Some(x) = matches.get(&(frag1_id, frag2_id)){
-                        if *x >= last_removed {
-                            valid_matches.push((frag1_id, frag2_id));
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    // Sort matches
-    valid_matches.sort();
-    valid_matches.reverse();
-
-    // print!("Mem: {} ", memoize_time.as_nanos());
-    // print!("Buk: {} ", bucket_time.as_nanos());
-    // print!("Comp: {} ", component_time.as_nanos());
-    // print!("Int: {} ", int_time.as_nanos());
-    // print!("Vec: {} ", vec_time.as_nanos());
-    // print!("Mat: {} ", matches_time.as_nanos());
-    // print!("Sort: {}\n", sort_time.as_nanos());
+    let valid_matches = matches.generate_matches(mol, state, state_index, last_removed, best_index.load(Relaxed), bounds);
 
     // Keep track of the best assembly index found in any of this assembly
     // state's children and the number of states searched, including this one.
@@ -707,8 +210,8 @@ pub fn recurse_index_search(
     // the given (enumerated) pair of non-overlapping isomorphic subgraphs.
     let recurse_on_match = |i: usize, v: (usize, usize)| {
         //let (h1, h2) = &matches[v];
-        let h1 = &dag[v.0].fragment;
-        let h2 = &dag[v.1].fragment;
+        let h1 = matches.get_frag(v.0);
+        let h2 = matches.get_frag(v.1);
         
         // TODO: try keeping track of fragment to elimate some some search
         if let Some(fragments) = fragments(mol, &state, h1, h2) {
@@ -724,10 +227,7 @@ pub fn recurse_index_search(
             let (child_index, child_states_searched) = recurse_index_search(
                 mol,
                 matches,
-                dag,
-                edgetypes,
-                *matches.get(&v).unwrap(),
-                BitSet::new(),
+                matches.get_match_id(&v).unwrap(),
                 {
                     let mut clone = removal_order.clone();
                     // clone.push(*matches.get(&v).unwrap());
@@ -841,12 +341,12 @@ pub fn index_search(
     memoize_mode: MemoizeMode,
     kernel_mode: KernelMode,
     bounds: &[Bound],
-    clique: bool,
+    _clique: bool,
 ) -> (u32, u32, usize) {
     // Catch not-yet-implemented modes.
 
     // Enumerate non-overlapping isomorphic subgraph pairs.
-    let (matches, dag, edgetypes) = matches(mol, canonize_mode);
+    let matches = Matches::new(mol, canonize_mode);
 
     // Create memoization cache.
     //let mut cache = Cache::new(memoize_mode, canonize_mode);
@@ -865,21 +365,13 @@ pub fn index_search(
     let mut init = BitSet::new();
     init.extend(mol.graph().edge_indices().map(|ix| ix.index()));
 
-    let mut subgraph = BitSet::with_capacity(matches.len());
-    for i in 0..matches.len() {
-        subgraph.insert(i);
-    }
-
     // Search for the shortest assembly pathway recursively.
     let edge_count = mol.graph().edge_count();
     let best_index = Arc::new(AtomicUsize::from(edge_count - 1));
     let (index, states_searched) = recurse_index_search(
         mol,
         &matches,
-        &dag,
-        &edgetypes,
         0,
-        subgraph,
         Vec::new(),
         &[init],
         edge_count - 1,
diff --git a/src/lib.rs b/src/lib.rs
index 4310afda..25cd9a3a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -53,6 +53,7 @@ pub mod enumerate;
 pub mod kernels;
 pub mod memoize;
 pub mod reductions;
+pub mod matches;
 mod vf3;
 
 // Python wrapper.
diff --git a/src/main.rs b/src/main.rs
index 84b0df29..f526425c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -98,9 +98,9 @@ fn main() -> Result<()> {
     // Handle bounding strategy CLI arguments.
     let boundlist: &[Bound] = match (cli.boundsgroup, cli.clique) {
         // If clique not enable, default to a combination of the integer and vector bounds.
-        (None, false) => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags],
+        (None, false) => &[Bound::Int, Bound::VecSimple, /*Bound::VecSmallFrags*/],
         // If clique is enabled, default to combination of int, vec, and clique bounds.
-        (None, true) => &[Bound::Int, Bound::VecSimple, Bound::VecSmallFrags, /*Bound::CliqueBudget,*/ Bound::CoverNoSort],
+        (None, true) => &[Bound::Int, Bound::VecSimple],
         // If --no-bounds is set, do not use any bounds.
         (Some(BoundsGroup {
             no_bounds: true, ..
diff --git a/src/matches.rs b/src/matches.rs
new file mode 100644
index 00000000..43a46f43
--- /dev/null
+++ b/src/matches.rs
@@ -0,0 +1,512 @@
+use std::collections::{BTreeMap, HashMap, HashSet};
+
+use bit_set::BitSet;
+use fxhash::FxHashMap;
+use itertools::Itertools;
+use petgraph::graph::EdgeIndex;
+
+use crate::{bounds::Bound, canonize::{canonize, CanonizeMode, Labeling}, molecule::{Bond, Element, Molecule}, reductions::CompatGraph, utils::{connected_components_under_edges, edge_neighbors}};
+
+pub struct DagNode {
+    fragment: BitSet,
+    canon_id: usize,
+    children: Vec<usize>,
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+struct EdgeType {
+    bond: Bond,
+    ends: (Element, Element),
+}
+
+pub struct Matches {
+    id_to_match: Vec<(usize, usize)>,
+    match_to_id: FxHashMap<(usize, usize), usize>,
+    dag: Vec<DagNode>,
+    // clique: CompatGraph,
+    // clique_offset: usize,
+    edge_types: Vec<usize>,
+}
+
+
+impl DagNode {
+    pub fn new(fragment: BitSet, canon_id: usize) -> Self {
+        Self {
+            fragment,
+            canon_id,
+            children: Vec::new(),
+        }
+    }
+}
+
+impl Matches {
+    // Generate DAG, clique, and other info from the base molecule.
+    pub fn new(mol: &Molecule, canonize_mode: CanonizeMode) -> Self {
+        let num_edges = mol.graph().edge_count();
+
+        let mut subgraphs: Vec<usize> = Vec::new();
+        let mut constructed_frags: HashSet<BitSet> = HashSet::new();
+        let mut frag_to_id: HashMap<BitSet, usize> = HashMap::new();
+
+        let mut dag: Vec<DagNode> = Vec::with_capacity(num_edges);
+        let mut matches: Vec<(usize, usize)> = Vec::new();
+
+        let mut next_canon_id = 0;
+
+        // Generate subgraphs with one edge
+        for i in 0..num_edges {
+            let mut new_bitset = BitSet::with_capacity(num_edges);
+            new_bitset.insert(i);
+            
+            let node = DagNode::new(new_bitset, 0);
+
+            dag.push(node);
+            subgraphs.push(i);
+        }
+
+        next_canon_id += 1;
+
+        while subgraphs.len() > 0 {
+            // Initialize new buckets and subgraph list
+            let mut buckets: BTreeMap<Labeling, Vec<(BitSet, usize)>> = BTreeMap::new();
+            let mut child_subgraphs: Vec<usize> = Vec::new();
+
+            // Extend and bucket new subgraphs
+            for parent_idx in subgraphs.iter() {
+                // Get fragment to be extended
+                let fragment = &dag[*parent_idx].fragment;
+
+                // Construct edge neighborhood of fragment
+                let mut neighborhood = BitSet::with_capacity(num_edges);
+                for e in fragment {
+                    neighborhood.extend(edge_neighbors(mol.graph(), EdgeIndex::new(e)).map(|x| x.index()));
+                }
+                neighborhood.difference_with(fragment);
+
+                // Extend fragment with edges from neighborhood
+                for e in neighborhood.iter() {
+                    // Create extended fragment
+                    let mut new_fragment = fragment.clone();
+                    new_fragment.insert(e);
+
+                    // Check if fragment has already been created
+                    // If yes, continue
+                    if constructed_frags.contains(&new_fragment) {
+                        continue;
+                    }
+
+                    // Bucket subgraph
+                    // Gives ownership of new_fragment to buckets
+                    let canon = canonize(mol, &new_fragment, canonize_mode);
+                    buckets.entry(canon)
+                        .and_modify(|bucket| bucket.push((new_fragment.clone(), *parent_idx)))
+                        .or_insert(vec![(new_fragment.clone(), *parent_idx)]);
+                
+                    // Add to constructed_frags
+                    constructed_frags.insert(new_fragment);
+                }
+            }
+
+            // Go through buckets to create matches
+            for fragments in buckets.values() {
+                // Variables to track if a match has been found
+                let mut frag_has_match = BitSet::with_capacity(fragments.len());
+
+                // Loop through pairs of bitsets to find matches
+                for pair in fragments.iter().enumerate().combinations(2) {
+                    // Extract pair of fragments
+                    let frag1 = &pair[0].1.0;
+                    let frag2 = &pair[1].1.0;
+
+                    // Check for disjointness
+                    // If the fragments are disjoint, they form a match
+                    if frag1.is_disjoint(&frag2) {
+                        // Extract indices of fragments
+                        let idx1 = pair[0].0;
+                        let idx2 = pair[1].0;
+
+                        // If this is the first match for a fragment, create a dag node
+                        if !frag_has_match.contains(idx1) {
+                            // Create new dag node
+                            let new_dag_node = DagNode::new(frag1.clone(), next_canon_id);
+                            let child_idx = dag.len();
+
+                            // Add child to dag list
+                            dag.push(new_dag_node);
+
+                            // Add index to parent's child list
+                            let parent_idx = pair[0].1.1;
+                            let parent = &mut dag[parent_idx];
+                            parent.children.push(child_idx);
+
+                            // Add dag node to next subgraphs list
+                            child_subgraphs.push(child_idx);
+
+                            // Add to frag_to_id map
+                            frag_to_id.insert(frag1.clone(), child_idx);
+                        }
+                        if !frag_has_match.contains(idx2) {
+                            // Create new dag node
+                            let new_dag_node = DagNode::new(frag2.clone(), next_canon_id);
+                            let child_idx = dag.len();
+
+                            // Add child to dag list
+                            dag.push(new_dag_node);
+
+                            // Add index to parent's child list
+                            let parent_idx = pair[1].1.1;
+                            let parent = &mut dag[parent_idx];
+                            parent.children.push(child_idx);
+
+                            // Add dag node to next subgraphs list
+                            child_subgraphs.push(child_idx);
+
+                            // Add to frag_to_id map
+                            frag_to_id.insert(frag2.clone(), child_idx);
+                        }
+
+                        // Get fragment ids and add to matches
+                        let frag1_id = frag_to_id.get(&frag1).unwrap();
+                        let frag2_id = frag_to_id.get(&frag2).unwrap();
+
+                        if frag1_id > frag2_id {
+                            matches.push((*frag1_id, *frag2_id));
+                        }
+                        else {
+                            matches.push((*frag2_id, *frag1_id));
+                        }
+
+                        // Mark that these fragments have matches
+                        frag_has_match.insert(idx1);
+                        frag_has_match.insert(idx2);
+                    }
+                }
+
+                // If there was a match, increment canon_id counter
+                if !frag_has_match.is_empty() {
+                    next_canon_id += 1;
+                }
+            }
+
+            // Update to the new subgraphs
+            subgraphs = child_subgraphs;
+        }
+
+        // Sort matches
+        // Larger frag_ids get a smaller match_id
+        // Thus fragments with many edges have small ids
+        matches.sort();
+        matches.reverse();
+
+        // Give ids to matches
+        let mut next_match_id = 0;
+        let mut id_to_match = Vec::new();
+        let mut match_to_id: FxHashMap<(usize, usize), usize> = FxHashMap::default();
+
+        for m in matches {
+            id_to_match.push(m);
+            match_to_id.insert(m, next_match_id);
+            next_match_id += 1;
+        }
+
+        // Create edge type ids
+        let graph = mol.graph();
+        let mut edgetype_to_id: HashMap<EdgeType, usize> = HashMap::new();
+        let mut edge_types: Vec<usize> = vec![0; num_edges];
+        let mut next_id = 0;
+
+        // Create a list of the elements of the nodes
+        // The list can be indexed by NodeIndex-es
+        let mut node_elements: Vec<Element> = Vec::new();
+        for v in graph.node_weights() {
+            node_elements.push(v.element());
+        }
+
+        // Extract bond types
+        let weights: Vec<Bond> = graph.edge_weights().copied().collect();
+
+        // types will hold an element for every unique edge type in fragment
+        for e in graph.edge_indices() {
+            // Extract bond type and endpoint atom types
+            let bond = weights[e.index()];
+            let (end1, end2) = graph.edge_endpoints(e).expect("Edge Endpoint Error");
+
+            let atom1 = node_elements[end1.index()];
+            let atom2 = node_elements[end2.index()];
+
+            // Create edgetype
+            let ends = if atom1 < atom2 {(atom1, atom2)} else {(atom2, atom1)};
+            let edgetype = EdgeType{bond, ends};
+
+            // Find or create id for this edgetype
+            // and store the edgetype id for this edge
+            if let Some(id) = edgetype_to_id.get(&edgetype) {
+                edge_types[e.index()] = *id;
+            }
+            else {
+                edge_types[e.index()] = next_id;
+                edgetype_to_id.insert(edgetype, next_id);
+                next_id += 1;
+            }
+        }
+
+        Self {
+            id_to_match,
+            match_to_id,
+            dag,
+            edge_types,
+        }
+    }
+
+    pub fn generate_matches(&self, mol: &Molecule, state: &[BitSet], state_index: usize, last_removed: usize, best: usize, bounds: &[Bound]) -> Vec<(usize, usize)> {
+        let num_edges = mol.graph().edge_count();
+
+        let mut valid_matches: Vec<(usize, usize)> = Vec::new();
+        let mut subgraphs: Vec<(usize, usize)> = Vec::new();
+        let mut masks: Vec<Vec<BitSet>> = Vec::new();
+        let mut buckets_by_len: Vec<HashMap<usize, Vec<(usize, usize)>>> = Vec::new();
+
+        // Create subgraphs of size 1
+        for (state_id, fragment) in state.iter().enumerate() {
+            for edge_idx in fragment.iter() {
+                subgraphs.push((edge_idx, state_id));
+            }
+        }
+
+        while subgraphs.len() > 0 {
+            // Subgraphs to extend in next loop
+            let mut buckets: HashMap<usize, Vec<(usize, usize)>> = HashMap::new();
+            let mut new_subgraphs = Vec::new();
+            let mut used_edge_mask = vec![BitSet::with_capacity(num_edges); state.len()];
+
+            // Extend each subgraph and bucket
+            for (frag_id, state_id) in subgraphs {
+                // Get state frament that this subgraph is contained in
+                let state_frag = &state[state_id];
+
+                // Get ids of extensions of this subgraph
+                let children_ids = &self.dag[frag_id].children;
+
+                for child_id in children_ids {
+                    // Check if this extension is valid
+                    // i.e. the extended subgraph is contained in a fragment of state
+                    let child_frag = &self.dag[*child_id].fragment;
+                    let valid = child_frag.is_subset(state_frag);
+
+                    if valid {
+                        let canon_id = &self.dag[*child_id].canon_id;
+                        
+                        // Add fragment to bucket
+                        buckets.entry(*canon_id)
+                            .and_modify(|bucket| bucket.push((*child_id, state_id)))
+                            .or_insert(vec![(*child_id, state_id)]);
+                    }
+                }
+            }
+
+            // Search through buckets and create matches
+            for fragments in buckets.values_mut() {
+                let mut has_match = BitSet::with_capacity(fragments.len());
+                // Loop over pairs of fragments
+                for i in 0..fragments.len() {
+                    for j in i+1..fragments.len() {
+                        if has_match.contains(i) && has_match.contains(j) {
+                            continue;
+                        }
+
+                        let mut frag1 = (i, fragments[i]);
+                        let mut frag2 = (j, fragments[j]);
+
+                        // Order fragments
+                        if frag1.1.0 < frag2.1.0 {
+                            let temp = frag1;
+                            frag1 = frag2;
+                            frag2 = temp;
+                        }
+
+                        let frag1_bucket_idx = frag1.0;
+                        let frag1_id = frag1.1.0;
+                        let frag1_state_id = frag1.1.1;
+
+                        let frag2_bucket_idx = frag2.0;
+                        let frag2_id = frag2.1.0;
+                        let frag2_state_id = frag2.1.1;
+
+                        // Check for valid match
+                        if let Some(x) = self.match_to_id.get(&(frag1_id, frag2_id)) {
+                            // Only add match if it occurs later in the ordering than
+                            // the last removed match
+                            if *x >= last_removed {
+                                // valid_matches.push((frag1_id, frag2_id));
+
+                                // If this is the first time seeing that this frag has a match,
+                                // add it to the new_subgraphs list and modify mask
+                                if !has_match.contains(frag1_bucket_idx) {
+                                    new_subgraphs.push((frag1_id, frag1_state_id));
+                                    used_edge_mask[frag1_state_id].union_with(&self.dag[frag1_id].fragment);
+                                    has_match.insert(frag1_bucket_idx);
+                                }
+                                if !has_match.contains(frag2_bucket_idx) {
+                                    new_subgraphs.push((frag2_id, frag2_state_id));
+                                    used_edge_mask[frag2_state_id].union_with(&self.dag[frag2_id].fragment);
+                                    has_match.insert(frag2_bucket_idx);
+                                }
+                            }
+                        }
+                    }
+                }
+
+                // Remove any matchless fragments from bucket
+                let len = fragments.len();
+                for i in (0..len).rev() {
+                    if !has_match.contains(i) {
+                        fragments.remove(i);
+                    }
+                }     
+            }
+
+            // Add buckets to list
+            buckets_by_len.push(buckets);
+
+            // Add mask to list
+            masks.push(used_edge_mask);
+
+            // Update to new subgraphs
+            subgraphs = new_subgraphs;
+        }
+
+        // Let new state be only the edges which can be matched
+        let mut state = Vec::new();
+
+        for frag in masks[0].iter() {
+            // Add connected components to new state
+            state.extend(connected_components_under_edges(mol.graph(), &frag));
+        }
+
+        // Remove frags of size 1 or less
+        state.retain(|frag| frag.len() >= 2);
+
+
+        // Use bounding strategy to find the largest length match
+        // to try removing from this state.
+        let mut largest_length = 2;
+
+        for (i, list) in masks.iter().enumerate() {
+            let mut stop = false;
+            for bound_type in bounds {
+                if stop {
+                    break;
+                }
+
+                match bound_type {
+                    Bound::Int => {
+                        let mut bound = 0;
+                        let i = i + 2;
+                        for (j, frag) in list.iter().enumerate() {
+                            let mf = masks[0][j].len();
+                            let mi = frag.len();
+                            let x = mf + (mi % i) - mi;
+                            bound += mf - (mi / i) - (x+i-2) / (i-1);
+                        }
+                        bound -= (i as f32).log2().ceil() as usize;
+
+                        if state_index - bound >= best {
+                            stop = true;
+                        }
+                    }
+                    Bound::VecSimple => {
+                        let size_two_list = &masks[0];
+                        let mut total_set = HashSet::new();
+                        let mut small_set = HashSet::new();
+                        
+                        for frag in size_two_list.iter() {
+                            for edge in frag {
+                                total_set.insert(self.edge_types[edge]);
+                            }
+                        }
+                        for frag in list {
+                            for edge in frag {
+                                small_set.insert(self.edge_types[edge]);
+                            }
+                        }
+
+                        let z = total_set.len();
+                        let zi = small_set.len();
+                        let i = i + 2;
+                        let s: usize = size_two_list.iter().map(|frag| frag.len()).sum();
+                        let mi: usize = list.iter().map(|frag| frag.len()).sum();
+                        let mf = s - mi;
+
+                        let bound = s - {
+                            if z < i {
+                                z-1 + (i as f32 / z as f32).log2().ceil() as usize + (mi/i) + ((mi%i)+mf - (z - zi) + i-2)/(i-1)
+                            }
+                            else {
+                                z + (mi-zi)/i + ((mi-zi)%i + mf - (z - zi) + i-2)/(i-1)
+                            }
+                        };
+
+                        if mi == 0 {
+                            break;
+                        }
+
+                        if state_index - bound >= best {
+                            stop = true;
+                        }                    
+                    }
+                    _ => ()
+                }
+            }
+
+            if stop {
+                largest_length += 1;
+            }
+            else {
+                break;
+            }
+        }
+
+        // Create matches
+        for bucket in buckets_by_len[largest_length-2..].iter().rev() {
+            for fragments in bucket.values() {
+                for i in 0..fragments.len() {
+                    for j in i+1..fragments.len() {
+                        let mut frag1_id = fragments[i].0;
+                        let mut frag2_id = fragments[j].0;
+
+                        if frag1_id < frag2_id {
+                            let temp = frag1_id;
+                            frag1_id = frag2_id;
+                            frag2_id = temp;
+                        }
+
+                        if let Some(x) = self.match_to_id.get(&(frag1_id, frag2_id)){
+                            if *x >= last_removed {
+                                valid_matches.push((frag1_id, frag2_id));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // Sort matches
+        valid_matches.sort();
+        valid_matches.reverse();
+
+        valid_matches
+    }
+
+    pub fn len(&self) -> usize {
+        self.id_to_match.len()
+    }
+
+    pub fn get_frag(&self, id: usize) -> &BitSet {
+        &self.dag[id].fragment
+    }
+
+    pub fn get_match_id(&self, mat: &(usize, usize)) -> Option<usize> {
+        self.match_to_id.get(mat).copied()
+    }
+}
+

From fd418c020ddce3d9194a01cae46a330bad3e152a Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 18 Sep 2025 14:46:59 -0700
Subject: [PATCH 45/51] New state struct

---
 src/assembly.rs | 67 +++++++++++--------------------------------------
 src/lib.rs      |  1 +
 src/matches.rs  | 17 +++++++------
 src/memoize.rs  | 10 +++++---
 src/state.rs    | 54 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 87 insertions(+), 62 deletions(-)
 create mode 100644 src/state.rs

diff --git a/src/assembly.rs b/src/assembly.rs
index 958deaa3..69b661ad 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -30,7 +30,7 @@ use clap::ValueEnum;
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator, IndexedParallelIterator};
 
 use crate::{
-    bounds::Bound, canonize::CanonizeMode, enumerate::EnumerateMode, kernels::KernelMode, matches::Matches, memoize::{MemoizeMode, NewCache}, molecule:: Molecule, utils::{connected_components_under_edges}
+    bounds::Bound, canonize::CanonizeMode, enumerate::EnumerateMode, kernels::KernelMode, matches::Matches, memoize::{MemoizeMode, NewCache}, molecule:: Molecule, state::State, utils::connected_components_under_edges
 };
 
 /// Parallelization strategy for the recursive search phase.
@@ -173,37 +173,25 @@ fn _is_usable(state: &[BitSet], h1: &BitSet, h2: &BitSet, masks: &mut Vec<Vec<Bi
 pub fn recurse_index_search(
     mol: &Molecule,
     matches: &Matches,
-    last_removed: usize,
-    removal_order: Vec<usize>,
-    state: &[BitSet],
-    state_index: usize,
+    state: &State,
     best_index: Arc<AtomicUsize>,
     bounds: &[Bound],
     cache: &mut NewCache,
     parallel_mode: ParallelMode,
-    kernel_mode: KernelMode,
 ) -> (usize, usize) {
     // if removal_order.len() == 1 {println!("{:?}", removal_order);}
     // println!("{:?}", state);
 
-    // let mut memoize_time = Duration::new(0, 0);
-    // let mut bucket_time = Duration::new(0, 0);
-    // let mut component_time = Duration::new(0, 0);
-    // let mut int_time = Duration::new(0, 0);
-    // let mut vec_time = Duration::new(0, 0);
-    // let mut matches_time = Duration::new(0, 0);
-    // let mut sort_time = Duration::new(0, 0);
-
     // Memoization
-    if cache.memoize_state(mol, &state, state_index, &removal_order) {
-        return (state_index, 1);
+    if cache.memoize_state(mol, state) {
+        return (state.index(), 1);
     }
 
-    let valid_matches = matches.generate_matches(mol, state, state_index, last_removed, best_index.load(Relaxed), bounds);
+    let (frags, valid_matches) = matches.generate_matches(mol, state, best_index.load(Relaxed), bounds);
 
     // Keep track of the best assembly index found in any of this assembly
     // state's children and the number of states searched, including this one.
-    let best_child_index = AtomicUsize::from(state_index);
+    let best_child_index = AtomicUsize::from(state.index());
     let states_searched = AtomicUsize::from(1);
     
     // Define a closure that handles recursing to a new assembly state based on
@@ -214,7 +202,7 @@ pub fn recurse_index_search(
         let h2 = matches.get_frag(v.1);
         
         // TODO: try keeping track of fragment to elimate some some search
-        if let Some(fragments) = fragments(mol, &state, h1, h2) {
+        if let Some(fragments) = fragments(mol, &frags, h1, h2) {
             // If using depth-one parallelism, all descendant states should be
             // computed serially.
             let new_parallel = if parallel_mode == ParallelMode::DepthOne {
@@ -223,24 +211,17 @@ pub fn recurse_index_search(
                 parallel_mode
             };
 
+            let new_state = state.update(fragments, h1.len(), i, matches.match_id(&v).unwrap());
+
             // Recurse using the remaining matches and updated fragments.
             let (child_index, child_states_searched) = recurse_index_search(
                 mol,
                 matches,
-                matches.get_match_id(&v).unwrap(),
-                {
-                    let mut clone = removal_order.clone();
-                    // clone.push(*matches.get(&v).unwrap());
-                    clone.push(i);
-                    clone
-                },
-                &fragments,
-                state_index - h1.len() + 1,
+                &new_state,
                 best_index.clone(),
                 bounds,
                 &mut cache.clone(),
                 new_parallel,
-                kernel_mode,
             );
 
             // Update the best assembly indices (across children states and
@@ -339,47 +320,29 @@ pub fn index_search(
     canonize_mode: CanonizeMode,
     parallel_mode: ParallelMode,
     memoize_mode: MemoizeMode,
-    kernel_mode: KernelMode,
+    _kernel_mode: KernelMode,
     bounds: &[Bound],
     _clique: bool,
 ) -> (u32, u32, usize) {
-    // Catch not-yet-implemented modes.
-
     // Enumerate non-overlapping isomorphic subgraph pairs.
     let matches = Matches::new(mol, canonize_mode);
 
     // Create memoization cache.
-    //let mut cache = Cache::new(memoize_mode, canonize_mode);
     let mut cache = NewCache::new(memoize_mode);
 
-    /*let graph = {
-        if clique {
-            Some(CompatGraph::new(&matches, &dag))
-        }
-        else {
-            None
-        }
-    };*/
-
-    // Initialize the first fragment as the entire graph.
-    let mut init = BitSet::new();
-    init.extend(mol.graph().edge_indices().map(|ix| ix.index()));
+    // Initialize state
+    let state = State::new(mol);
 
     // Search for the shortest assembly pathway recursively.
-    let edge_count = mol.graph().edge_count();
-    let best_index = Arc::new(AtomicUsize::from(edge_count - 1));
+    let best_index = Arc::new(AtomicUsize::from(mol.graph().edge_count() - 1));
     let (index, states_searched) = recurse_index_search(
         mol,
         &matches,
-        0,
-        Vec::new(),
-        &[init],
-        edge_count - 1,
+        &state,
         best_index,
         bounds,
         &mut cache,
         parallel_mode,
-        kernel_mode,
     );
 
     // println!("Bounded: {}", cache.count());
diff --git a/src/lib.rs b/src/lib.rs
index 25cd9a3a..80529e9d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -54,6 +54,7 @@ pub mod kernels;
 pub mod memoize;
 pub mod reductions;
 pub mod matches;
+pub mod state;
 mod vf3;
 
 // Python wrapper.
diff --git a/src/matches.rs b/src/matches.rs
index 43a46f43..818fd012 100644
--- a/src/matches.rs
+++ b/src/matches.rs
@@ -5,7 +5,7 @@ use fxhash::FxHashMap;
 use itertools::Itertools;
 use petgraph::graph::EdgeIndex;
 
-use crate::{bounds::Bound, canonize::{canonize, CanonizeMode, Labeling}, molecule::{Bond, Element, Molecule}, reductions::CompatGraph, utils::{connected_components_under_edges, edge_neighbors}};
+use crate::{bounds::Bound, canonize::{canonize, CanonizeMode, Labeling}, molecule::{Bond, Element, Molecule}, reductions::CompatGraph, state::State, utils::{connected_components_under_edges, edge_neighbors}};
 
 pub struct DagNode {
     fragment: BitSet,
@@ -258,8 +258,11 @@ impl Matches {
         }
     }
 
-    pub fn generate_matches(&self, mol: &Molecule, state: &[BitSet], state_index: usize, last_removed: usize, best: usize, bounds: &[Bound]) -> Vec<(usize, usize)> {
+    pub fn generate_matches(&self, mol: &Molecule, state: &State, best: usize, bounds: &[Bound]) -> (Vec<BitSet>, Vec<(usize, usize)>) {
         let num_edges = mol.graph().edge_count();
+        let state_frags = state.fragments();
+        let state_index = state.index();
+        let last_removed = state.last_removed();
 
         let mut valid_matches: Vec<(usize, usize)> = Vec::new();
         let mut subgraphs: Vec<(usize, usize)> = Vec::new();
@@ -267,7 +270,7 @@ impl Matches {
         let mut buckets_by_len: Vec<HashMap<usize, Vec<(usize, usize)>>> = Vec::new();
 
         // Create subgraphs of size 1
-        for (state_id, fragment) in state.iter().enumerate() {
+        for (state_id, fragment) in state_frags.iter().enumerate() {
             for edge_idx in fragment.iter() {
                 subgraphs.push((edge_idx, state_id));
             }
@@ -277,12 +280,12 @@ impl Matches {
             // Subgraphs to extend in next loop
             let mut buckets: HashMap<usize, Vec<(usize, usize)>> = HashMap::new();
             let mut new_subgraphs = Vec::new();
-            let mut used_edge_mask = vec![BitSet::with_capacity(num_edges); state.len()];
+            let mut used_edge_mask = vec![BitSet::with_capacity(num_edges); state_frags.len()];
 
             // Extend each subgraph and bucket
             for (frag_id, state_id) in subgraphs {
                 // Get state frament that this subgraph is contained in
-                let state_frag = &state[state_id];
+                let state_frag = &state_frags[state_id];
 
                 // Get ids of extensions of this subgraph
                 let children_ids = &self.dag[frag_id].children;
@@ -494,7 +497,7 @@ impl Matches {
         valid_matches.sort();
         valid_matches.reverse();
 
-        valid_matches
+        (state, valid_matches)
     }
 
     pub fn len(&self) -> usize {
@@ -505,7 +508,7 @@ impl Matches {
         &self.dag[id].fragment
     }
 
-    pub fn get_match_id(&self, mat: &(usize, usize)) -> Option<usize> {
+    pub fn match_id(&self, mat: &(usize, usize)) -> Option<usize> {
         self.match_to_id.get(mat).copied()
     }
 }
diff --git a/src/memoize.rs b/src/memoize.rs
index a251639e..3d4e5572 100644
--- a/src/memoize.rs
+++ b/src/memoize.rs
@@ -9,7 +9,7 @@ use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
 
 use crate::{
     canonize::{canonize, CanonizeMode, Labeling},
-    molecule::Molecule,
+    molecule::Molecule, state::State,
 };
 
 /// Strategy for memoizing assembly states in the search phase.
@@ -160,13 +160,17 @@ impl NewCache {
         }
     }
 
-    pub fn memoize_state(&mut self, mol: &Molecule, state: &[BitSet], state_index: usize, removal_order: &Vec<usize>) -> bool {
+    pub fn memoize_state(&mut self, mol: &Molecule, state: &State) -> bool {
+        let fragments = state.fragments();
+        let state_index = state.index();
+        let removal_order = state.removal_order();
+        
         if self.memoize_mode == MemoizeMode::None {
             return false;
         }
         
         let mut frag_ids = Vec::new();
-        for frag in state {
+        for frag in fragments {
             let id = self.get_canon_id(mol, frag);
             frag_ids.push(id);
         }
diff --git a/src/state.rs b/src/state.rs
new file mode 100644
index 00000000..4193bf2c
--- /dev/null
+++ b/src/state.rs
@@ -0,0 +1,54 @@
+use bit_set::BitSet;
+
+use crate::molecule::Molecule;
+
+pub struct State {
+    fragments: Vec<BitSet>,
+    index: usize,
+    removal_order: Vec<usize>,
+    last_removed: usize,
+}
+
+impl State {
+    pub fn new(mol: &Molecule) -> Self {
+        Self {
+            fragments: {
+                let mut init = BitSet::new();
+                init.extend(mol.graph().edge_indices().map(|ix| ix.index()));
+                vec![init]
+            },
+            index: mol.graph().edge_count(),
+            removal_order: Vec::new(),
+            last_removed: 0,
+        }
+    }
+
+    pub fn update(&self, fragments: Vec<BitSet>, removed_len: usize, removed_idx: usize, last_removed: usize) -> Self {
+        Self {
+            fragments,
+            index: self.index - removed_len + 1,
+            removal_order: {
+                let mut clone = self.removal_order.clone();
+                clone.push(removed_idx);
+                clone
+            },
+            last_removed,
+        }
+    }
+
+    pub fn fragments(&self) -> &[BitSet] {
+        &self.fragments
+    }
+
+    pub fn index(&self) -> usize {
+        self.index
+    }
+
+    pub fn removal_order(&self) -> &Vec<usize> {
+        &self.removal_order
+    }
+
+    pub fn last_removed(&self) -> usize {
+        self.last_removed
+    }
+}
\ No newline at end of file

From 688bbc685d22bbe92f96737c57f8949bd4b743bf Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 18 Sep 2025 15:11:27 -0700
Subject: [PATCH 46/51] Refactor bounding

---
 src/assembly.rs |  1 +
 src/matches.rs  | 66 +++++++++++++++++++++++++++----------------------
 2 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 69b661ad..df1fe6ac 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -187,6 +187,7 @@ pub fn recurse_index_search(
         return (state.index(), 1);
     }
 
+    // Generate matches
     let (frags, valid_matches) = matches.generate_matches(mol, state, best_index.load(Relaxed), bounds);
 
     // Keep track of the best assembly index found in any of this assembly
diff --git a/src/matches.rs b/src/matches.rs
index 818fd012..79576c5c 100644
--- a/src/matches.rs
+++ b/src/matches.rs
@@ -392,8 +392,44 @@ impl Matches {
 
         // Use bounding strategy to find the largest length match
         // to try removing from this state.
+        let largest_length = self.bound(state_index, best, &masks, bounds);
+
+        // Create matches
+        for bucket in buckets_by_len[largest_length-2..].iter().rev() {
+            for fragments in bucket.values() {
+                for i in 0..fragments.len() {
+                    for j in i+1..fragments.len() {
+                        let mut frag1_id = fragments[i].0;
+                        let mut frag2_id = fragments[j].0;
+
+                        if frag1_id < frag2_id {
+                            let temp = frag1_id;
+                            frag1_id = frag2_id;
+                            frag2_id = temp;
+                        }
+
+                        if let Some(x) = self.match_to_id.get(&(frag1_id, frag2_id)){
+                            if *x >= last_removed {
+                                valid_matches.push((frag1_id, frag2_id));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // Sort matches
+        valid_matches.sort();
+        valid_matches.reverse();
+
+        (state, valid_matches)
+    }
+
+    // Return the largest size removal that can possible result in savings
+    pub fn bound(&self, state_index: usize, best: usize, masks: &Vec<Vec<BitSet>>, bounds: &[Bound]) -> usize {        
         let mut largest_length = 2;
 
+
         for (i, list) in masks.iter().enumerate() {
             let mut stop = false;
             for bound_type in bounds {
@@ -469,35 +505,7 @@ impl Matches {
             }
         }
 
-        // Create matches
-        for bucket in buckets_by_len[largest_length-2..].iter().rev() {
-            for fragments in bucket.values() {
-                for i in 0..fragments.len() {
-                    for j in i+1..fragments.len() {
-                        let mut frag1_id = fragments[i].0;
-                        let mut frag2_id = fragments[j].0;
-
-                        if frag1_id < frag2_id {
-                            let temp = frag1_id;
-                            frag1_id = frag2_id;
-                            frag2_id = temp;
-                        }
-
-                        if let Some(x) = self.match_to_id.get(&(frag1_id, frag2_id)){
-                            if *x >= last_removed {
-                                valid_matches.push((frag1_id, frag2_id));
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        // Sort matches
-        valid_matches.sort();
-        valid_matches.reverse();
-
-        (state, valid_matches)
+        largest_length
     }
 
     pub fn len(&self) -> usize {

From 39b72c413805392a24f540e213bf3583700a94ac Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 18 Sep 2025 15:44:05 -0700
Subject: [PATCH 47/51] Build clique

---
 src/matches.rs    | 13 ++++++++++++-
 src/reductions.rs | 32 ++++++++++++++++++++++----------
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/src/matches.rs b/src/matches.rs
index 79576c5c..b60c5045 100644
--- a/src/matches.rs
+++ b/src/matches.rs
@@ -23,7 +23,7 @@ pub struct Matches {
     id_to_match: Vec<(usize, usize)>,
     match_to_id: FxHashMap<(usize, usize), usize>,
     dag: Vec<DagNode>,
-    // clique: CompatGraph,
+    clique: CompatGraph,
     // clique_offset: usize,
     edge_types: Vec<usize>,
 }
@@ -37,6 +37,14 @@ impl DagNode {
             children: Vec::new(),
         }
     }
+
+    pub fn fragment(&self) -> &BitSet {
+        &self.fragment
+    }
+
+    pub fn len(&self) -> usize {
+        self.fragment.len()
+    }
 }
 
 impl Matches {
@@ -250,10 +258,13 @@ impl Matches {
             }
         }
 
+        let clique = CompatGraph::new(&dag, &id_to_match, 2);
+
         Self {
             id_to_match,
             match_to_id,
             dag,
+            clique,
             edge_types,
         }
     }
diff --git a/src/reductions.rs b/src/reductions.rs
index f7e699c6..71f75c79 100644
--- a/src/reductions.rs
+++ b/src/reductions.rs
@@ -11,35 +11,46 @@
 
 use bit_set::BitSet;
 use std::time::Instant;
-use std::collections::HashMap;
+
+use crate::matches::DagNode;
 
 /// Graph representation of the compatibility of duplicatable subgraph pairs.
 pub struct CompatGraph {
     /// The graph is implemented as an adjacency matrix. The ith element of graph
     /// has a 1 at position j if {i, j} is an edge.
     graph: Vec<BitSet>,
+    offset: usize,
 }
 
 impl CompatGraph {
     /// Constructs a compatibility graph given a set of matches.
-    pub fn new(init_matches: &Vec<(usize, usize)>, dag: &HashMap<usize, (BitSet, Vec<usize>)>) -> Self {
+    pub fn new(dag: &Vec<DagNode>, matches: &Vec<(usize, usize)>, largest_len: usize) -> Self {
         let start = Instant::now();
-        let size = init_matches.len();
+
+        // Find the index of the first match in the clique
+        // TODO: what if largest_len < maximum length in dag?
+        let mut offset = 0;
+        while dag[matches[offset].0].len() < largest_len {
+            offset += 1;
+        }
+
+        // Compute number of nodes in clique
+        let size = matches.len() - offset;
 
         // Initialize weights and empty graph
         let mut init_graph: Vec<BitSet> = Vec::with_capacity(size);
-        for _ in 0..init_matches.len() {
+        for _ in 0..size {
             init_graph.push(BitSet::with_capacity(size));
         }
 
         // Populate graph
-        for (idx1, (h1, h2)) in init_matches.iter().enumerate() {
-            for (idx2, (h1p, h2p)) in init_matches[idx1 + 1..].iter().enumerate() {
+        for (idx1, match1) in matches[offset..].iter().enumerate() {
+            for (idx2, match2) in matches[offset + idx1 + 1..].iter().enumerate() {
                 let idx2 = idx2 + idx1 + 1;
-                let h1 = &dag.get(h1).unwrap().0;
-                let h1p = &dag.get(h1p).unwrap().0;
-                let h2 = &dag.get(h2).unwrap().0;
-                let h2p = &dag.get(h2p).unwrap().0;
+                let h1 = dag[match1.0].fragment();
+                let h2 = dag[match1.1].fragment();
+                let h1p = dag[match2.0].fragment();
+                let h2p = dag[match2.1].fragment();
 
                 let forward_compatible = {
                     h2.is_disjoint(h1p) && 
@@ -59,6 +70,7 @@ impl CompatGraph {
 
         Self {
             graph: init_graph,
+            offset,
         }
     }
 

From 9a64cad57de0ab70e2486b70f1e94b4b11bc7986 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 18 Sep 2025 15:53:25 -0700
Subject: [PATCH 48/51] Fix off by 1 error in state

---
 src/state.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/state.rs b/src/state.rs
index 4193bf2c..1d709b9b 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -17,7 +17,7 @@ impl State {
                 init.extend(mol.graph().edge_indices().map(|ix| ix.index()));
                 vec![init]
             },
-            index: mol.graph().edge_count(),
+            index: mol.graph().edge_count() - 1,
             removal_order: Vec::new(),
             last_removed: 0,
         }

From eae23de3ce0d77b1d997c07761408aabe0b38540 Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 18 Sep 2025 16:09:35 -0700
Subject: [PATCH 49/51] Make clique an option

---
 src/assembly.rs |  4 ++--
 src/matches.rs  | 11 ++++++++---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index df1fe6ac..74825f77 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -323,10 +323,10 @@ pub fn index_search(
     memoize_mode: MemoizeMode,
     _kernel_mode: KernelMode,
     bounds: &[Bound],
-    _clique: bool,
+    clique: bool,
 ) -> (u32, u32, usize) {
     // Enumerate non-overlapping isomorphic subgraph pairs.
-    let matches = Matches::new(mol, canonize_mode);
+    let matches = Matches::new(mol, canonize_mode, clique);
 
     // Create memoization cache.
     let mut cache = NewCache::new(memoize_mode);
diff --git a/src/matches.rs b/src/matches.rs
index b60c5045..5e753e9a 100644
--- a/src/matches.rs
+++ b/src/matches.rs
@@ -23,7 +23,7 @@ pub struct Matches {
     id_to_match: Vec<(usize, usize)>,
     match_to_id: FxHashMap<(usize, usize), usize>,
     dag: Vec<DagNode>,
-    clique: CompatGraph,
+    clique: Option<CompatGraph>,
     // clique_offset: usize,
     edge_types: Vec<usize>,
 }
@@ -49,7 +49,7 @@ impl DagNode {
 
 impl Matches {
     // Generate DAG, clique, and other info from the base molecule.
-    pub fn new(mol: &Molecule, canonize_mode: CanonizeMode) -> Self {
+    pub fn new(mol: &Molecule, canonize_mode: CanonizeMode, use_clique: bool) -> Self {
         let num_edges = mol.graph().edge_count();
 
         let mut subgraphs: Vec<usize> = Vec::new();
@@ -258,7 +258,12 @@ impl Matches {
             }
         }
 
-        let clique = CompatGraph::new(&dag, &id_to_match, 2);
+        let clique = if use_clique {
+            Some(CompatGraph::new(&dag, &id_to_match, 2))
+        }
+        else {
+            None
+        };
 
         Self {
             id_to_match,

From 19533da28ee25e9803342ff005fa942d74103aab Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 18 Sep 2025 18:16:58 -0700
Subject: [PATCH 50/51] Add handoff from dag to clique

---
 src/assembly.rs   |  9 ++++---
 src/matches.rs    | 69 +++++++++++++++++++++++++++++++++++++++++------
 src/reductions.rs | 21 ++++++++++++---
 src/state.rs      | 26 ++++++++++++++----
 4 files changed, 106 insertions(+), 19 deletions(-)

diff --git a/src/assembly.rs b/src/assembly.rs
index 74825f77..315e84fc 100644
--- a/src/assembly.rs
+++ b/src/assembly.rs
@@ -181,14 +181,17 @@ pub fn recurse_index_search(
 ) -> (usize, usize) {
     // if removal_order.len() == 1 {println!("{:?}", removal_order);}
     // println!("{:?}", state);
-
+    
     // Memoization
     if cache.memoize_state(mol, state) {
         return (state.index(), 1);
     }
 
     // Generate matches
-    let (frags, valid_matches) = matches.generate_matches(mol, state, best_index.load(Relaxed), bounds);
+    // Some additional fragmentation of the state may happen while generating matches, returned as frags
+    // Valid matches is list of matches to remove as pairs of fragment ids
+    // Clique subgraph is the nodes present in the clique at this state
+    let (frags, valid_matches, clique_subgraph) = matches.generate_matches(mol, state, best_index.load(Relaxed), bounds);
 
     // Keep track of the best assembly index found in any of this assembly
     // state's children and the number of states searched, including this one.
@@ -212,7 +215,7 @@ pub fn recurse_index_search(
                 parallel_mode
             };
 
-            let new_state = state.update(fragments, h1.len(), i, matches.match_id(&v).unwrap());
+            let new_state = state.update(matches, fragments, &v, i, &clique_subgraph);
 
             // Recurse using the remaining matches and updated fragments.
             let (child_index, child_states_searched) = recurse_index_search(
diff --git a/src/matches.rs b/src/matches.rs
index 5e753e9a..8fbd5a55 100644
--- a/src/matches.rs
+++ b/src/matches.rs
@@ -49,6 +49,7 @@ impl DagNode {
 
 impl Matches {
     // Generate DAG, clique, and other info from the base molecule.
+    // TODO: generate and return state framework instaed of triplet
     pub fn new(mol: &Molecule, canonize_mode: CanonizeMode, use_clique: bool) -> Self {
         let num_edges = mol.graph().edge_count();
 
@@ -274,7 +275,18 @@ impl Matches {
         }
     }
 
-    pub fn generate_matches(&self, mol: &Molecule, state: &State, best: usize, bounds: &[Bound]) -> (Vec<BitSet>, Vec<(usize, usize)>) {
+    pub fn generate_matches(&self, mol: &Molecule, state: &State, best: usize, bounds: &[Bound]) -> (Vec<BitSet>, Vec<(usize, usize)>, Option<BitSet>) {
+        self.valid_matches_dag(mol, state, best, bounds)
+        
+        // if state.use_subgraph() {
+        //     self.valid_matches_subgraph()
+        // }
+        // else {
+        //     self.valid_matches_dag()
+        // }
+    }
+
+    fn valid_matches_dag(&self, mol: &Molecule, state: &State, best: usize, bounds: &[Bound]) -> (Vec<BitSet>, Vec<(usize, usize)>, Option<BitSet>) {
         let num_edges = mol.graph().edge_count();
         let state_frags = state.fragments();
         let state_index = state.index();
@@ -410,8 +422,28 @@ impl Matches {
         // to try removing from this state.
         let largest_length = self.bound(state_index, best, &masks, bounds);
 
-        // Create matches
-        for bucket in buckets_by_len[largest_length-2..].iter().rev() {
+        // Clique reduction subgraph from this state. If none we do not create a subgraph
+        let mut clique_subgraph = 
+            if let Some(clique) = &self.clique {
+                if clique.max_len() >= largest_length {
+                    Some(BitSet::with_capacity(clique.len()))
+                }
+                else {
+                    None
+                }
+            } else {
+                None
+            };  
+
+        // Create valid matches and subgraph if needed
+        for (bucket_idx, bucket) in buckets_by_len.iter().enumerate().rev() {
+            // If we do not have to create the subgraph, and buckets with this size
+            // have been bounded, then we can stop generating valid matches
+            let match_len = bucket_idx + 2;
+            if clique_subgraph == None && match_len < largest_length {
+                break;
+            }
+
             for fragments in bucket.values() {
                 for i in 0..fragments.len() {
                     for j in i+1..fragments.len() {
@@ -424,9 +456,17 @@ impl Matches {
                             frag2_id = temp;
                         }
 
-                        if let Some(x) = self.match_to_id.get(&(frag1_id, frag2_id)){
-                            if *x >= last_removed {
+                        if let Some(match_id) = self.match_to_id.get(&(frag1_id, frag2_id)) {
+                            if *match_id >= last_removed {
                                 valid_matches.push((frag1_id, frag2_id));
+                                
+                                // Add to subgraph if conditions are met
+                                if let Some(sub) = clique_subgraph.as_mut() {
+                                    let clique_id = self.clique.as_ref().unwrap().clique_id(*match_id);
+                                    if clique_id != -1 {
+                                        sub.insert(clique_id as usize);
+                                    }
+                                }
                             }
                         }
                     }
@@ -438,7 +478,11 @@ impl Matches {
         valid_matches.sort();
         valid_matches.reverse();
 
-        (state, valid_matches)
+        (state, valid_matches, clique_subgraph)
+    }
+
+    fn valid_matches_subgraph(&self) {
+        
     }
 
     // Return the largest size removal that can possible result in savings
@@ -528,12 +572,21 @@ impl Matches {
         self.id_to_match.len()
     }
 
-    pub fn get_frag(&self, id: usize) -> &BitSet {
-        &self.dag[id].fragment
+    pub fn get_frag(&self, frag_id: usize) -> &BitSet {
+        &self.dag[frag_id].fragment
     }
 
     pub fn match_id(&self, mat: &(usize, usize)) -> Option<usize> {
         self.match_to_id.get(mat).copied()
     }
+
+    pub fn clique_max_len(&self) -> usize {
+        if let Some(clique) = &self.clique {
+            clique.max_len()
+        }
+        else {
+            0
+        }
+    }
 }
 
diff --git a/src/reductions.rs b/src/reductions.rs
index 71f75c79..4e27300e 100644
--- a/src/reductions.rs
+++ b/src/reductions.rs
@@ -19,18 +19,19 @@ pub struct CompatGraph {
     /// The graph is implemented as an adjacency matrix. The ith element of graph
     /// has a 1 at position j if {i, j} is an edge.
     graph: Vec<BitSet>,
+    max_len: usize,
     offset: usize,
 }
 
 impl CompatGraph {
     /// Constructs a compatibility graph given a set of matches.
-    pub fn new(dag: &Vec<DagNode>, matches: &Vec<(usize, usize)>, largest_len: usize) -> Self {
+    pub fn new(dag: &Vec<DagNode>, matches: &Vec<(usize, usize)>, max_len: usize) -> Self {
         let start = Instant::now();
 
         // Find the index of the first match in the clique
-        // TODO: what if largest_len < maximum length in dag?
+        // TODO: what if max_len < maximum length in dag?
         let mut offset = 0;
-        while dag[matches[offset].0].len() < largest_len {
+        while dag[matches[offset].0].len() < max_len {
             offset += 1;
         }
 
@@ -70,6 +71,7 @@ impl CompatGraph {
 
         Self {
             graph: init_graph,
+            max_len,
             offset,
         }
     }
@@ -79,6 +81,19 @@ impl CompatGraph {
         self.graph.len()
     }
 
+    pub fn max_len(&self) -> usize {
+        self.max_len
+    }
+
+    pub fn clique_id(&self, match_id: usize) -> isize {
+        if match_id < self.offset {
+            -1
+        }
+        else {
+            (match_id - self.offset) as isize
+        }
+    }
+
     /// Returns the degree vertex v in the subgraph
     pub fn degree(&self, v: usize, subgraph: &BitSet) -> usize {
         self.graph[v].intersection(subgraph).count()
diff --git a/src/state.rs b/src/state.rs
index 1d709b9b..69d4f145 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -1,9 +1,10 @@
 use bit_set::BitSet;
 
-use crate::molecule::Molecule;
+use crate::{matches::Matches, molecule::Molecule};
 
 pub struct State {
     fragments: Vec<BitSet>,
+    clique_subgraph: Option<BitSet>,
     index: usize,
     removal_order: Vec<usize>,
     last_removed: usize,
@@ -17,22 +18,33 @@ impl State {
                 init.extend(mol.graph().edge_indices().map(|ix| ix.index()));
                 vec![init]
             },
+            clique_subgraph: None,
             index: mol.graph().edge_count() - 1,
             removal_order: Vec::new(),
             last_removed: 0,
         }
     }
 
-    pub fn update(&self, fragments: Vec<BitSet>, removed_len: usize, removed_idx: usize, last_removed: usize) -> Self {
+    pub fn update(&self, matches: &Matches, fragments: Vec<BitSet>, mat: &(usize, usize), order_idx: usize, subgraph: &Option<BitSet>) -> Self {        
+        let match_len = matches.get_frag(mat.0).len();
+        
         Self {
             fragments,
-            index: self.index - removed_len + 1,
+            index: self.index - match_len + 1,
             removal_order: {
                 let mut clone = self.removal_order.clone();
-                clone.push(removed_idx);
+                clone.push(order_idx);
                 clone
             },
-            last_removed,
+            last_removed: matches.match_id(mat).unwrap(),
+            clique_subgraph: {
+                if *subgraph != None && match_len <= matches.clique_max_len() {
+                    Some(subgraph.as_ref().unwrap().clone())
+                }
+                else {
+                    None
+                }
+            }
         }
     }
 
@@ -51,4 +63,8 @@ impl State {
     pub fn last_removed(&self) -> usize {
         self.last_removed
     }
+
+    pub fn use_subgraph(&self) -> bool {
+        self.clique_subgraph != None
+    }
 }
\ No newline at end of file

From 83ca9815ec6eeb68d4997367ad09245f8deb020f Mon Sep 17 00:00:00 2001
From: Garrett-Pz <garrettparzych@gmail.com>
Date: Thu, 18 Sep 2025 23:40:16 -0700
Subject: [PATCH 51/51] Small fixes

---
 src/reductions.rs | 4 +++-
 src/state.rs      | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/reductions.rs b/src/reductions.rs
index 4e27300e..5b60771b 100644
--- a/src/reductions.rs
+++ b/src/reductions.rs
@@ -31,7 +31,7 @@ impl CompatGraph {
         // Find the index of the first match in the clique
         // TODO: what if max_len < maximum length in dag?
         let mut offset = 0;
-        while dag[matches[offset].0].len() < max_len {
+        while dag[matches[offset].0].len() > max_len {
             offset += 1;
         }
 
@@ -85,6 +85,8 @@ impl CompatGraph {
         self.max_len
     }
 
+    // Takes a match id and returns its id in the clique
+    // Returns -1 if the match is not used in the clique
     pub fn clique_id(&self, match_id: usize) -> isize {
         if match_id < self.offset {
             -1
diff --git a/src/state.rs b/src/state.rs
index 69d4f145..4ad1a82f 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -39,7 +39,7 @@ impl State {
             last_removed: matches.match_id(mat).unwrap(),
             clique_subgraph: {
                 if *subgraph != None && match_len <= matches.clique_max_len() {
-                    Some(subgraph.as_ref().unwrap().clone())
+                    subgraph.clone()
                 }
                 else {
                     None