From 04f51762a97e9864c0219cdea8e13e57704cf744 Mon Sep 17 00:00:00 2001
From: Michael Strobel <lidstromfan29@gmail.com>
Date: Fri, 20 Feb 2026 14:57:31 -0800
Subject: [PATCH 1/9] Hide initial callback error

---
 Dash_interface/chart_section_n.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Dash_interface/chart_section_n.py b/Dash_interface/chart_section_n.py
index dcc4627..c6241ea 100644
--- a/Dash_interface/chart_section_n.py
+++ b/Dash_interface/chart_section_n.py
@@ -672,6 +672,7 @@ def apply_structure_filter(data, siteLocatorObj):
         Output("download_heatmap_svg", "data"),
         Input("download_svg_button", "n_clicks"),
         State("hidden_svg", "children"),
+        prevent_initial_call=True,
     )
     def download_svg(n_clicks, svg):
         if n_clicks:

From 02b90c62e2d219a34a767a93b53d45c3e5f97420 Mon Sep 17 00:00:00 2001
From: Michael Strobel <lidstromfan29@gmail.com>
Date: Mon, 23 Feb 2026 16:51:32 -0800
Subject: [PATCH 2/9] Prepare for MF Base API update. * Swap MF Base API calls
 for Dashboard worker API calls * Move spectrum filtering to Dashboard worker
 * Update Object instantiation to match new API

---
 Dash_interface/computation_n.py | 449 +++++++++++++++++++++++++++++++-
 ModiFinder_base                 |   2 +-
 pages/visualizer.py             |   3 +-
 3 files changed, 438 insertions(+), 16 deletions(-)

diff --git a/Dash_interface/computation_n.py b/Dash_interface/computation_n.py
index 7c9d205..7c9fb66 100644
--- a/Dash_interface/computation_n.py
+++ b/Dash_interface/computation_n.py
@@ -1,10 +1,394 @@
+import traceback
+
 from dash import Dash, html, dcc, Input, Output, State, dash_table
 import base64
 import pickle
 import json
 import copy
+from urllib.parse import quote
+from typing import List
+import sys
+
+import requests
 from modifinder import ModiFinder, Compound
 from rdkit import Chem
+from datetime import datetime
+
+adduct_mapping = {'M+H': '[M+H]+',  # raw adduct spelling -> canonical bracketed form; used by harmonize_spectrum_keys
+'[M+H]': '[M+H]+',
+'[M+H]+': '[M+H]+',
+'M+H]': '[M+H]+',
+'M+Na': '[M+Na]+',
+'[M+Na]': '[M+Na]+',
+'[M+Na]+': '[M+Na]+',
+'2M+Na': '[2M+Na]+',
+'M2+Na': '[2M+Na]+',
+'[2M+Na]+': '[2M+Na]+',
+'[2M+Na]': '[2M+Na]+',
+'M+K': '[M+K]+',
+'[M+K]': '[M+K]+',
+'[M+K]+': '[M+K]+',
+'[2M+K]+': '[2M+K]+',
+'2M+K': '[2M+K]+',
+'[2M+K]': '[2M+K]+',
+'M+H-H20': '[M-H2O+H]+',
+'M+H-H2O': '[M-H2O+H]+',
+'[M-H2O+H]+': '[M-H2O+H]+',
+'M-H20+H': '[M-H2O+H]+',
+'[M+H-H2O]+': '[M-H2O+H]+',
+'M-H2O+H': '[M-H2O+H]+',
+'M+H-2H2O': '[M-2H2O+H]+',
+'M-2H2O+H': '[M-2H2O+H]+',
+'[M-2H2O+H]+': '[M-2H2O+H]+',
+'M-2(H2O)+H': '[M-2H2O+H]+',
+'2M+Na-2H': '[2M-2H+Na]-',
+'2M-2H+Na': '[2M-2H+Na]-',
+'M-H': '[M-H]-',
+'[M-H]': '[M-H]-',
+'[M-H]-': '[M-H]-',
+'M-H-': '[M-H]-',
+'M-H1': '[M-H]-',
+'3M+Na': '[3M+Na]+',
+'[3M+Na]+': '[3M+Na]+',
+'[M]+': '[M]+',
+'M+': '[M]+',
+'M-e': '[M]+',
+'M2+H': '[2M+H]+',
+'2M+H': '[2M+H]+',
+'[2M+H]+': '[2M+H]+',
+'[2M+H]': '[2M+H]+',
+'[M+2H]': '[M+2H]2+',
+'[M+2H]2+': '[M+2H]2+',
+'M+2H]': '[M+2H]2+',
+'M+2H+2': '[M+2H]2+',
+'M+2H': '[M+2H]2+',
+'M+acetate': '[M+CH3COOH-H]-',
+'M+CH3COOH-H': '[M+CH3COOH-H]-',
+'M+CH3COO': '[M+CH3COOH-H]-',
+'M+ACN+H': '[M+CH3CN+H]+',
+'[M+ACN+H]+': '[M+CH3CN+H]+',
+'[M+H+CH3CN]': '[M+CH3CN+H]+',
+'M+2Na': '[M+2Na]2+',
+'M+2Na]': '[M+2Na]2+',
+'M+HCOO': '[M+HCOOH-H]-',
+'[M-H+HCOOH]': '[M+HCOOH-H]-',
+'M+FA-H': '[M+HCOOH-H]-',
+'M+formate': '[M+HCOOH-H]-',
+'[M+H+HCOOH]': '[M+HCOOH-H]-',  # NOTE(review): "+H" spelling mapped to the "-H" formate adduct - confirm
+'2M+FA-H': '[2M+HCOOH-H]-',
+'[2M-H+HCOOH]': '[2M+HCOOH-H]-',
+'M+NH4': '[M+NH3+H]+',
+'[M+NH4]+': '[M+NH3+H]+',
+'[M+NH4]': '[M+NH3+H]+',
+'2M+Hac-H': '[2M+CH3COOH-H]-',
+'2M-H': '[2M-H]-',
+'[2M-H]': '[2M-H]-',
+'2M+NH4': '[2M+NH3+H]+',
+'[2M+NH4]+': '[2M+NH3+H]+',
+'[2M+NH4]': '[2M+NH3+H]+',
+'[2M+Ca]2+': '[2M+Ca]2+',
+'[M+Ca]2+': '[M+Ca]2+',
+'[3M+Ca]2+': '[3M+Ca]2+',
+'[2M+Ca-H]+': '[2M-H+Ca]+',
+'[2M-H2O+H]+': '[2M-H2O+H]+',
+'[4M+Ca]2+': '[4M+Ca]2+',
+'[3M+NH4]+': '[3M+NH3+H]+',
+'3M+NH4': '[3M+NH3+H]+',
+'[2M-2H2O+H]+': '[2M-2H2O+H]+',
+'[M+ACN+NH4]+': '[M+CH3CN+NH3+H]+',
+'[5M+Ca]2+': '[5M+Ca]2+',
+'[3M+K]+': '[3M+K]+',
+'[3M+Ca-H]+': '[3M-H+Ca]+',  # Ca(2+) minus H(+) nets 1+, consistent with the [2M+Ca-H]+ entry
+'[M-H+2Na]+': '[M-H+2Na]+',
+'M-H+2Na': '[M-H+2Na]+',
+'[M-3H2O+H]+': '[M-3H2O+H]+',
+'M-3H2O+H': '[M-3H2O+H]+',
+'[M-3H2O+2H]2+': '[M-3H2O+2H]2+',
+'[M-2H2O+2H]2+': '[M-2H2O+2H]2+',
+'[M-4H2O+H]+': '[M-4H2O+H]+',
+'[M-5H2O+H]+': '[M-5H2O+H]+',
+'[M+Ca-H]+': '[M+Ca-H]+',
+'[2M-H+2Na]+': '[2M-H+2Na]+',
+'[2M-3H2O+H]+': '[2M-3H2O+H]+',
+'[M+H+Na]2+': '[M+Na+H]2+',
+'[M-2H2O+NH4]+': '[M-2H2O+NH3+H]+',
+'[2M-2H+Na]': '[2M-2H+Na]-',
+'[M-H+CH3OH]': '[M+CH3OH-H]-',
+'M+MeOH-H': '[M+CH3OH-H]-',
+'M-H2O-H': '[M-H2O-H]-',
+'[M-H-H2O]': '[M-H2O-H]-',
+'M+Cl-': '[M+Cl]-',
+'M+Cl': '[M+Cl]-',
+'[M+Cl]': '[M+Cl]-',
+'M+K-2H': '[M-2H+K]-',
+'[M-2H+K]': '[M-2H+K]-',
+'M-2H]': '[M-2H]2-',
+'M-2H': '[M-2H]2-',
+'M-2H-': '[M-2H]2-',
+'M+Na-2H': '[M-2H+Na]-',
+'[M-2H+Na]': '[M-2H+Na]-',
+'M+Br': '[M+Br]-',
+'3M-H': '[3M-H]-',
+'[3M-H]': '[3M-H]-',
+'[M+H+CH3OH]': '[M+CH3OH+H]+',
+'M+CH3OH+H': '[M+CH3OH+H]+',
+'[2M+H+CH3CN]': '[2M+CH3CN+H]+',
+'M-CO2-H': '[M-CO2-H]-',
+'[2M-2H+K]': '[2M-2H+K]-',
+'2M+K-2H': '[2M-2H+K]-',
+'[M+Na+CH3CN]': '[M+CH3CN+Na]+',
+'M-H2+H': '[M-H2+H]-',
+'M-H+Cl]': '[M-H+Cl]2-',
+'M-H+Cl': '[M-H+Cl]2-',
+'3M+H': '[3M+H]+',
+'[3M+H]': '[3M+H]+',
+'M+H-NH3': '[M-NH3+H]+',
+'M-NH3+H': '[M-NH3+H]+',
+'M-H+C2H2O': '[M+C2H2O-H]-',
+'M+H-C2H2O': '[M+C2H2O-H]-',  # NOTE(review): key reads as a loss of C2H2O from [M+H], value as a gain - confirm
+'M-H+CH2O2': '[M+CH2O2-H]-',
+'M+CH2O2-H': '[M+CH2O2-H]-',
+'M+TFA-H': '[M+C2HF3O2-H]-',
+'M-C2HF3O2-H': '[M+C2HF3O2-H]-',  # NOTE(review): key subtracts C2HF3O2, value adds it - confirm
+'[M]1+': '[M]1+'}
+
+
+gnps_keys_mapping = {  # alias -> universal key; harmonize_spectrum_keys looks keys up here after lower-casing them and turning spaces into "_"
+    ## precursor m/z
+    "precursor_mz": "precursor_mz",
+    ## charge (both spellings collapse to "precursor_charge")
+    "precursor_charge": "precursor_charge",
+    "charge": "precursor_charge", 
+    ## smiles
+    "smiles": "smiles",
+    "smile": "smiles",
+    ## adduct
+    "adduct": "adduct",
+    ## peaks
+    "peaks": "peaks",
+    ## instrument
+    "instrument": "instrument",
+    ## name
+    "name": "name",
+    "compound_name": "name",
+    ## spectrum_id
+    "spectrum_id": "spectrum_id",
+    "spectrumid": "spectrum_id",
+    ## exact mass
+    "exact_mass": "exact_mass",
+    "exactmass": "exact_mass",
+    ## fragment m/z values
+    "fragment_mz": "mz",
+    "mz": "mz",
+    "mzs": "mz",
+    ## fragment intensities
+    "fragment_intensities": "intensity",
+    "intensities": "intensity",
+}
+
+def filter_peaks_by_ratio_to_base_peak(spectrum, ratio_to_base_peak:float = 0.01):
+    """Remove peaks whose intensity is below a given fraction of the base peak.
+
+    Parameters
+    ----------
+    spectrum : object exposing parallel ``mz`` and ``intensity`` sequences
+        The spectrum to filter. It is modified in place.
+    ratio_to_base_peak : float (0, 1), default is 0.01
+        Minimum intensity, as a fraction of the most intense peak, for a
+        peak to be kept. Anything ``float()`` accepts works (e.g. "0.01").
+
+    Returns
+    -------
+    The same ``spectrum`` object, with ``mz`` and ``intensity`` replaced by
+    filtered lists. A spectrum without peaks is returned untouched.
+    """
+    if len(spectrum.intensity) == 0:
+        return spectrum  # max() below would raise ValueError on no peaks
+
+    threshold = float(ratio_to_base_peak) * max(spectrum.intensity)
+    kept = [(mz, i) for mz, i in zip(spectrum.mz, spectrum.intensity) if i >= threshold]
+    spectrum.mz = [mz for mz, _ in kept]
+    spectrum.intensity = [i for _, i in kept]
+    return spectrum
+
+def remove_larger_than_precursor_peaks(spectrum):
+    """Drop every peak that is not below 99% of the precursor m/z.
+
+    A peak is kept only when ``mz < spectrum.precursor_mz * 0.99``, so the precursor
+    peak itself is removed along with anything heavier. ``spectrum.mz`` and
+    ``spectrum.intensity`` are rebound to new lists and the same object is returned.
+    """
+    survivors = [
+        (peak_mz, peak_intensity)
+        for peak_mz, peak_intensity in zip(spectrum.mz, spectrum.intensity)
+        if peak_mz < spectrum.precursor_mz * 0.99
+    ]
+    spectrum.mz = [peak_mz for peak_mz, _ in survivors]
+    spectrum.intensity = [peak_intensity for _, peak_intensity in survivors]
+
+    return spectrum
+
+
+    
+def harmonize_spectrum_keys(data):
+    """
+    Parse the data to a universal format.
+
+    Keys are lower-cased, spaces become underscores, and known aliases are resolved through
+    ``gnps_keys_mapping``; unknown keys are kept under their normalized name. Values:
+
+    * ``"peaks_json"`` (exact, case-sensitive key) is JSON-decoded and stored under ``"peaks"``.
+    * ``adduct`` is translated through ``adduct_mapping``; unknown spellings pass through.
+    * ``precursor_charge``, ``precursor_mz``, ``ms_level``, ``scan`` and ``exact_mass`` are
+      converted to ``float``; ``precursor_charge`` and ``ms_level`` are then cast to ``int``.
+    * Everything else is copied unchanged.
+
+    Args:
+        :data (dict): The input data dictionary to be parsed.
+
+    Returns:
+        :dict: A new dictionary with universal keys; ``data`` itself is not modified.
+
+    Raises:
+        :ValueError: A numeric field could not be converted.
+    """
+    float_keys = ("precursor_charge", "precursor_mz", "ms_level", "scan", "exact_mass")
+    # No "charge" here: gnps_keys_mapping has already renamed it to "precursor_charge".
+    int_keys = ("precursor_charge", "ms_level")
+
+    def _convert_to_universal_key(key: str) -> str:
+        """Return the universal name for ``key``: lower-case, underscores, alias-resolved."""
+        key = key.lower().replace(" ", "_")
+        return gnps_keys_mapping.get(key, key)
+
+    res = {}
+    for key, value in data.items():
+        converted_key = _convert_to_universal_key(key)
+        if key == "peaks_json":
+            res['peaks'] = json.loads(value)
+        elif converted_key == "adduct":
+            res[converted_key] = adduct_mapping.get(value, value)
+        else:
+            try:
+                if converted_key in float_keys:
+                    value = float(value)
+                if converted_key in int_keys:
+                    value = int(value)
+            except Exception:
+                raise ValueError(f"Could not convert {key} to number")
+            res[converted_key] = value
+    return res
+
+def get_from_metabolomics_resolver(identifier: str) -> dict:
+    """
+    Get partial data (ms2 data) from USI via the GNPS2 metabolomics resolver.
+    param identifier: str - USI; it is percent-encoded before being placed in the query string
+    return: dict - resolver JSON passed through harmonize_spectrum_keys
+    raises: Exception - on any network, HTTP-status or JSON-decoding failure (original error chained)
+    """
+    url = 'https://metabolomics-usi.gnps2.org/json/' + "?usi1=" + quote(identifier, safe=":")
+    try:
+        r = requests.get(url, timeout=30)  # bounded wait so a stalled resolver cannot hang the caller
+        r.raise_for_status()
+        data = json.loads(r.text)
+    except Exception as err:
+        raise Exception("Error in retrieving data from GNPS for identifier: {}, link: {}".format(identifier, url)) from err
+    return harmonize_spectrum_keys(data)
+
+def get_data(identifier: str) -> dict:
+    """
+    Get data from GNPS, either from USI or Accession. If the identifier points to a known item in gnps,
+      it will return the full data, otherwise it will return partial data (ms2 data).
+
+    A USI containing "accession" is reduced to its last ":"-separated field and looked up as a library
+    spectrum, as is a bare accession. If that lookup fails (network error or non-JSON reply) the
+    metabolomics resolver is queried instead, with the identifier exactly as it was passed in.
+
+    param identifier: str - USI or Accession
+    return: dict - harmonized data carrying 'id', 'usi' (None for a bare accession) and, when the
+            source provides a non-empty peak list, 'peaks' sorted by their first element (m/z)
+    """
+
+    def _finalize(record: dict, usi, spectrum_id: str) -> dict:
+        """Harmonize keys, stamp 'usi' and 'id' on the record and sort its peaks by m/z."""
+        record = harmonize_spectrum_keys(record)
+        record['usi'] = usi
+        record['id'] = spectrum_id
+        peaks = record.get('peaks')
+        if isinstance(peaks, list) and len(peaks) > 0:
+            record['peaks'] = sorted(peaks, key=lambda x: x[0])
+        return record
+
+    # Bound unconditionally: the resolver fallback below reads it even for a bare accession,
+    # a path on which it used to be undefined and raised UnboundLocalError.
+    original_identifier = str(identifier)
+    usi = identifier if "mzspec" in identifier else None    # It's a USI
+
+    if usi is not None:
+        if "accession" in identifier:                       #       It's a library spectrum
+            identifier = identifier.split(":")[-1]
+        else:                                               #       It's a USI that isn't a library spectrum
+            return _finalize(get_from_metabolomics_resolver(identifier), usi, identifier)
+
+    link = "https://external.gnps2.org/gnpsspectrum?SpectrumID={}".format(identifier)
+    try:
+        res = requests.get(link, timeout=30)    # bounded wait instead of blocking indefinitely
+        parsed = res.json()
+    except Exception:
+        # Library endpoint unreachable or reply not JSON: fall back to the resolver,
+        # using the identifier as the caller originally supplied it.
+        return _finalize(get_from_metabolomics_resolver(original_identifier), usi, identifier)
+
+    # Merge whichever parts of the library record are present; each one is optional.
+    data = dict()
+    try:
+        data.update(parsed['annotations'][0])
+    except (KeyError, IndexError):    # key missing, or present but an empty list
+        pass
+    try:
+        data.update(parsed['spectruminfo'])
+    except KeyError:
+        pass
+    try:
+        data['comments'] = parsed['comments']
+    except KeyError:
+        pass
+
+    return _finalize(data, usi, identifier)
+
+def load_helpers(
+        data: List[str],
+        ratio_to_base_peak: float = None,
+        remove_large_peaks: bool = True
+        ) -> List[Compound]:
+    """Build one ``Compound`` per helper identifier via ``get_data``, applying the optional peak filters.
+    NOTE(review): the first helper that fails is logged and its exception re-raised, so none is skipped.
+    """
+    loaded_helpers, failed_helpers = [], []
+    for helper_id in data:
+        try:
+            record = get_data(helper_id)
+            compound = Compound(
+                spectrum=record['peaks'],
+                precursor_mz=record['precursor_mz'],
+                precursor_charge=record['precursor_charge'],
+                adduct=record.get('adduct', None),
+                smiles=record.get('smiles', None)
+            )
+            if ratio_to_base_peak:
+                compound.spectrum = filter_peaks_by_ratio_to_base_peak(compound.spectrum, ratio_to_base_peak=ratio_to_base_peak)
+            if remove_large_peaks:
+                compound.spectrum = remove_larger_than_precursor_peaks(compound.spectrum)
+            loaded_helpers.append(compound)
+        except Exception as e:
+            # Report on stdout, dump the traceback to stderr, then propagate.
+            print(f"Error loading helper compound {helper_id}: {str(e)}", flush=True)
+            traceback.print_exc(file=sys.stderr)
+            failed_helpers.append(helper_id)
+            raise e
+
+    print(f"Loaded {len(loaded_helpers)} helper compounds successfully. Failed to load {len(failed_helpers)} helper compounds: {failed_helpers}", flush=True)
+    return loaded_helpers
 
 def get_callbacks(app):
     
@@ -23,11 +407,27 @@ def calculate_module(data):
         # remove SMILES and USI from args
         args.pop('SMILES1', None)
         args.pop('SMILES2', None)
-        args.pop('USI1', None)
-        args.pop('USI2', None)
-        args["normalize_peaks"] = True
-        args["remove_large_peaks"] = True
-        args["ratio_to_base_peak"] = float(args["filter_peaks_variable"])
+        usi1 = args.pop('USI1', None)
+        usi2 = args.pop('USI2', None)
+        
+        spectrum1 = get_data(usi1)
+        spectrum2 = get_data(usi2)
+        if spectrum1['adduct'] is None:
+            # Replace with adduct from data
+            spectrum1['adduct'] = data.get('adduct', None)
+        if spectrum2['adduct'] is None:
+            # Replace with adduct from data
+            spectrum2['adduct'] = data.get('adduct', None)
+
+        # TODO: What to do if adduct differs at this point?
+
+        # TODO: Filter adducts in Helpers?
+
+        # Options propagated out of ModiFinder 
+        ratio_to_base_peak = args.pop('filter_peaks_variable', None)
+        remove_large_peaks = True
+        
+        # Args to pass to ModiFinder
         args['ppm_tolerance'] = float(args['ppm_tolerance'])
         helper_compounds = args.pop('Helpers', "").strip(' \t\n\r')
         helper_compounds = helper_compounds.replace(" ", "")
@@ -36,6 +436,11 @@ def calculate_module(data):
         helper_compounds = list(filter(None, helper_compounds))
         # remove "" strings
         helper_compounds = list(filter(lambda x: x != "", helper_compounds))
+        helper_compounds = load_helpers(
+            helper_compounds,
+            ratio_to_base_peak=ratio_to_base_peak,
+            remove_large_peaks=remove_large_peaks
+        )
 
         if data["SMILES1"] == "" or data["SMILES1"] is None:
             data["SMILES1"] = None
@@ -46,15 +451,20 @@ def calculate_module(data):
         try:
             if data['adduct']:
                 args['adduct'] = data['adduct']
-            main_compound = Compound(data['USI1'], **args)
-            if data["SMILES1"] is not None:
-                main_compound.update(smiles=data["SMILES1"])
-            mod_compound = Compound(data['USI2'], **args)
-            if data["SMILES2"] is not None:
-                if data["SMILES2"] !=  ".":
-                    mod_compound.update(smiles=data["SMILES2"])
-            if data["SMILES2"] is None:
-                mod_compound.structure = None
+            main_compound = Compound(
+                spectrum=spectrum1['peaks'],
+                precursor_mz=spectrum1['precursor_mz'],
+                precursor_charge=spectrum1['precursor_charge'],
+                adduct=spectrum1['adduct'],
+                smiles=data["SMILES1"]
+            )
+            mod_compound = Compound(
+                spectrum=spectrum2['peaks'],
+                precursor_mz=spectrum2['precursor_mz'],
+                precursor_charge=spectrum2['precursor_charge'],
+                adduct=spectrum2['adduct'],
+                smiles=data["SMILES2"] if data["SMILES2"] is not None and data["SMILES2"] != "" else None
+            )
             
         except Exception as e:
             raise e
@@ -67,6 +477,17 @@ def calculate_module(data):
         
         if main_compound.structure is None:
             return None, None, None, None, "Error loading SMILES1"
+        
+        # Perform actions for  ratio_to_base_peak filter
+        if ratio_to_base_peak:
+            ratio_to_base_peak = float(ratio_to_base_peak)
+            main_compound.spectrum = filter_peaks_by_ratio_to_base_peak(main_compound.spectrum, ratio_to_base_peak)
+            mod_compound.spectrum = filter_peaks_by_ratio_to_base_peak(mod_compound.spectrum, ratio_to_base_peak)
+
+        # Perform actions for remove_large_peaks filter
+        if remove_large_peaks:
+            main_compound.spectrum = remove_larger_than_precursor_peaks(main_compound.spectrum)
+            mod_compound.spectrum = remove_larger_than_precursor_peaks(mod_compound.spectrum)
 
         siteLocator = ModiFinder(main_compound, mod_compound, helpers=helper_compounds, **args)
         
diff --git a/ModiFinder_base b/ModiFinder_base
index ecb5ecf..9001142 160000
--- a/ModiFinder_base
+++ b/ModiFinder_base
@@ -1 +1 @@
-Subproject commit ecb5ecfc9d92602fe917ba392e1874d0a6223177
+Subproject commit 9001142467cb1adf46de3d3e2dad85b7a38ac981
diff --git a/pages/visualizer.py b/pages/visualizer.py
index c9ead0f..3652ce2 100644
--- a/pages/visualizer.py
+++ b/pages/visualizer.py
@@ -173,7 +173,8 @@
     dbc.CardHeader(html.H5("Contributors")),
     dbc.CardBody(
         [
-            "Reza Shahneh - UC Riverside",
+            "Reza Shahneh, Ph.D. - UC Riverside", html.Br(),
+            "Michael Strobel - UC Riverside",
             html.Br(),
             html.Br(),
             html.H5("Citation"),

From 274843484a4974a5adf755adfc5e3048aad2fab8 Mon Sep 17 00:00:00 2001
From: Michael Strobel <lidstromfan29@gmail.com>
Date: Mon, 23 Feb 2026 21:30:09 -0800
Subject: [PATCH 3/9] Update dash interface to reflect the switch to m/z rather
 than index annotation.

---
 Dash_interface/chart_section_n.py | 125 +++++++++++-------------------
 Dash_interface/computation_n.py   |   6 +-
 2 files changed, 48 insertions(+), 83 deletions(-)

diff --git a/Dash_interface/chart_section_n.py b/Dash_interface/chart_section_n.py
index c6241ea..a0ae17b 100644
--- a/Dash_interface/chart_section_n.py
+++ b/Dash_interface/chart_section_n.py
@@ -295,6 +295,7 @@ def update_peaks(data):  # , slider_value):
         if data == None:
             return {}, {"display": "none"}
         peaksObj = pickle.loads(base64.b64decode(data))
+        
         main_compound_peaks = peaksObj["main_compound_peaks"]
         mod_compound_peaks = peaksObj["mod_compound_peaks"]
         matched_peaks = peaksObj["matched_peaks"]
@@ -304,13 +305,15 @@ def update_peaks(data):  # , slider_value):
 
         fig = go.Figure()
         typesInxMain = {"matched_shifted": [], "matched_unshifted": [], "unmatched": []}
+
+        ### Assemble matched and unmatched peaks for main compound
+
         x1 = []
         y1 = []
         for peak in main_compound_peaks:
             x1.append(peak[0])
             y1.append(peak[1])
 
-        # topPeakCount = slider_value
         topPeakCount = max(
             len(main_compound_peaks),
             len(mod_compound_peaks),
@@ -319,29 +322,32 @@ def update_peaks(data):  # , slider_value):
         hoverData = {"main": [], "modified": []}
         for i in topPeaksInxModif:
             flag = False
-            for j in matched_peaks:
-                if j[0] == i:
+            for main_match_mz, mod_match_mz in matched_peaks:
+                if abs(main_compound_peaks[i][0] - main_match_mz) < 1e-6: # We have found a match for our specific peak
                     if (
                         abs(
                             main_compound_peaks[i][0]
-                            - mod_compound_peaks[j[1]][0]
+                            - mod_match_mz
                         )
                         > args["mz_tolerance"]
                     ):
-                        typesInxMain["matched_shifted"].append(i)
-                        hoverData["main"].append(j[1])
+                        typesInxMain["matched_shifted"].append([main_match_mz, y1[i], f"Shifted Matched ({mod_match_mz:.2f}, {main_compound_peaks[i][0]:.2f})"])
                     else:
-                        typesInxMain["matched_unshifted"].append(i)
+                        typesInxMain["matched_unshifted"].append([main_match_mz, y1[i], f"Matched ({mod_match_mz:.2f}, {main_compound_peaks[i][0]:.2f})"])
                     flag = True
-                    break
+                    break                
             if not flag:
-                typesInxMain["unmatched"].append(i)
+                typesInxMain["unmatched"].append([main_compound_peaks[i][0], y1[i], "Unmatched"])
+            
 
         typesInxModified = {
             "matched_shifted": [],
             "matched_unshifted": [],
             "unmatched": [],
         }
+
+        ### Assemble matched and unmatched peaks for modified compound
+
         x2 = []
         y2 = []
         for peak in mod_compound_peaks:
@@ -351,42 +357,39 @@ def update_peaks(data):  # , slider_value):
         topPeaksInxModif = sorted(range(len(y2)), key=lambda i: y2[i])[-topPeakCount:]
         for i in topPeaksInxModif:
             flag = False
-            for j in matched_peaks:
-                if j[1] == i:
+            for main_match_mz, mod_match_mz in matched_peaks:
+
+                if abs(mod_compound_peaks[i][0] - mod_match_mz) < 1e-6: # We have found a match for our specific peak
                     if (
                         abs(
-                            main_compound_peaks[j[0]][0]
-                            - mod_compound_peaks[j[1]][0]
+                            mod_compound_peaks[i][0]
+                            - main_match_mz
                         )
-                        > 0.1
+                        > args["mz_tolerance"]
                     ):
-                        typesInxModified["matched_shifted"].append([i, j[0]])
-                        hoverData["modified"].append(j[0])
+                        typesInxModified["matched_shifted"].append([mod_match_mz, -y2[i], f"Shifted Matched ({main_match_mz:.2f}, {mod_compound_peaks[i][0]:.2f})"])
+                        # hoverData["modified"].append(j[0])
                     else:
-                        typesInxModified["matched_unshifted"].append([i, j[0]])
+                        typesInxModified["matched_unshifted"].append([mod_match_mz, -y2[i], f"Matched ({main_match_mz:.2f}, {mod_compound_peaks[i][0]:.2f})"])
                     flag = True
                     break
             if not flag:
-                typesInxModified["unmatched"].append([i, -1])
+                typesInxModified["unmatched"].append([mod_compound_peaks[i][0], -y2[i], "Unmatched"])
 
         minX = min(min(x1), min(x2))
         maxX = max(max(x1), max(x2))
         minX = min(minX, main_precursor_mz, mod_precursor_mz)
         maxX = max(maxX, main_precursor_mz, mod_precursor_mz)
 
+        ### Plotting
+
         for inx_type in typesInxMain:
-            x_main = [round(x1[j], 4) for j in typesInxMain[inx_type]]
-            y1_ = [y1[j] for j in typesInxMain[inx_type]]
-            y_main = [y / max(y1_) * 100 for y in y1_]
-            x_modified = [round(x2[j[0]], 4) for j in typesInxModified[inx_type]]
-            y2_ = [y2[j[0]] for j in typesInxModified[inx_type]]
-            y_modified = [-j / max(y2_) * 100 for j in y2_]
-            indicis = typesInxMain[inx_type] + [
-                j[0] for j in typesInxModified[inx_type]
-            ]
-            x_ = x_main + x_modified
-            y_ = y_main + y_modified
-            colors = [colorsInxMain[inx_type]] * len(x_)
+
+            x = [j[0] for j in typesInxMain[inx_type]] + [j[0] for j in typesInxModified[inx_type]]
+            y = [j[1] for j in typesInxMain[inx_type]] + [j[1] for j in typesInxModified[inx_type]]
+            y = [y_i / max(y) * 100 for y_i in y]
+            hovertext = [j[2] for j in typesInxMain[inx_type]] + [j[2] for j in typesInxModified[inx_type]]
+            colors = [colorsInxMain[inx_type]] * len(x)
             if inx_type == "unmatched":
                 visibility = "legendonly"
                 if len(typesInxModified["matched_shifted"]) == 0 and len(
@@ -396,36 +399,20 @@ def update_peaks(data):  # , slider_value):
                 
                 fig.add_trace(
                     go.Bar(
-                        x=x_,
-                        y=y_,
+                        x=x,
+                        y=y,
                         width=(maxX - minX) / 500,
-                        hovertext=indicis,
+                        hovertext=hovertext,
                         name=inx_type,
                         visible=visibility,
                         marker_color=colors,
                     )
                 )
             elif inx_type == "matched_shifted":
-                hovertext = []
-                for i in range(len(x_main)):
-                    hovertext.append(
-                        str(indicis[i])
-                        + " "
-                        + "matched to:"
-                        + str(hoverData["main"][i])
-                    )
-                for i in range(len(x_main), len(x_main) + len(x_modified)):
-                    hovertext.append(
-                        str(indicis[i])
-                        + " "
-                        + "matched to:"
-                        + str(hoverData["modified"][i - len(x_main)])
-                    )
-
                 fig.add_trace(
                     go.Bar(
-                        x=x_,
-                        y=y_,
+                        x=x,
+                        y=y,
                         hovertext=hovertext,
                         name=inx_type,
                         width=(maxX - minX) / 500,
@@ -435,9 +422,9 @@ def update_peaks(data):  # , slider_value):
             else:
                 fig.add_trace(
                     go.Bar(
-                        x=x_,
-                        y=y_,
-                        hovertext=indicis,
+                        x=x,
+                        y=y,
+                        hovertext=hovertext,
                         name=inx_type,
                         width=(maxX - minX) / 500,
                         marker_color=colors,
@@ -452,8 +439,6 @@ def update_peaks(data):  # , slider_value):
                 mode="lines",
                 line=go.scatter.Line(color="black", dash="dash", width= (maxX - minX) / 600),
                 name='known precursor m/z',
-                # showlegend=False,
-                # hoverinfo='skip'
             )
         )
         fig.add_trace(
@@ -463,17 +448,9 @@ def update_peaks(data):  # , slider_value):
                 mode="lines",
                 line=go.scatter.Line(color="black", dash="dot", width= (maxX - minX) / 600),
                 name='modified precursor m/z',
-                # showlegend=False,
-                # hoverinfo='skip'
             )
         )
 
-        # minX = min(minX, main_precursor_mz, mod_precursor_mz)
-        # maxX = max(maxX, main_precursor_mz, mod_precursor_mz)
-
-        # fig.update_traces(
-        #     width=(maxX - minX) / 400,
-        # )
         fig.update_layout(
             title="Alignment of Peaks",
             bargap=0,
@@ -501,18 +478,6 @@ def update_peaks(data):  # , slider_value):
             "zIndex": "1",
         }
 
-
-    # @app.callback(
-    #     Output("peak_info", "children", allow_duplicate=True),
-    #     Input("siteLocatorObj", "data"), 
-    #     prevent_initial_call=True,
-    # )
-    # def clear_peak_info(data):
-    #     if data == None:
-    #         return ""
-    #     else:
-    #         return "Select a peak to see its fragments"
-
     @app.callback(
         Output("peak_info", "children", allow_duplicate=True),
         Input("peaks", "clickData"),
@@ -635,18 +600,18 @@ def apply_structure_filter(data, siteLocatorObj):
         main_compound_peaks = [(main_compound.spectrum.mz[i], main_compound.spectrum.intensity[i]) for i in range(len(main_compound.spectrum.mz))]
         modified_compound = siteLocator.network.nodes[modified_compound_id]['compound']
         
-        ind = main_compound.spectrum.get_peak_indexes(data["mz"])
-        main_compound.spectrum.peak_fragments_map[ind[0]] = [data["all_fragments"][i] for i in data["selected_fragments"]]
+        mzs = data["mz"]
+        main_compound.spectrum.peak_fragment_dict[mzs[0]] = [data["all_fragments"][i] for i in data["selected_fragments"]]
         
         fragmentsObj = {
-            "frags_map": main_compound.spectrum.peak_fragments_map,
+            "frags_map": main_compound.spectrum.peak_fragment_dict,
             "structure": main_compound.structure,
             "peaks": main_compound_peaks,
             "Precursor_MZ": main_compound.spectrum.precursor_mz,
         }
 
 
-        fragments = list(main_compound.spectrum.peak_fragments_map[ind[0]])
+        fragments = list(main_compound.spectrum.peak_fragment_dict[mzs[0]])
         result_posibility_indicies = []
         for fragment in fragments:
             fragment_indicies = []
diff --git a/Dash_interface/computation_n.py b/Dash_interface/computation_n.py
index 7c9fb66..9f411b6 100644
--- a/Dash_interface/computation_n.py
+++ b/Dash_interface/computation_n.py
@@ -412,10 +412,10 @@ def calculate_module(data):
         
         spectrum1 = get_data(usi1)
         spectrum2 = get_data(usi2)
-        if spectrum1['adduct'] is None:
+        if spectrum1.get('adduct') is None:
             # Replace with adduct from data
             spectrum1['adduct'] = data.get('adduct', None)
-        if spectrum2['adduct'] is None:
+        if spectrum2.get('adduct') is None:
             # Replace with adduct from data
             spectrum2['adduct'] = data.get('adduct', None)
 
@@ -528,7 +528,7 @@ def calculate_module(data):
         }
 
         fragmentsObj = {
-            "frags_map": main_compound.spectrum.peak_fragments_map,
+            "frags_map": main_compound.spectrum.peak_fragment_dict,
             "structure": main_compound.structure,
             "peaks": main_compound_peaks,
             "Precursor_MZ": main_compound.spectrum.precursor_mz,

From ee12d9d8254a6a7a1f9eb3c8158e06fabcf03e29 Mon Sep 17 00:00:00 2001
From: Michael Strobel <lidstromfan29@gmail.com>
Date: Tue, 24 Feb 2026 17:19:20 -0800
Subject: [PATCH 4/9] Update for new API.

---
 Dash_interface/chart_section_n.py    | 62 +++++++++++++++--------
 Dash_interface/computation_n.py      | 74 +++++++++++-----------------
 Dash_interface/fragment_selection.py |  2 +-
 ModiFinder_base                      |  2 +-
 4 files changed, 71 insertions(+), 69 deletions(-)

diff --git a/Dash_interface/chart_section_n.py b/Dash_interface/chart_section_n.py
index a0ae17b..af1456d 100644
--- a/Dash_interface/chart_section_n.py
+++ b/Dash_interface/chart_section_n.py
@@ -1,3 +1,5 @@
+import sys
+
 from dash import Dash, html, dcc, Input, Output, State, dash_table, Patch
 from dash.exceptions import PreventUpdate
 import pickle
@@ -303,6 +305,11 @@ def update_peaks(data):  # , slider_value):
         main_precursor_mz = peaksObj["main_precursor_mz"]
         mod_precursor_mz = peaksObj["mod_precursor_mz"]
 
+        # Convert m/z values back down from keys
+        main_compound_peaks = [(mz/1e6, intensity) for mz, intensity in main_compound_peaks]
+        mod_compound_peaks = [(mz/1e6, intensity) for mz, intensity in mod_compound_peaks]
+        matched_peaks = [(main_mz/1e6, mod_mz/1e6) for main_mz, mod_mz in matched_peaks]
+
         fig = go.Figure()
         typesInxMain = {"matched_shifted": [], "matched_unshifted": [], "unmatched": []}
 
@@ -331,9 +338,9 @@ def update_peaks(data):  # , slider_value):
                         )
                         > args["mz_tolerance"]
                     ):
-                        typesInxMain["matched_shifted"].append([main_match_mz, y1[i], f"Shifted Matched ({mod_match_mz:.2f}, {main_compound_peaks[i][0]:.2f})"])
+                        typesInxMain["matched_shifted"].append([main_match_mz, y1[i], f"{mod_match_mz:.2f}:{main_compound_peaks[i][0]:.2f}"])
                     else:
-                        typesInxMain["matched_unshifted"].append([main_match_mz, y1[i], f"Matched ({mod_match_mz:.2f}, {main_compound_peaks[i][0]:.2f})"])
+                        typesInxMain["matched_unshifted"].append([main_match_mz, y1[i], f"{mod_match_mz:.2f}:{main_compound_peaks[i][0]:.2f}"])
                     flag = True
                     break                
             if not flag:
@@ -367,10 +374,10 @@ def update_peaks(data):  # , slider_value):
                         )
                         > args["mz_tolerance"]
                     ):
-                        typesInxModified["matched_shifted"].append([mod_match_mz, -y2[i], f"Shifted Matched ({main_match_mz:.2f}, {mod_compound_peaks[i][0]:.2f})"])
+                        typesInxModified["matched_shifted"].append([mod_match_mz, -y2[i], f"{main_match_mz:.2f}:{mod_compound_peaks[i][0]:.2f}"])
                         # hoverData["modified"].append(j[0])
                     else:
-                        typesInxModified["matched_unshifted"].append([mod_match_mz, -y2[i], f"Matched ({main_match_mz:.2f}, {mod_compound_peaks[i][0]:.2f})"])
+                        typesInxModified["matched_unshifted"].append([mod_match_mz, -y2[i], f"{main_match_mz:.2f}:{mod_compound_peaks[i][0]:.2f}"])
                     flag = True
                     break
             if not flag:
@@ -387,7 +394,13 @@ def update_peaks(data):  # , slider_value):
 
             x = [j[0] for j in typesInxMain[inx_type]] + [j[0] for j in typesInxModified[inx_type]]
             y = [j[1] for j in typesInxMain[inx_type]] + [j[1] for j in typesInxModified[inx_type]]
-            y = [y_i / max(y) * 100 for y_i in y]
+            # Separate norm constants for pos and neg y
+            if len(y) == 0:
+                continue
+            
+            max_y = max(y) if max(y) > 0 else 1
+            min_y = min(y) if min(y) < 0 else -1
+            y = [y_i / max_y * 100 if y_i > 0 else -(y_i / min_y) * 100 for y_i in y]
             hovertext = [j[2] for j in typesInxMain[inx_type]] + [j[2] for j in typesInxModified[inx_type]]
             colors = [colorsInxMain[inx_type]] * len(x)
             if inx_type == "unmatched":
@@ -498,17 +511,24 @@ def display_click_data(clickData, fragmentsObj):
                 
                 structure = fragmentsObj["structure"]
                 frags_map = fragmentsObj["frags_map"]
-                peaks = fragmentsObj["peaks"]
+                peak_keys     = [int(x[0]) for x in fragmentsObj["peaks"]]
 
-                peak_index = -1
-                for i, peak in enumerate(peaks):
-                    if abs(peak[0]- clicked_peak_x)/clicked_peak_x*1000000 < 40:
-                        peak_index = i
+                peak_key = None
+                for k in peak_keys:
+                    if abs((k/1e6)- clicked_peak_x)/clicked_peak_x*1000000 < 40:
+                        peak_key = k   # Cast to int (numpy ints won't key)
                         break
-                if peak_index == -1:
-                    return "error in finding peak index"
+                if peak_key is None:
+                    raise ValueError(f"Clicked peak not found in peaks list "f"(clicked_peak_x: {clicked_peak_x}, peaks: {peak_keys})")
+                
+                try:
+                    fragments = list(frags_map[peak_key])
+                except KeyError:
+                    # Check for the closest key
+                    closest_key = min(frags_map.keys(), key=lambda k: abs(k/1e6 - clicked_peak_x))
+
+                    raise ValueError(f"Fragment map does not contain peak key {peak_key} (type {type(peak_key)}), closest key is {closest_key} (type {type(closest_key)} with m/z {closest_key/1e6}, clicked m/z was {clicked_peak_x}")
                 
-                fragments = list(frags_map[peak_index])
                 result_posibility_indicies = []
                 for fragment in fragments:
                     fragment_indicies = []
@@ -529,9 +549,9 @@ def display_click_data(clickData, fragmentsObj):
                 )
             except:
                 import traceback
-
-                traceback.print_exc()
+                traceback.print_exc(file=sys.stderr)
                 return "siteLocator object not found"
+            
         return None
 
     # change the color of the bar when clicked
@@ -564,9 +584,9 @@ def change_bar_color(clickData, figure):
                             # figure["data"][i]["marker"]["color"][j] = "green"
                             # if matched shifted peak, highlight the corresponding peak in the other bar
                             if figure["data"][i]["name"] == "matched_shifted":
-                                index = figure["data"][i]["hovertext"][j].split(":")[1]
+                                peak_x = str(figure["data"][i]["hovertext"][j].split(":")[0]).strip()
                                 for l in range(len(figure["data"][i]["x"])):
-                                    if (figure["data"][i]["hovertext"][l].split(" ")[0] == index and figure["data"][i]["y"][l] < 0):
+                                    if (str(figure["data"][i]["hovertext"][l].split(':')[1]).strip() == peak_x and figure["data"][i]["y"][l] < 0):
                                         patched_figure["data"][i]["marker"]["color"][l] = "olive"
                                         break
 
@@ -582,7 +602,7 @@ def change_bar_color(clickData, figure):
     @app.callback(
         [Output("siteLocatorObj", "data", allow_duplicate=True), 
          Output("peak_info", "children", allow_duplicate=True),
-         Output('fragmentsObj', 'data', allow_duplicate=True)],
+         Output("fragmentsObj", "data", allow_duplicate=True)],
         Input(FragmentsDisplayAIO.ids.fragment_data("fragmentDisplay"), "data"),
         State("siteLocatorObj", "data"),
         prevent_initial_call=True,
@@ -597,11 +617,11 @@ def apply_structure_filter(data, siteLocatorObj):
         modified_compound_id = siteLocator._get_unknown()
         main_compound_id = siteLocator._get_known_neighbor(modified_compound_id)
         main_compound = siteLocator.network.nodes[main_compound_id]['compound']
-        main_compound_peaks = [(main_compound.spectrum.mz[i], main_compound.spectrum.intensity[i]) for i in range(len(main_compound.spectrum.mz))]
+        main_compound_peaks = [(main_compound.spectrum.mz_key[i], main_compound.spectrum.intensity[i]) for i in range(len(main_compound.spectrum.mz_key))]
         modified_compound = siteLocator.network.nodes[modified_compound_id]['compound']
         
         mzs = data["mz"]
-        main_compound.spectrum.peak_fragment_dict[mzs[0]] = [data["all_fragments"][i] for i in data["selected_fragments"]]
+        main_compound.spectrum.peak_fragment_dict[int(mzs[0])] = [data["all_fragments"][i] for i in data["selected_fragments"]]
         
         fragmentsObj = {
             "frags_map": main_compound.spectrum.peak_fragment_dict,
@@ -611,7 +631,7 @@ def apply_structure_filter(data, siteLocatorObj):
         }
 
 
-        fragments = list(main_compound.spectrum.peak_fragment_dict[mzs[0]])
+        fragments = list(main_compound.spectrum.peak_fragment_dict[int(mzs[0])])
         result_posibility_indicies = []
         for fragment in fragments:
             fragment_indicies = []
diff --git a/Dash_interface/computation_n.py b/Dash_interface/computation_n.py
index 9f411b6..296f2c1 100644
--- a/Dash_interface/computation_n.py
+++ b/Dash_interface/computation_n.py
@@ -17,10 +17,12 @@
 adduct_mapping = {'M+H': '[M+H]+',
 '[M+H]': '[M+H]+',
 '[M+H]+': '[M+H]+',
+'[M+H]1+': '[M+H]+',
 'M+H]': '[M+H]+',
 'M+Na': '[M+Na]+',
 '[M+Na]': '[M+Na]+',
 '[M+Na]+': '[M+Na]+',
+'[M+Na]1+': '[M+Na]+',
 '2M+Na': '[2M+Na]+',
 'M2+Na': '[2M+Na]+',
 '[2M+Na]+': '[2M+Na]+',
@@ -28,6 +30,7 @@
 'M+K': '[M+K]+',
 '[M+K]': '[M+K]+',
 '[M+K]+': '[M+K]+',
+'[M+K]1+': '[M+K]+',
 '[2M+K]+': '[2M+K]+',
 '2M+K': '[2M+K]+',
 '[2M+K]': '[2M+K]+',
@@ -46,6 +49,7 @@
 'M-H': '[M-H]-',
 '[M-H]': '[M-H]-',
 '[M-H]-': '[M-H]-',
+'[M-H]1-': '[M-H]-',
 'M-H-': '[M-H]-',
 'M-H1': '[M-H]-',
 '3M+Na': '[3M+Na]+',
@@ -79,6 +83,7 @@
 '[2M-H+HCOOH]': '[2M+HCOOH-H]-',
 'M+NH4': '[M+NH3+H]+',
 '[M+NH4]+': '[M+NH3+H]+',
+'[M+NH4]1+': '[M+NH3+H]+',
 '[M+NH4]': '[M+NH3+H]+',
 '2M+Hac-H': '[2M+CH3COOH-H]-',
 '2M-H': '[2M-H]-',
@@ -120,6 +125,7 @@
 'M+Cl-': '[M+Cl]-',
 'M+Cl': '[M+Cl]-',
 '[M+Cl]': '[M+Cl]-',
+'[M+Cl]1-': '[M+Cl]-',
 'M+K-2H': '[M-2H+K]-',
 '[M-2H+K]': '[M-2H+K]-',
 'M-2H]': '[M-2H]2-',
@@ -128,6 +134,7 @@
 'M+Na-2H': '[M-2H+Na]-',
 '[M-2H+Na]': '[M-2H+Na]-',
 'M+Br': '[M+Br]-',
+'[M+Br]1-': '[M+Br]-',
 '3M-H': '[3M-H]-',
 '[3M-H]': '[3M-H]-',
 '[M+H+CH3OH]': '[M+CH3OH+H]+',
@@ -203,10 +210,10 @@ def filter_peaks_by_ratio_to_base_peak(spectrum, ratio_to_base_peak:float = 0.01
         new_intensity = []
         for index, intensity in enumerate(spectrum.intensity):
             if intensity >= float(ratio_to_base_peak) * base_peak:
-                new_mz.append(spectrum.mz[index])
+                new_mz.append(spectrum.mz_key[index]) # TODO, swap back once mz_key full integrated
                 new_intensity.append(intensity)
         
-        spectrum.mz = new_mz
+        spectrum.mz_key = new_mz
         spectrum.intensity = new_intensity
 
         return spectrum
@@ -218,12 +225,12 @@ def remove_larger_than_precursor_peaks(spectrum):
         
         new_mz = []
         new_intensity = []
-        for mz, intensity in zip(spectrum.mz, spectrum.intensity):
-            if mz < spectrum.precursor_mz * 0.99:
+        for mz, intensity in zip(spectrum.mz_key, spectrum.intensity): # TODO, swap back once mz_key full integrated
+            if mz < (spectrum.precursor_mz * 1e6)* 0.99:
                 new_mz.append(mz)
                 new_intensity.append(intensity)
         
-        spectrum.mz = new_mz
+        spectrum.mz_key = new_mz
         spectrum.intensity = new_intensity
 
         return spectrum
@@ -320,7 +327,6 @@ def get_data(identifier: str) -> dict:
             # Sort peaks if needed
             if 'peaks' in data and isinstance(data['peaks'], list) and len(data['peaks']) > 0:
                 data['peaks'] = sorted(data['peaks'], key=lambda x: x[0])
-
             return data
 
     link = "https://external.gnps2.org/gnpsspectrum?SpectrumID={}".format(identifier)
@@ -353,7 +359,6 @@ def get_data(identifier: str) -> dict:
     # Ensure peaks are sorted
     if 'peaks' in data and isinstance(data['peaks'], list) and len(data['peaks']) > 0:
         data['peaks'] = sorted(data['peaks'], key=lambda x: x[0])
-
     return data
 
 def load_helpers(
@@ -393,7 +398,13 @@ def load_helpers(
 def get_callbacks(app):
     
     @app.callback(
-        [Output('siteLocatorObj', 'data'),  Output('siriusData', 'children'), Output('peaksObj', 'data'), Output('fragmentsObj', 'data')], Output('error-input', 'children'),
+        [
+            Output('siteLocatorObj', 'data'),
+            Output('siriusData', 'children'),
+            Output('peaksObj', 'data'),
+            Output('fragmentsObj', 'data')
+         ],
+         Output('error-input', 'children'),
         Input('InputData', 'data'),
         )
     def calculate_module(data):
@@ -414,10 +425,10 @@ def calculate_module(data):
         spectrum2 = get_data(usi2)
         if spectrum1.get('adduct') is None:
             # Replace with adduct from data
-            spectrum1['adduct'] = data.get('adduct', None)
+            spectrum1['adduct'] = adduct_mapping[data['adduct']]    # Should raise error here if we don't know what it is
         if spectrum2.get('adduct') is None:
             # Replace with adduct from data
-            spectrum2['adduct'] = data.get('adduct', None)
+            spectrum2['adduct'] = adduct_mapping[data['adduct']]
 
         # TODO: What to do if adduct differs at this point?
 
@@ -449,8 +460,9 @@ def calculate_module(data):
             data["SMILES2"] = None
 
         try:
-            if data['adduct']:
-                args['adduct'] = data['adduct']
+            # Use known compound adduct
+            args['adduct'] = spectrum1.get('adduct', None)
+
             main_compound = Compound(
                 spectrum=spectrum1['peaks'],
                 precursor_mz=spectrum1['precursor_mz'],
@@ -490,48 +502,18 @@ def calculate_module(data):
             mod_compound.spectrum = remove_larger_than_precursor_peaks(mod_compound.spectrum)
 
         siteLocator = ModiFinder(main_compound, mod_compound, helpers=helper_compounds, **args)
-        
 
-        if mod_compound.structure is not None:
-            if not (mod_compound.structure.HasSubstructMatch(main_compound.structure) or main_compound.structure.HasSubstructMatch(mod_compound.structure)):
-                return None, None, None, None, "None of the structures are substructures of the other"
-            if mod_compound.structure.HasSubstructMatch(main_compound.structure) and main_compound.structure.HasSubstructMatch(mod_compound.structure):
-                return None, None, None, None, "Structures are the same"
+        peaksObj, fragmentsObj = siteLocator.get_result()
         
         siriusText = "SIRIUS data was not available"
-        # else:
-        #     print("SIRIUS data was not available", data['USI1'])
-        # if siteLocator.main_compound.Precursor_MZ > siteLocator.modified_compound.Precursor_MZ:
-        #     return None, "Molecule precursor mass is higher than modified precursor mass", siriusText
-        # else:
+       
         args = copy.deepcopy(data)
         # remove SMILES and USI from args
         args.pop('SMILES1', None)
         args.pop('SMILES2', None)
         args.pop('USI1', None)
         args.pop('USI2', None)
-        
-        main_compound_peaks = [(main_compound.spectrum.mz[i], main_compound.spectrum.intensity[i]) for i in range(len(main_compound.spectrum.mz))]
-        mod_compound_peaks = [(mod_compound.spectrum.mz[i], mod_compound.spectrum.intensity[i]) for i in range(len(mod_compound.spectrum.mz))]
-        matched_peaks = siteLocator.get_edge_detail(main_compound.id, mod_compound.id)
-        if matched_peaks is None:
-            matched_peaks = []
-        else:
-            matched_peaks = matched_peaks.get_matches_pairs()
-        peaksObj = {
-            "main_compound_peaks": main_compound_peaks,
-            "mod_compound_peaks": mod_compound_peaks,
-            "matched_peaks": matched_peaks,
-            "args": args,
-            "main_precursor_mz": main_compound.spectrum.precursor_mz,
-            "mod_precursor_mz": mod_compound.spectrum.precursor_mz,
-        }
-
-        fragmentsObj = {
-            "frags_map": main_compound.spectrum.peak_fragment_dict,
-            "structure": main_compound.structure,
-            "peaks": main_compound_peaks,
-            "Precursor_MZ": main_compound.spectrum.precursor_mz,
-        }
+
+        peaksObj.update({"args": args})
 
         return base64.b64encode(pickle.dumps(siteLocator)).decode(),  siriusText, base64.b64encode(pickle.dumps(peaksObj)).decode(), base64.b64encode(pickle.dumps(fragmentsObj)).decode(), None
\ No newline at end of file
diff --git a/Dash_interface/fragment_selection.py b/Dash_interface/fragment_selection.py
index 96ab102..0f370b6 100644
--- a/Dash_interface/fragment_selection.py
+++ b/Dash_interface/fragment_selection.py
@@ -88,7 +88,7 @@ def __init__(self, fragments_indicies, mol, info, aio_id=None, *args, **kwargs):
                 ),
             ],
         )
-        print("fragments_indicies", fragments_indicies)
+
         self.aio_id = aio_id
         super().__init__(
             children=[
diff --git a/ModiFinder_base b/ModiFinder_base
index 9001142..72f2455 160000
--- a/ModiFinder_base
+++ b/ModiFinder_base
@@ -1 +1 @@
-Subproject commit 9001142467cb1adf46de3d3e2dad85b7a38ac981
+Subproject commit 72f2455760453716abb85c1618f1c1f5752d3235

From f397350de9210314b5d8988fe27bbf99c2b1710c Mon Sep 17 00:00:00 2001
From: Michael Strobel <lidstromfan29@gmail.com>
Date: Tue, 24 Feb 2026 17:25:34 -0800
Subject: [PATCH 5/9] Deprecate USI and spectrum input for molecule drawer.

---
 pages/visualizer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pages/visualizer.py b/pages/visualizer.py
index 3652ce2..51c4647 100644
--- a/pages/visualizer.py
+++ b/pages/visualizer.py
@@ -117,11 +117,11 @@
     html.H1("Molecule Drawer"),
     dbc.Card([
     dbc.InputGroup(
-            [dbc.InputGroupText('Smiles1'), dbc.Input(placeholder='SMILES, InChI, Spectrum ID, or USI',id='Mol1', value = "")],
+            [dbc.InputGroupText('Smiles1'), dbc.Input(placeholder='SMILES or InChI',id='Mol1', value = "")],
             style = {'width': '90vw', 'margin': '1vh auto'}
     ),
     dbc.InputGroup(
-            [dbc.InputGroupText('Smiles2'), dbc.Input(placeholder='SMILES, InChI, Spectrum ID, or USI',id='Mol2', value = "")],
+            [dbc.InputGroupText('Smiles2'), dbc.Input(placeholder='SMILES or InChI',id='Mol2', value = "")],
             style = {'width': '90vw', 'margin': '1vh auto'}
     ),
     dbc.Checklist(

From 952e612ebde478942d52e0dba29953caa967c8a7 Mon Sep 17 00:00:00 2001
From: Michael Strobel <lidstromfan29@gmail.com>
Date: Wed, 25 Feb 2026 11:27:36 -0800
Subject: [PATCH 6/9] Update visualizer.

---
 Dash_interface/computation_n.py | 412 +-------------------------------
 app_utils.py                    | 367 ++++++++++++++++++++++++++++
 pages/visualizer.py             |  16 +-
 3 files changed, 390 insertions(+), 405 deletions(-)
 create mode 100644 app_utils.py

diff --git a/Dash_interface/computation_n.py b/Dash_interface/computation_n.py
index 296f2c1..6e0c2b9 100644
--- a/Dash_interface/computation_n.py
+++ b/Dash_interface/computation_n.py
@@ -3,397 +3,12 @@
 from dash import Dash, html, dcc, Input, Output, State, dash_table
 import base64
 import pickle
-import json
 import copy
 from urllib.parse import quote
-from typing import List
-import sys
 
-import requests
 from modifinder import ModiFinder, Compound
-from rdkit import Chem
-from datetime import datetime
 
-adduct_mapping = {'M+H': '[M+H]+',
-'[M+H]': '[M+H]+',
-'[M+H]+': '[M+H]+',
-'[M+H]1+': '[M+H]+',
-'M+H]': '[M+H]+',
-'M+Na': '[M+Na]+',
-'[M+Na]': '[M+Na]+',
-'[M+Na]+': '[M+Na]+',
-'[M+Na]1+': '[M+Na]+',
-'2M+Na': '[2M+Na]+',
-'M2+Na': '[2M+Na]+',
-'[2M+Na]+': '[2M+Na]+',
-'[2M+Na]': '[2M+Na]+',
-'M+K': '[M+K]+',
-'[M+K]': '[M+K]+',
-'[M+K]+': '[M+K]+',
-'[M+K]1+': '[M+K]+',
-'[2M+K]+': '[2M+K]+',
-'2M+K': '[2M+K]+',
-'[2M+K]': '[2M+K]+',
-'M+H-H20': '[M-H2O+H]+',
-'M+H-H2O': '[M-H2O+H]+',
-'[M-H2O+H]+': '[M-H2O+H]+',
-'M-H20+H': '[M-H2O+H]+',
-'[M+H-H2O]+': '[M-H2O+H]+',
-'M-H2O+H': '[M-H2O+H]+',
-'M+H-2H2O': '[M-2H2O+H]+',
-'M-2H2O+H': '[M-2H2O+H]+',
-'[M-2H2O+H]+': '[M-2H2O+H]+',
-'M-2(H2O)+H': '[M-2H2O+H]+',
-'2M+Na-2H': '[2M-2H+Na]-',
-'2M-2H+Na': '[2M-2H+Na]-',
-'M-H': '[M-H]-',
-'[M-H]': '[M-H]-',
-'[M-H]-': '[M-H]-',
-'[M-H]1-': '[M-H]-',
-'M-H-': '[M-H]-',
-'M-H1': '[M-H]-',
-'3M+Na': '[3M+Na]+',
-'[3M+Na]+': '[3M+Na]+',
-'[M]+': '[M]+',
-'M+': '[M]+',
-'M-e': '[M]+',
-'M2+H': '[2M+H]+',
-'2M+H': '[2M+H]+',
-'[2M+H]+': '[2M+H]+',
-'[2M+H]': '[2M+H]+',
-'[M+2H]': '[M+2H]2+',
-'[M+2H]2+': '[M+2H]2+',
-'M+2H]': '[M+2H]2+',
-'M+2H+2': '[M+2H]2+',
-'M+2H': '[M+2H]2+',
-'M+acetate': '[M+CH3COOH-H]-',
-'M+CH3COOH-H': '[M+CH3COOH-H]-',
-'M+CH3COO': '[M+CH3COOH-H]-',
-'M+ACN+H': '[M+CH3CN+H]+',
-'[M+ACN+H]+': '[M+CH3CN+H]+',
-'[M+H+CH3CN]': '[M+CH3CN+H]+',
-'M+2Na': '[M+2Na]2+',
-'M+2Na]': '[M+2Na]2+',
-'M+HCOO': '[M+HCOOH-H]-',
-'[M-H+HCOOH]': '[M+HCOOH-H]-',
-'M+FA-H': '[M+HCOOH-H]-',
-'M+formate': '[M+HCOOH-H]-',
-'[M+H+HCOOH]': '[M+HCOOH-H]-',
-'2M+FA-H': '[2M+HCOOH-H]-',
-'[2M-H+HCOOH]': '[2M+HCOOH-H]-',
-'M+NH4': '[M+NH3+H]+',
-'[M+NH4]+': '[M+NH3+H]+',
-'[M+NH4]1+': '[M+NH3+H]+',
-'[M+NH4]': '[M+NH3+H]+',
-'2M+Hac-H': '[2M+CH3COOH-H]-',
-'2M-H': '[2M-H]-',
-'[2M-H]': '[2M-H]-',
-'2M+NH4': '[2M+NH3+H]+',
-'[2M+NH4]+': '[2M+NH3+H]+',
-'[2M+NH4]': '[2M+NH3+H]+',
-'[2M+Ca]2+': '[2M+Ca]2+',
-'[M+Ca]2+': '[M+Ca]2+',
-'[3M+Ca]2+': '[3M+Ca]2+',
-'[2M+Ca-H]+': '[2M-H+Ca]+',
-'[2M-H2O+H]+': '[2M-H2O+H]+',
-'[4M+Ca]2+': '[4M+Ca]2+',
-'[3M+NH4]+': '[3M+NH3+H]+',
-'3M+NH4': '[3M+NH3+H]+',
-'[2M-2H2O+H]+': '[2M-2H2O+H]+',
-'[M+ACN+NH4]+': '[M+CH3CN+NH3+H]+',
-'[5M+Ca]2+': '[5M+Ca]2+',
-'[3M+K]+': '[3M+K]+',
-'[3M+Ca-H]+': '[3M-H+Ca]2+',
-'[M-H+2Na]+': '[M-H+2Na]+',
-'M-H+2Na': '[M-H+2Na]+',
-'[M-3H2O+H]+': '[M-3H2O+H]+',
-'M-3H2O+H': '[M-3H2O+H]+',
-'[M-3H2O+2H]2+': '[M-3H2O+2H]2+',
-'[M-2H2O+2H]2+': '[M-2H2O+2H]2+',
-'[M-4H2O+H]+': '[M-4H2O+H]+',
-'[M-5H2O+H]+': '[M-5H2O+H]+',
-'[M+Ca-H]+': '[M+Ca-H]+',
-'[2M-H+2Na]+': '[2M-H+2Na]+',
-'[2M-3H2O+H]+': '[2M-3H2O+H]+',
-'[M+H+Na]2+': '[M+Na+H]2+',
-'[M-2H2O+NH4]+': '[M-2H2O+NH3+H]+',
-'[2M-2H+Na]': '[2M-2H+Na]-',
-'[M-H+CH3OH]': '[M+CH3OH-H]-',
-'M+MeOH-H': '[M+CH3OH-H]-',
-'M-H2O-H': '[M-H2O-H]-',
-'[M-H-H2O]': '[M-H2O-H]-',
-'M+Cl-': '[M+Cl]-',
-'M+Cl': '[M+Cl]-',
-'[M+Cl]': '[M+Cl]-',
-'[M+Cl]1-': '[M+Cl]-',
-'M+K-2H': '[M-2H+K]-',
-'[M-2H+K]': '[M-2H+K]-',
-'M-2H]': '[M-2H]2-',
-'M-2H': '[M-2H]2-',
-'M-2H-': '[M-2H]2-',
-'M+Na-2H': '[M-2H+Na]-',
-'[M-2H+Na]': '[M-2H+Na]-',
-'M+Br': '[M+Br]-',
-'[M+Br]1-': '[M+Br]-',
-'3M-H': '[3M-H]-',
-'[3M-H]': '[3M-H]-',
-'[M+H+CH3OH]': '[M+CH3OH+H]+',
-'M+CH3OH+H': '[M+CH3OH+H]+',
-'[2M+H+CH3CN]': '[2M+CH3CN+H]+',
-'M-CO2-H': '[M-CO2-H]-',
-'[2M-2H+K]': '[2M-2H+K]-',
-'2M+K-2H': '[2M-2H+K]-',
-'[M+Na+CH3CN]': '[M+CH3CN+Na]+',
-'M-H2+H': '[M-H2+H]-',
-'M-H+Cl]': '[M-H+Cl]2-',
-'M-H+Cl': '[M-H+Cl]2-',
-'3M+H': '[3M+H]+',
-'[3M+H]': '[3M+H]+',
-'M+H-NH3': '[M-NH3+H]+',
-'M-NH3+H': '[M-NH3+H]+',
-'M-H+C2H2O': '[M+C2H2O-H]-',
-'M+H-C2H2O': '[M+C2H2O-H]-',
-'M-H+CH2O2': '[M+CH2O2-H]-',
-'M+CH2O2-H': '[M+CH2O2-H]-',
-'M+TFA-H': '[M+C2HF3O2-H]-',
-'M-C2HF3O2-H': '[M+C2HF3O2-H]-',
-'[M]1+': '[M]1+'}
-
-
-gnps_keys_mapping = {
-    ## precursor
-    "precursor_mz": "precursor_mz",
-    ## charge
-    "precursor_charge": "precursor_charge",
-    "charge": "precursor_charge", 
-    ## smiles
-    "smiles": "smiles",
-    "smile": "smiles",
-    ## adduct
-    "adduct": "adduct",
-    ## peaks
-    "peaks": "peaks",
-    ## instrument
-    "instrument": "instrument",
-    ## name
-    "name": "name",
-    "compound_name": "name",
-    ## spectrum_id
-    "spectrum_id": "spectrum_id",
-    "spectrumid": "spectrum_id",
-    ## exact mass
-    "exact_mass": "exact_mass",
-    "exactmass": "exact_mass",
-    ## mz
-    "fragment_mz": "mz",
-    "mz": "mz",
-    "mzs": "mz",
-    ## intensity
-    "fragment_intensities": "intensity",
-    "intensities": "intensity",
-}
-
-def filter_peaks_by_ratio_to_base_peak(spectrum, ratio_to_base_peak:float = 0.01):
-        """Remove peaks with intensity lower than a given ratio to the base peak.
-        
-        Parameters
-        ----------
-        ratio_to_base_peak : float (0, 1), default is 0.01
-            The ratio to the base peak.
-        change_spectrum : bool, default is True
-            If True, the peaks with intensity lower than the given ratio will be removed in place.
-            If False, a new Spectrum object with the peaks removed will be returned.
-        """
-        
-        base_peak = max(spectrum.intensity)
-        new_mz = []
-        new_intensity = []
-        for index, intensity in enumerate(spectrum.intensity):
-            if intensity >= float(ratio_to_base_peak) * base_peak:
-                new_mz.append(spectrum.mz_key[index]) # TODO, swap back once mz_key full integrated
-                new_intensity.append(intensity)
-        
-        spectrum.mz_key = new_mz
-        spectrum.intensity = new_intensity
-
-        return spectrum
-
-def remove_larger_than_precursor_peaks(spectrum):
-        """
-        Remove peaks that are larger than the precursor m/z value.
-        """
-        
-        new_mz = []
-        new_intensity = []
-        for mz, intensity in zip(spectrum.mz_key, spectrum.intensity): # TODO, swap back once mz_key full integrated
-            if mz < (spectrum.precursor_mz * 1e6)* 0.99:
-                new_mz.append(mz)
-                new_intensity.append(intensity)
-        
-        spectrum.mz_key = new_mz
-        spectrum.intensity = new_intensity
-
-        return spectrum
-
-
-    
-def harmonize_spectrum_keys(data):
-    """
-    Parse the data to a universal format.
-
-    This function takes a dictionary of data and converts it into a universal format.
-    It processes specific keys like "peaks_json" and "Charge" differently, and attempts
-    to convert other values to floats. If the conversion to float is successful and the
-    key is "Charge", it further converts the value to an integer.
-
-    Args:
-        :data (dict): The input data dictionary to be parsed.
-
-    Returns:
-        :dict: A dictionary with keys converted to a universal format and values processed
-              accordingly.
-    """
-    def _convert_to_universal_key(key: str) -> str:
-        """
-        Convert different types of keys to universal keys.
-        This function standardizes various key names to a universal format. 
-
-        Args:
-            :key (str): The key to be converted.
-        
-        Returns:
-            :str: The converted key.
-        """
-        key = key.lower()
-        key = key.replace(" ", "_")
-        return gnps_keys_mapping.get(key, key)
-
-    res = {}
-    for key, value in data.items():
-        converted_key = _convert_to_universal_key(key)
-        if key == "peaks_json":
-            res['peaks'] = json.loads(value)
-        elif converted_key == "adduct":
-            res[converted_key] = adduct_mapping.get(value, value)
-        else:
-            try:
-                if converted_key in ["precursor_charge", "precursor_mz", "ms_level", "scan", "exact_mass"]:
-                    value = float(value)
-                if converted_key in ["precursor_charge", "charge", "ms_level"]:
-                        value = int(value)
-            except Exception:
-                raise ValueError(f"Could not convert {key} to number")
-            res[converted_key] = value
-    return res
-
-def get_from_metabolomics_resolver(identifier: str) -> dict:
-    """
-    Get partial data (ms2 data) from USI
-    param identifier: str - USI
-    return: dict - dictionary of data with keys: precursor_mz, precursor_charge, mz: list, intensity: list
-    """
-    url = 'https://metabolomics-usi.gnps2.org/json/' + "?usi1=" + identifier
-    try:
-        r = requests.get(url)
-        data = json.loads(r.text)
-    except:
-        raise Exception("Error in retrieving data from GNPS for identifier: {}, link: {}".format(identifier, url))
-
-    data = harmonize_spectrum_keys(data)
-    return data
-
-def get_data(identifier: str) -> dict:
-    """
-    Get data from GNPS, either from USI or Accession. if the identifier points to a known item in gnps,
-      it will return the full data, otherwise it will return partial data (ms2 data)
-    param identifier: str - USI or Accession
-    return: dict - dictionary of data
-    """
-
-    data = dict()
-    data['usi'] = None
-
-    if "mzspec" in identifier:                              # It's a USI
-        data['usi'] = identifier
-
-        if "accession" in identifier:                       #       It's a library spectrum
-            original_identifier = str(identifier)
-            identifier = identifier.split(":")[-1]
-        else:                                               #       It's a USI that isn't a library spectrum
-            data = get_from_metabolomics_resolver(identifier)
-            data['id'] = identifier
-            data = harmonize_spectrum_keys(data)
-
-            # Sort peaks if needed
-            if 'peaks' in data and isinstance(data['peaks'], list) and len(data['peaks']) > 0:
-                data['peaks'] = sorted(data['peaks'], key=lambda x: x[0])
-            return data
-
-    link = "https://external.gnps2.org/gnpsspectrum?SpectrumID={}".format(identifier)
-    try:
-        res = requests.get(link)
-        parsed = res.json()
-    except Exception:
-        data = get_from_metabolomics_resolver(original_identifier)
-        data['usi'] = original_identifier
-        data['id'] = identifier
-        data = harmonize_spectrum_keys(data)
-        return data
-
-    try:
-        data.update(parsed['annotations'][0])
-    except KeyError:
-        pass
-    try:
-        data.update(parsed['spectruminfo'])
-    except KeyError:
-        pass
-    try:
-        data['comments'] = parsed['comments']
-    except KeyError:
-        pass
-
-    data = harmonize_spectrum_keys(data)
-    data['id'] = identifier
-
-    # Ensure peaks are sorted
-    if 'peaks' in data and isinstance(data['peaks'], list) and len(data['peaks']) > 0:
-        data['peaks'] = sorted(data['peaks'], key=lambda x: x[0])
-    return data
-
-def load_helpers(
-        data: List[str],
-        ratio_to_base_peak: float = None,
-        remove_large_peaks: bool = True
-        ) -> List[Compound]:
-    """ Load helpers from a list of identifiers, failing gracefully if the smile string is invalid.
-    """
-    loaded_helpers = []
-    failed_helpers = []
-    for h in data:
-        try:
-            lh = get_data(h)
-            ch = Compound(
-                spectrum=lh['peaks'],
-                precursor_mz=lh['precursor_mz'],
-                precursor_charge=lh['precursor_charge'],
-                adduct=lh.get('adduct', None),
-                smiles=lh.get('smiles', None)
-            )
-            if ratio_to_base_peak:
-                ch.spectrum = filter_peaks_by_ratio_to_base_peak(ch.spectrum, ratio_to_base_peak=ratio_to_base_peak)
-            if remove_large_peaks:
-                ch.spectrum = remove_larger_than_precursor_peaks(ch.spectrum)
-            loaded_helpers.append(ch)
-        except Exception as e:
-            # Print the traceback
-            print(f"Error loading helper compound {h}: {str(e)}", flush=True)
-            traceback.print_exc(file=sys.stderr)
-            failed_helpers.append(h)
-            raise e
-    
-    print(f"Loaded {len(loaded_helpers)} helper compounds successfully. Failed to load {len(failed_helpers)} helper compounds: {failed_helpers}", flush=True)
-    return loaded_helpers
+from app_utils import get_data, load_helpers, filter_peaks_by_ratio_to_base_peak, adduct_mapping
 
 def get_callbacks(app):
     
@@ -436,7 +51,6 @@ def calculate_module(data):
 
         # Options propagated out of ModiFinder 
         ratio_to_base_peak = args.pop('filter_peaks_variable', None)
-        remove_large_peaks = True
         
         # Args to pass to ModiFinder
         args['ppm_tolerance'] = float(args['ppm_tolerance'])
@@ -450,7 +64,6 @@ def calculate_module(data):
         helper_compounds = load_helpers(
             helper_compounds,
             ratio_to_base_peak=ratio_to_base_peak,
-            remove_large_peaks=remove_large_peaks
         )
 
         if data["SMILES1"] == "" or data["SMILES1"] is None:
@@ -463,15 +76,22 @@ def calculate_module(data):
             # Use known compound adduct
             args['adduct'] = spectrum1.get('adduct', None)
 
+            spectrum1_peaks = spectrum1['peaks']
+            spectrum2_peaks = spectrum2['peaks']
+
+            if ratio_to_base_peak:
+                spectrum1_peaks = filter_peaks_by_ratio_to_base_peak(spectrum1_peaks, ratio_to_base_peak=ratio_to_base_peak)
+                spectrum2_peaks = filter_peaks_by_ratio_to_base_peak(spectrum2_peaks, ratio_to_base_peak=ratio_to_base_peak)
+
             main_compound = Compound(
-                spectrum=spectrum1['peaks'],
+                spectrum=spectrum1_peaks,
                 precursor_mz=spectrum1['precursor_mz'],
                 precursor_charge=spectrum1['precursor_charge'],
                 adduct=spectrum1['adduct'],
                 smiles=data["SMILES1"]
             )
             mod_compound = Compound(
-                spectrum=spectrum2['peaks'],
+                spectrum=spectrum2_peaks,
                 precursor_mz=spectrum2['precursor_mz'],
                 precursor_charge=spectrum2['precursor_charge'],
                 adduct=spectrum2['adduct'],
@@ -479,7 +99,6 @@ def calculate_module(data):
             )
             
         except Exception as e:
-            raise e
             # if exception is of type value error, return the error message
             if type(e) == ValueError:
                 return None, None, None, None, str(e)
@@ -489,17 +108,6 @@ def calculate_module(data):
         
         if main_compound.structure is None:
             return None, None, None, None, "Error loading SMILES1"
-        
-        # Perform actions for  ratio_to_base_peak filter
-        if ratio_to_base_peak:
-            ratio_to_base_peak = float(ratio_to_base_peak)
-            main_compound.spectrum = filter_peaks_by_ratio_to_base_peak(main_compound.spectrum, ratio_to_base_peak)
-            mod_compound.spectrum = filter_peaks_by_ratio_to_base_peak(mod_compound.spectrum, ratio_to_base_peak)
-
-        # Perform actions for remove_large_peaks filter
-        if remove_large_peaks:
-            main_compound.spectrum = remove_larger_than_precursor_peaks(main_compound.spectrum)
-            mod_compound.spectrum = remove_larger_than_precursor_peaks(mod_compound.spectrum)
 
         siteLocator = ModiFinder(main_compound, mod_compound, helpers=helper_compounds, **args)
 
diff --git a/app_utils.py b/app_utils.py
new file mode 100644
index 0000000..456b89a
--- /dev/null
+++ b/app_utils.py
@@ -0,0 +1,367 @@
+import sys
+import traceback
+import requests
+import json
+from typing import List, Tuple
+
+from modifinder import Compound
+
+adduct_mapping = {'M+H': '[M+H]+',  # raw adduct spelling -> normalized form; looked up with .get(value, value) in _harmonize_spectrum_keys
+'[M+H]': '[M+H]+',
+'[M+H]+': '[M+H]+',
+'[M+H]1+': '[M+H]+',
+'M+H]': '[M+H]+',
+'M+Na': '[M+Na]+',
+'[M+Na]': '[M+Na]+',
+'[M+Na]+': '[M+Na]+',
+'[M+Na]1+': '[M+Na]+',
+'2M+Na': '[2M+Na]+',
+'M2+Na': '[2M+Na]+',
+'[2M+Na]+': '[2M+Na]+',
+'[2M+Na]': '[2M+Na]+',
+'M+K': '[M+K]+',
+'[M+K]': '[M+K]+',
+'[M+K]+': '[M+K]+',
+'[M+K]1+': '[M+K]+',
+'[2M+K]+': '[2M+K]+',
+'2M+K': '[2M+K]+',
+'[2M+K]': '[2M+K]+',
+'M+H-H20': '[M-H2O+H]+',
+'M+H-H2O': '[M-H2O+H]+',
+'[M-H2O+H]+': '[M-H2O+H]+',
+'M-H20+H': '[M-H2O+H]+',
+'[M+H-H2O]+': '[M-H2O+H]+',
+'M-H2O+H': '[M-H2O+H]+',
+'M+H-2H2O': '[M-2H2O+H]+',
+'M-2H2O+H': '[M-2H2O+H]+',
+'[M-2H2O+H]+': '[M-2H2O+H]+',
+'M-2(H2O)+H': '[M-2H2O+H]+',
+'2M+Na-2H': '[2M-2H+Na]-',
+'2M-2H+Na': '[2M-2H+Na]-',
+'M-H': '[M-H]-',
+'[M-H]': '[M-H]-',
+'[M-H]-': '[M-H]-',
+'[M-H]1-': '[M-H]-',
+'M-H-': '[M-H]-',
+'M-H1': '[M-H]-',
+'3M+Na': '[3M+Na]+',
+'[3M+Na]+': '[3M+Na]+',
+'[M]+': '[M]+',
+'M+': '[M]+',
+'M-e': '[M]+',
+'M2+H': '[2M+H]+',
+'2M+H': '[2M+H]+',
+'[2M+H]+': '[2M+H]+',
+'[2M+H]': '[2M+H]+',
+'[M+2H]': '[M+2H]2+',
+'[M+2H]2+': '[M+2H]2+',
+'M+2H]': '[M+2H]2+',
+'M+2H+2': '[M+2H]2+',
+'M+2H': '[M+2H]2+',
+'M+acetate': '[M+CH3COOH-H]-',
+'M+CH3COOH-H': '[M+CH3COOH-H]-',
+'M+CH3COO': '[M+CH3COOH-H]-',
+'M+ACN+H': '[M+CH3CN+H]+',
+'[M+ACN+H]+': '[M+CH3CN+H]+',
+'[M+H+CH3CN]': '[M+CH3CN+H]+',
+'M+2Na': '[M+2Na]2+',
+'M+2Na]': '[M+2Na]2+',
+'M+HCOO': '[M+HCOOH-H]-',
+'[M-H+HCOOH]': '[M+HCOOH-H]-',
+'M+FA-H': '[M+HCOOH-H]-',
+'M+formate': '[M+HCOOH-H]-',
+'[M+H+HCOOH]': '[M+HCOOH-H]-',  # NOTE(review): a '+H' spelling mapped to a '-H' anion - confirm this is intended
+'2M+FA-H': '[2M+HCOOH-H]-',
+'[2M-H+HCOOH]': '[2M+HCOOH-H]-',
+'M+NH4': '[M+NH3+H]+',
+'[M+NH4]+': '[M+NH3+H]+',
+'[M+NH4]1+': '[M+NH3+H]+',
+'[M+NH4]': '[M+NH3+H]+',
+'2M+Hac-H': '[2M+CH3COOH-H]-',
+'2M-H': '[2M-H]-',
+'[2M-H]': '[2M-H]-',
+'2M+NH4': '[2M+NH3+H]+',
+'[2M+NH4]+': '[2M+NH3+H]+',
+'[2M+NH4]': '[2M+NH3+H]+',
+'[2M+Ca]2+': '[2M+Ca]2+',
+'[M+Ca]2+': '[M+Ca]2+',
+'[3M+Ca]2+': '[3M+Ca]2+',
+'[2M+Ca-H]+': '[2M-H+Ca]+',
+'[2M-H2O+H]+': '[2M-H2O+H]+',
+'[4M+Ca]2+': '[4M+Ca]2+',
+'[3M+NH4]+': '[3M+NH3+H]+',
+'3M+NH4': '[3M+NH3+H]+',
+'[2M-2H2O+H]+': '[2M-2H2O+H]+',
+'[M+ACN+NH4]+': '[M+CH3CN+NH3+H]+',
+'[5M+Ca]2+': '[5M+Ca]2+',
+'[3M+K]+': '[3M+K]+',
+'[3M+Ca-H]+': '[3M-H+Ca]2+',  # NOTE(review): key is 1+ but target is 2+ (cf. '[2M+Ca-H]+' -> '[2M-H+Ca]+') - confirm
+'[M-H+2Na]+': '[M-H+2Na]+',
+'M-H+2Na': '[M-H+2Na]+',
+'[M-3H2O+H]+': '[M-3H2O+H]+',
+'M-3H2O+H': '[M-3H2O+H]+',
+'[M-3H2O+2H]2+': '[M-3H2O+2H]2+',
+'[M-2H2O+2H]2+': '[M-2H2O+2H]2+',
+'[M-4H2O+H]+': '[M-4H2O+H]+',
+'[M-5H2O+H]+': '[M-5H2O+H]+',
+'[M+Ca-H]+': '[M+Ca-H]+',
+'[2M-H+2Na]+': '[2M-H+2Na]+',
+'[2M-3H2O+H]+': '[2M-3H2O+H]+',
+'[M+H+Na]2+': '[M+Na+H]2+',
+'[M-2H2O+NH4]+': '[M-2H2O+NH3+H]+',
+'[2M-2H+Na]': '[2M-2H+Na]-',
+'[M-H+CH3OH]': '[M+CH3OH-H]-',
+'M+MeOH-H': '[M+CH3OH-H]-',
+'M-H2O-H': '[M-H2O-H]-',
+'[M-H-H2O]': '[M-H2O-H]-',
+'M+Cl-': '[M+Cl]-',
+'M+Cl': '[M+Cl]-',
+'[M+Cl]': '[M+Cl]-',
+'[M+Cl]1-': '[M+Cl]-',
+'M+K-2H': '[M-2H+K]-',
+'[M-2H+K]': '[M-2H+K]-',
+'M-2H]': '[M-2H]2-',
+'M-2H': '[M-2H]2-',
+'M-2H-': '[M-2H]2-',
+'M+Na-2H': '[M-2H+Na]-',
+'[M-2H+Na]': '[M-2H+Na]-',
+'M+Br': '[M+Br]-',
+'[M+Br]1-': '[M+Br]-',
+'3M-H': '[3M-H]-',
+'[3M-H]': '[3M-H]-',
+'[M+H+CH3OH]': '[M+CH3OH+H]+',
+'M+CH3OH+H': '[M+CH3OH+H]+',
+'[2M+H+CH3CN]': '[2M+CH3CN+H]+',
+'M-CO2-H': '[M-CO2-H]-',
+'[2M-2H+K]': '[2M-2H+K]-',
+'2M+K-2H': '[2M-2H+K]-',
+'[M+Na+CH3CN]': '[M+CH3CN+Na]+',
+'M-H2+H': '[M-H2+H]-',
+'M-H+Cl]': '[M-H+Cl]2-',
+'M-H+Cl': '[M-H+Cl]2-',
+'3M+H': '[3M+H]+',
+'[3M+H]': '[3M+H]+',
+'M+H-NH3': '[M-NH3+H]+',
+'M-NH3+H': '[M-NH3+H]+',
+'M-H+C2H2O': '[M+C2H2O-H]-',
+'M+H-C2H2O': '[M+C2H2O-H]-',
+'M-H+CH2O2': '[M+CH2O2-H]-',
+'M+CH2O2-H': '[M+CH2O2-H]-',
+'M+TFA-H': '[M+C2HF3O2-H]-',
+'M-C2HF3O2-H': '[M+C2HF3O2-H]-',
+'[M]1+': '[M]1+'}  # NOTE(review): left as '[M]1+' while other '1+' spellings are normalized (e.g. '[M+H]1+') - confirm
+
+
+gnps_keys_mapping = {  # normalized (lower-cased, spaces -> underscores) source key -> universal key
+    ## precursor m/z
+    "precursor_mz": "precursor_mz",
+    ## charge ("charge" is folded into "precursor_charge")
+    "precursor_charge": "precursor_charge",
+    "charge": "precursor_charge", 
+    ## smiles (singular spelling accepted too)
+    "smiles": "smiles",
+    "smile": "smiles",
+    ## adduct (its value is additionally normalized through adduct_mapping)
+    "adduct": "adduct",
+    ## peaks
+    "peaks": "peaks",
+    ## instrument
+    "instrument": "instrument",
+    ## name
+    "name": "name",
+    "compound_name": "name",
+    ## spectrum_id (with or without the underscore)
+    "spectrum_id": "spectrum_id",
+    "spectrumid": "spectrum_id",
+    ## exact mass (with or without the underscore)
+    "exact_mass": "exact_mass",
+    "exactmass": "exact_mass",
+    ## mz: every spelling collapses to the single key "mz"
+    "fragment_mz": "mz",
+    "mz": "mz",
+    "mzs": "mz",
+    ## intensity: every spelling collapses to the single key "intensity"
+    "fragment_intensities": "intensity",
+    "intensities": "intensity",
+}
+
+def filter_peaks_by_ratio_to_base_peak(peaks:List[Tuple[float,float]], ratio_to_base_peak:float = 0.01):
+    """Remove peaks with intensity lower than a given ratio to the base peak.
+
+    Parameters
+    ----------
+    peaks : List[Tuple[float, float]]
+        List of (mz, intensity) pairs representing the spectrum peaks.
+    ratio_to_base_peak : float (0, 1), default is 0.01
+        The ratio to the base peak; coerced with float(), so numeric strings work.
+
+    Returns
+    -------
+    List[Tuple[float, float]]
+        Peaks with intensity >= ratio * base peak, in input order; [] if no peaks.
+    """
+    if len(peaks) == 0:
+        return []  # max() below would raise ValueError on an empty spectrum
+    # Hoist the cutoff so it is computed once instead of once per peak.
+    cutoff = float(ratio_to_base_peak) * max(intensity for _, intensity in peaks)
+    return [(mz, intensity) for mz, intensity in peaks if intensity >= cutoff]
+    
+
+
+def get_data(identifier: str) -> dict:
+    """
+    Get data from GNPS, either from USI or Accession. If the identifier points to a known item in GNPS,
+      it will return the full data, otherwise it will return partial data (ms2 data)
+    param identifier: str - USI or Accession
+    return: dict - dictionary of data
+    """
+    # Keep the caller's identifier: the resolver fallback below needs it even for a bare accession.
+    original_identifier = identifier
+    data = {'usi': None}
+
+    if "mzspec" in identifier:                              # It's a USI
+        data['usi'] = identifier
+        if "accession" in identifier:                       #       It's a library spectrum
+            identifier = identifier.split(":")[-1]
+        else:                                               #       It's a USI that isn't a library spectrum
+            data = _get_from_metabolomics_resolver(identifier)
+            data['id'] = identifier
+            data = _harmonize_spectrum_keys(data)
+
+            # Sort peaks if needed
+            if 'peaks' in data and isinstance(data['peaks'], list) and len(data['peaks']) > 0:
+                data['peaks'] = sorted(data['peaks'], key=lambda x: x[0])
+            return data
+
+    link = "https://external.gnps2.org/gnpsspectrum?SpectrumID={}".format(identifier)
+    try:
+        res = requests.get(link)
+        parsed = res.json()
+    except Exception:
+        # Library lookup failed (request error or non-JSON body): fall back to the USI resolver.
+        fallback_usi = data['usi']                          # None when the caller passed a bare accession
+        data = _get_from_metabolomics_resolver(original_identifier)
+        data['usi'] = fallback_usi
+        data['id'] = identifier
+        data = _harmonize_spectrum_keys(data)
+        return data
+
+    try:
+        data.update(parsed['annotations'][0])
+    except (KeyError, IndexError):                          # key missing, or present but an empty list
+        pass
+    try:
+        data.update(parsed['spectruminfo'])
+    except KeyError:
+        pass
+    try:
+        data['comments'] = parsed['comments']
+    except KeyError:
+        pass
+
+    data = _harmonize_spectrum_keys(data)
+    data['id'] = identifier
+
+    # Ensure peaks are sorted
+    if 'peaks' in data and isinstance(data['peaks'], list) and len(data['peaks']) > 0:
+        data['peaks'] = sorted(data['peaks'], key=lambda x: x[0])
+    return data
+
+def load_helpers(
+        data: List[str],
+        ratio_to_base_peak: float = None,
+        ) -> List[Compound]:
+    """ Load helper Compounds from a list of identifiers via get_data, optionally filtering peaks by ratio.
+    NOTE(review): a failing helper is logged and then re-raised, so loading stops at the first failure. """
+    loaded_helpers = []
+    failed_helpers = []
+    for h in data:
+        try:
+            lh = get_data(h)
+
+            if ratio_to_base_peak:
+                lh['peaks'] = filter_peaks_by_ratio_to_base_peak(lh['peaks'], ratio_to_base_peak=ratio_to_base_peak)
+
+            ch = Compound(
+                spectrum=lh['peaks'],
+                precursor_mz=lh['precursor_mz'],
+                precursor_charge=lh['precursor_charge'],
+                adduct=lh.get('adduct', None),
+                smiles=lh.get('smiles', None)
+            )
+
+            loaded_helpers.append(ch)
+        except Exception as e:
+            # Log the failure and its traceback to stderr, record the identifier, then propagate the error.
+            print(f"Error loading helper compound {h}: {str(e)}", flush=True)
+            traceback.print_exc(file=sys.stderr)
+            failed_helpers.append(h)
+            raise e
+
+    print(f"Loaded {len(loaded_helpers)} helper compounds successfully. Failed to load {len(failed_helpers)} helper compounds: {failed_helpers}", flush=True)
+    return loaded_helpers
+
+def _harmonize_spectrum_keys(data):
+    """
+    Normalize a raw spectrum record into the key/value layout used by this app.
+
+    Each key is lower-cased, has spaces replaced by underscores, and is then looked up in
+    gnps_keys_mapping (unmapped keys keep their normalized spelling). Values are handled as:
+
+    * raw key "peaks_json": decoded with json.loads and stored under 'peaks';
+    * "adduct": passed through adduct_mapping, unknown adducts are kept as given;
+    * "precursor_charge", "precursor_mz", "ms_level", "scan", "exact_mass": cast to float,
+      with "precursor_charge" and "ms_level" then converted to int;
+    * anything else: copied unchanged.
+
+    Args:
+        :data (dict): The input data dictionary to be parsed.
+
+    Returns:
+        :dict: A new dictionary keyed by the universal names.
+
+    Raises:
+        :ValueError: If a numeric field cannot be converted.
+    """
+    float_fields = ["precursor_charge", "precursor_mz", "ms_level", "scan", "exact_mass"]
+    int_fields = ["precursor_charge", "charge", "ms_level"]
+
+    harmonized = {}
+    for raw_key, raw_value in data.items():
+        normalized = raw_key.lower().replace(" ", "_")
+        universal_key = gnps_keys_mapping.get(normalized, normalized)
+
+        # The peaks check is made against the raw key, not the normalized one.
+        if raw_key == "peaks_json":
+            harmonized['peaks'] = json.loads(raw_value)
+            continue
+        if universal_key == "adduct":
+            harmonized[universal_key] = adduct_mapping.get(raw_value, raw_value)
+            continue
+
+        # Cast through float first, then int, so a value such as "1.0" ends up as 1.
+        try:
+            if universal_key in float_fields:
+                raw_value = float(raw_value)
+            if universal_key in int_fields:
+                raw_value = int(raw_value)
+        except Exception:
+            raise ValueError(f"Could not convert {raw_key} to number")
+        harmonized[universal_key] = raw_value
+    return harmonized
+
+def _get_from_metabolomics_resolver(identifier: str) -> dict:
+    """
+    Get partial data (ms2 data) from USI
+    param identifier: str - USI
+    return: dict - dictionary of data with keys: precursor_mz, precursor_charge, mz: list, intensity: list
+    """
+    url = 'https://metabolomics-usi.gnps2.org/json/' + "?usi1=" + identifier
+    try:
+        r = requests.get(url)
+        data = json.loads(r.text)
+    except Exception as err:  # not a bare except: KeyboardInterrupt/SystemExit must still propagate
+        raise Exception("Error in retrieving data from GNPS for identifier: {}, link: {}".format(identifier, url)) from err
+
+    data = _harmonize_spectrum_keys(data)
+    return data
\ No newline at end of file
diff --git a/pages/visualizer.py b/pages/visualizer.py
index 51c4647..625605d 100644
--- a/pages/visualizer.py
+++ b/pages/visualizer.py
@@ -28,6 +28,8 @@
 from flask import Flask, send_file, request, jsonify
 import json
 from app import app
+from app_utils import get_data
+import traceback
 from furl import furl
 from myopic_mces import MCES
 
@@ -413,17 +415,25 @@ def update_spectra_output(Spec1, Spec2, boolean_inputs):
                 input = Spec2
             else:
                 input = Spec1
-            input = Spectrum(input, ignore_adduct_format=True)
+
+            data = get_data(input)
+            input = Spectrum(**data, ignore_adduct_format=True)
+
             png = mf_vis.draw_spectrum(input, **kwargs)
         else:
-            Spec1 = Spectrum(Spec1, ignore_adduct_format=True)
-            Spec2 = Spectrum(Spec2, ignore_adduct_format=True)
+
+            data1 = get_data(Spec1)
+            data2 = get_data(Spec2)
+
+            Spec1 = Spectrum(**data1, ignore_adduct_format=True)
+            Spec2 = Spectrum(**data2, ignore_adduct_format=True)
             cosine, matches = _cosine_fast(Spec1, Spec2, 0.1, 40, True)
             png = mf_vis.draw_alignment([Spec1, Spec2], [matches], **kwargs)
         
         img = png_to_showable_src(png)
         return html.Img(src=img, style={'margin': 'auto', 'height': '50vh'})
     except Exception as e:
+        print(traceback.format_exc(), file=sys.stderr)
         return str(e)
 
 

From 1659c14b3622078370bee5b544c50593c0866a3e Mon Sep 17 00:00:00 2001
From: Michael Strobel <lidstromfan29@gmail.com>
Date: Wed, 25 Feb 2026 11:28:21 -0800
Subject: [PATCH 7/9] Bump base version on remote.

---
 ModiFinder_base | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ModiFinder_base b/ModiFinder_base
index 72f2455..fff4fde 160000
--- a/ModiFinder_base
+++ b/ModiFinder_base
@@ -1 +1 @@
-Subproject commit 72f2455760453716abb85c1618f1c1f5752d3235
+Subproject commit fff4fde7e6a69ffdf0d7955fc3208f964379af62

From 94bd53d7d9cecd8bcf1f9af4f59300198be06161 Mon Sep 17 00:00:00 2001
From: Michael Strobel <lidstromfan29@gmail.com>
Date: Wed, 25 Feb 2026 11:43:30 -0800
Subject: [PATCH 8/9] Sync base version.

---
 ModiFinder_base | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ModiFinder_base b/ModiFinder_base
index fff4fde..a511460 160000
--- a/ModiFinder_base
+++ b/ModiFinder_base
@@ -1 +1 @@
-Subproject commit fff4fde7e6a69ffdf0d7955fc3208f964379af62
+Subproject commit a511460410faf57f9fb5303f5843d4cc2094c3bf

From f8ef7e5f29f86f47c7b080ff833d3cfe5942faba Mon Sep 17 00:00:00 2001
From: Michael Strobel <lidstromfan29@gmail.com>
Date: Wed, 25 Feb 2026 11:51:29 -0800
Subject: [PATCH 9/9] Update prod data path.

---
 docker-compose-prod.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker-compose-prod.yml b/docker-compose-prod.yml
index d90a97f..ae5b0c9 100644
--- a/docker-compose-prod.yml
+++ b/docker-compose-prod.yml
@@ -2,5 +2,5 @@ version: '3'
 services:
   mod-site:
     volumes:
-      - /home/user/LabData/Reza/data:/app/data:rw
+      - /nas-services/data_resources/modifinder:/app/data:ro
     
\ No newline at end of file