From ada3654dfa741ac116bb45f3dc9a68d2842c5102 Mon Sep 17 00:00:00 2001 From: matteopilz Date: Mon, 1 Sep 2025 09:30:29 +0200 Subject: [PATCH 1/2] change to PeptideIdentificationList --- docs/source/user_guide/PSM_to_features.rst | 2 +- docs/source/user_guide/export_files_GNPS.rst | 2 +- .../source/user_guide/export_pandas_dataframe.rst | 2 +- docs/source/user_guide/identification_data.rst | 5 +++-- docs/source/user_guide/interactive_plots.rst | 15 ++++++--------- docs/source/user_guide/other_ms_data_formats.rst | 6 +++--- docs/source/user_guide/peptide_search.rst | 10 +++++----- docs/source/user_guide/quality_control.rst | 2 +- .../untargeted_metabolomics_preprocessing.rst | 4 ++-- requirements.txt | 1 + 10 files changed, 24 insertions(+), 25 deletions(-) diff --git a/docs/source/user_guide/PSM_to_features.rst b/docs/source/user_guide/PSM_to_features.rst index a17b053d3..58effd90b 100644 --- a/docs/source/user_guide/PSM_to_features.rst +++ b/docs/source/user_guide/PSM_to_features.rst @@ -45,7 +45,7 @@ Next, load the PeptideIdentifications from an `.idXML` file: .. code-block:: python - peptide_ids = [] + peptide_ids = oms.PeptideIdentificationList() protein_ids = [] oms.IdXMLFile().load(idxml_file, protein_ids, peptide_ids) diff --git a/docs/source/user_guide/export_files_GNPS.rst b/docs/source/user_guide/export_files_GNPS.rst index 100e6f212..172b489ff 100644 --- a/docs/source/user_guide/export_files_GNPS.rst +++ b/docs/source/user_guide/export_files_GNPS.rst @@ -47,7 +47,7 @@ from your :py:class:`~.ConsensusMap` that have no :term:`MS2` spectra annotated. filtered_map = oms.ConsensusMap(consensus_map) filtered_map.clear(False) for feature in consensus_map: - if feature.getPeptideIdentifications(): + if feature.getPeptideIdentifications().size() > 0: filtered_map.push_back(feature) consensusXML_file = "filtered.consensusXML" diff --git a/docs/source/user_guide/export_pandas_dataframe.rst b/docs/source/user_guide/export_pandas_dataframe.rst index 5d38de028..17f6a3de7 100644 --- a/docs/source/user_guide/export_pandas_dataframe.rst +++ b/docs/source/user_guide/export_pandas_dataframe.rst @@ -110,7 +110,7 @@ PeptideIdentification urlretrieve(url + "small.idXML", "small.idXML") prot_ids = [] - pep_ids = [] + pep_ids = oms.PeptideIdentificationList() oms.IdXMLFile().load("small.idXML", prot_ids, pep_ids) df = oms.peptide_identifications_to_df(pep_ids) diff --git a/docs/source/user_guide/identification_data.rst b/docs/source/user_guide/identification_data.rst index e05a78872..9d3fe5589 100644 --- a/docs/source/user_guide/identification_data.rst +++ b/docs/source/user_guide/identification_data.rst @@ -161,7 +161,8 @@ We can now display the peptides we just stored: :linenos: # Iterate over PeptideIdentification - peptide_ids = [peptide_id] + peptide_ids = oms.PeptideIdentificationList() + peptide_ids.push_back(peptide_id) for peptide_id in peptide_ids: # Peptide identification values print("Peptide ID m/z:", peptide_id.getMZ()) @@ -193,7 +194,7 @@ discussed :ref:`anchor-other-id-data`) which we would do as follows: oms.IdXMLFile().store("out.idXML", [protein_id], peptide_ids) # and load it back into memory prot_ids = [] - pep_ids = [] + pep_ids = oms.PeptideIdentificationList() oms.IdXMLFile().load("out.idXML", prot_ids, pep_ids) # Iterate over all protein hits diff --git a/docs/source/user_guide/interactive_plots.rst b/docs/source/user_guide/interactive_plots.rst index 428981568..73d14dfbe 100644 --- a/docs/source/user_guide/interactive_plots.rst +++ b/docs/source/user_guide/interactive_plots.rst @@ -75,17 +75,14 @@ interactively zoomed-in if you execute the code in a notebook min_alpha=0, ) .opts(active_tools=["box_zoom"], tools=["hover"], hooks=[new_bounds_hook]) - .opts( # weird.. I have no idea why one has to do this. But with one opts you will get an error - plot=dict( - width=800, - height=800, - xlabel="Retention time (s)", - ylabel="mass/charge (Da)", - ) - ) ) - hd.dynspread(raster, threshold=0.7, how="add", shape="square") +hd.dynspread(raster, threshold=0.7, how="add", shape="square").opts( + width=800, + height=800, + xlabel="Retention time (s)", + ylabel="mass/charge (Da)", +) Result: diff --git a/docs/source/user_guide/other_ms_data_formats.rst b/docs/source/user_guide/other_ms_data_formats.rst index f1268afb6..2de416e71 100644 --- a/docs/source/user_guide/other_ms_data_formats.rst +++ b/docs/source/user_guide/other_ms_data_formats.rst @@ -17,7 +17,7 @@ You can store and load identification data from an `idXML` file as follows: gh = gh = "https://raw.githubusercontent.com/OpenMS/pyopenms-docs/master" urlretrieve(gh + "/src/data/IdXMLFile_whole.idXML", "test.idXML") protein_ids = [] - peptide_ids = [] + peptide_ids = oms.PeptideIdentificationList() oms.IdXMLFile().load("test.idXML", protein_ids, peptide_ids) oms.IdXMLFile().store("test.out.idXML", protein_ids, peptide_ids) @@ -31,7 +31,7 @@ You can store and load identification data from an `mzIdentML` file as follows: gh = gh = "https://raw.githubusercontent.com/OpenMS/pyopenms-docs/master" urlretrieve(gh + "/src/data/MzIdentML_3runs.mzid", "test.mzid") protein_ids = [] - peptide_ids = [] + peptide_ids = oms.PeptideIdentificationList() oms.MzIdentMLFile().load("test.mzid", protein_ids, peptide_ids) oms.MzIdentMLFile().store("test.out.mzid", protein_ids, peptide_ids) .. # alternatively: -- dont do this, doesnt work @@ -48,7 +48,7 @@ You can store and load identification data from a TPP `pepXML` file as follows: gh = gh = "https://raw.githubusercontent.com/OpenMS/pyopenms-docs/master" urlretrieve(gh + "/src/data/PepXMLFile_test.pepxml", "test.pepxml") protein_ids = [] - peptide_ids = [] + peptide_ids = oms.PeptideIdentificationList() oms.PepXMLFile().load("test.pepxml", protein_ids, peptide_ids) oms.PepXMLFile().store("test.out.pepxml", protein_ids, peptide_ids) diff --git a/docs/source/user_guide/peptide_search.rst b/docs/source/user_guide/peptide_search.rst index 3d144064b..c30c2950a 100644 --- a/docs/source/user_guide/peptide_search.rst +++ b/docs/source/user_guide/peptide_search.rst @@ -31,7 +31,7 @@ a fasta database of protein sequences: urlretrieve(gh + "/src/data/SimpleSearchEngine_1.mzML", "searchfile.mzML") urlretrieve(gh + "/src/data/SimpleSearchEngine_1.fasta", "search.fasta") protein_ids = [] - peptide_ids = [] + peptide_ids = oms.PeptideIdentificationList() oms.SimpleSearchEngineAlgorithm().search( "searchfile.mzML", "search.fasta", protein_ids, peptide_ids ) @@ -143,9 +143,9 @@ ppm\ (\pm 2\ ppm)`, we expect that we will not find the hit at :math:`775.38` m/ salgo.setParameters(p) protein_ids = [] - peptide_ids = [] + peptide_ids = oms.PeptideIdentificationList() salgo.search("searchfile.mzML", "search.fasta", protein_ids, peptide_ids) - print("Found", len(peptide_ids), "peptides") + print("Found", peptide_ids.size(), "peptides") As we can see, using a smaller precursor mass tolerance leads the algorithm to find only one hit instead of two. Similarly, if we use the wrong enzyme for @@ -189,7 +189,7 @@ Now include some additional decoy database generation step as well as subsequent # Run SimpleSearchAlgorithm, store protein and peptide ids protein_ids = [] - peptide_ids = [] + peptide_ids = oms.PeptideIdentificationList() # set some custom search parameters simplesearch = oms.SimpleSearchEngineAlgorithm() @@ -224,7 +224,7 @@ This is done by applying one of the available protein inference algorithms on th :linenos: protein_ids = [] - peptide_ids = [] + peptide_ids = oms.PeptideIdentificationList() # Re-run search since we need to keep decoy hits for inference simplesearch.search(searchfile, target_decoy_database, protein_ids, peptide_ids) diff --git a/docs/source/user_guide/quality_control.rst b/docs/source/user_guide/quality_control.rst index 6cf0fa639..21cab536f 100644 --- a/docs/source/user_guide/quality_control.rst +++ b/docs/source/user_guide/quality_control.rst @@ -43,7 +43,7 @@ proteomics and metabolomics quality metrics. oms.FeatureXMLFile().load("features.featureXML", feature_map) prot_ids = [] # list of ProteinIdentification() - pep_ids = [] # list of PeptideIdentification() + pep_ids = oms.PeptideIdentificationList() # list of PeptideIdentification() # OPTIONAL: get protein and peptide identifications from idXML file urlretrieve(gh + "/src/data/OpenPepXL_output.idXML", "ids.idXML") oms.IdXMLFile().load("ids.idXML", prot_ids, pep_ids) diff --git a/docs/source/user_guide/untargeted_metabolomics_preprocessing.rst b/docs/source/user_guide/untargeted_metabolomics_preprocessing.rst index 7064019fc..a0e8ebd0d 100644 --- a/docs/source/user_guide/untargeted_metabolomics_preprocessing.rst +++ b/docs/source/user_guide/untargeted_metabolomics_preprocessing.rst @@ -141,7 +141,7 @@ Map :term:`MS2` spectra to features as :py:class:`~.PeptideIdentification` objec if feature_map.getMetaValue("spectra_data")[ 0 ].decode() == exp.getMetaValue("mzML_path"): - peptide_ids = [] + peptide_ids = oms.PeptideIdentificationList() protein_ids = [] mapper.annotate( feature_map, @@ -161,7 +161,7 @@ Map :term:`MS2` spectra to features as :py:class:`~.PeptideIdentification` objec prot_ids.append(prot_id) fm_new.setProteinIdentifications(prot_ids) for feature in feature_map: - pep_ids = [] + pep_ids = oms.PeptideIdentificationList() for pep_id in feature.getPeptideIdentifications(): pep_id.setIdentifier(f"Identifier_{i}") pep_ids.append(pep_id) diff --git a/requirements.txt b/requirements.txt index 76a798e56..8b58c040b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ scikit-learn tabulate requests bokeh +jupyter_bokeh datashader holoviews pyviz_comms From 132f3f78fbae7fd698e4ba803bb6bf632f1c0c15 Mon Sep 17 00:00:00 2001 From: matteopilz Date: Mon, 1 Sep 2025 15:05:40 +0200 Subject: [PATCH 2/2] update & bug fix --- docs/source/user_guide/interactive_plots.rst | 2 +- .../source/user_guide/untargeted_metabolomics_preprocessing.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user_guide/interactive_plots.rst b/docs/source/user_guide/interactive_plots.rst index 73d14dfbe..207f615cb 100644 --- a/docs/source/user_guide/interactive_plots.rst +++ b/docs/source/user_guide/interactive_plots.rst @@ -35,7 +35,7 @@ interactively zoomed-in if you execute the code in a notebook exp.updateRanges() expandcols = ["RT", "mz", "inty"] spectraarrs2d = exp.get2DPeakDataLong( - exp.getMinRT(), exp.getMaxRT(), exp.getMinMZ(), exp.getMaxMZ() + exp.getMinRT(), exp.getMaxRT(), exp.getMinMZ(), exp.getMaxMZ(), 1 ) spectradf = pd.DataFrame(dict(zip(expandcols, spectraarrs2d))) spectradf = spectradf.set_index(["RT", "mz"]) diff --git a/docs/source/user_guide/untargeted_metabolomics_preprocessing.rst b/docs/source/user_guide/untargeted_metabolomics_preprocessing.rst index a0e8ebd0d..91144032b 100644 --- a/docs/source/user_guide/untargeted_metabolomics_preprocessing.rst +++ b/docs/source/user_guide/untargeted_metabolomics_preprocessing.rst @@ -164,7 +164,7 @@ Map :term:`MS2` spectra to features as :py:class:`~.PeptideIdentification` objec pep_ids = oms.PeptideIdentificationList() for pep_id in feature.getPeptideIdentifications(): pep_id.setIdentifier(f"Identifier_{i}") - pep_ids.append(pep_id) + pep_ids.push_back(pep_id) feature.setPeptideIdentifications(pep_ids) fm_new.push_back(feature) feature_maps_mapped.append(fm_new)