diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 5cf3657..0e59430 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -20,7 +20,7 @@ jobs: - name: install python uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: install package and dependencies run: pip install -e . && pip install -r test_requirements.txt diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5b18141..ee6c00e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,13 @@ All notable changes to this project will be documented in this file. The format is based on `Keep a Changelog `_. +6.17 +---- +- Fixed bug in ``PolyclonalAverage`` due to epitope harmonization when sequential integer sites are being used, see `here `_. This fix may only work when just one epitope is being used; with multiple epitopes there may still be issues with how the sites are assigned: + + Do not mutate the input ``models_df`` in ``PolyclonalAverage``; make a copy + + When there is just one epitope, return a deepcopy of self when harmonizing epitopes +- Test on Python 3.12 rather than 3.11. + 6.16 ---- - Compute standard deviations for ``PolyclonalCollection`` using population rather than sample standard deviations. This changes the values of these standard deviations (makes them smaller), makes them zero rather than NaN when only one model being averaged, and fixes problem with ``PolyclonalCollection`` plots when only a single model. 
diff --git a/notebooks/reference_site_numbering.ipynb b/notebooks/reference_site_numbering.ipynb index 55750ca..8d41a21 100644 --- a/notebooks/reference_site_numbering.ipynb +++ b/notebooks/reference_site_numbering.ipynb @@ -1200,7 +1200,7 @@ "pd.testing.assert_frame_equal(\n", " mut_escape,\n", " mut_escape_sequential,\n", - " atol=1.5,\n", + " atol=2.5,\n", ")\n", "assert 0.99 < mut_escape[\"escape\"].corr(mut_escape_sequential[\"escape\"])" ] diff --git a/polyclonal/__init__.py b/polyclonal/__init__.py index 8be3166..9649787 100644 --- a/polyclonal/__init__.py +++ b/polyclonal/__init__.py @@ -31,7 +31,7 @@ __author__ = "`the Bloom lab `_" __email__ = "jbloom@fredhutch.org" -__version__ = "6.16" +__version__ = "6.17" __url__ = "https://github.com/jbloomlab/polyclonal" from polyclonal.alphabets import AAS diff --git a/polyclonal/pdb_utils.py b/polyclonal/pdb_utils.py index c80f731..65609ad 100644 --- a/polyclonal/pdb_utils.py +++ b/polyclonal/pdb_utils.py @@ -240,13 +240,13 @@ def reassign_b_factor( Now spot check some key lines in the output PDB. 
Chain A has all sites with B factors (last entry) re-assigned to 0: - >>> print(pdb_text[0].strip()) + >>> print(pdb_text[0].strip()) # doctest: +NORMALIZE_WHITESPACE ATOM 1 N SER A 19 -31.455 49.474 2.505 1.00 0.00 N Chain E has sites 333 and 334 with B-factors assigned to values in `df`, and other sites (such as 335) assigned to -1: - >>> print('\n'.join(line.strip() for line in pdb_text[5010: 5025])) + >>> print('\n'.join(line.strip() for line in pdb_text[5010: 5025])) # doctest: +NORMALIZE_WHITESPACE ATOM 5010 O THR E 333 -34.954 13.568 46.370 1.00 0.50 O ATOM 5011 CB THR E 333 -33.695 14.409 48.627 1.00 0.50 C ATOM 5012 OG1 THR E 333 -34.797 14.149 49.507 1.00 0.50 O diff --git a/polyclonal/polyclonal.py b/polyclonal/polyclonal.py index 9f02f20..a4b9beb 100644 --- a/polyclonal/polyclonal.py +++ b/polyclonal/polyclonal.py @@ -661,7 +661,10 @@ class Polyclonal: self_initial_epitope self_harmonized_epitope ref_epitope correlation 0 e1 e1 e1 1.0 1 e2 e2 e2 1.0 - >>> assert model.mut_escape_df.equals(model_harmonized.mut_escape_df) + >>> if not model.mut_escape_df.equals(model_harmonized.mut_escape_df): + ... raise ValueError( + ... f"{model.mut_escape_df=}\n{model_harmonized.mut_escape_df=}" + ... ) >>> inverted_harmonized, harmonize_df = inverted_model.epitope_harmonized_model( ... 
ref_model @@ -3259,6 +3262,10 @@ def epitope_harmonized_model(self, ref_poly): f"cannot harmonize 1-to-1:\n{corr_df=}\n{harmonize_df=}" ) + # if only one epitope, do not need to do anything more + if len(self.epitopes) == 1: + return copy.deepcopy(self), harmonize_df + map_dict = harmonize_df.set_index("self_initial_epitope")[ "self_harmonized_epitope" ].to_dict() diff --git a/polyclonal/polyclonal_collection.py b/polyclonal/polyclonal_collection.py index 3bdda23..4598cb7 100644 --- a/polyclonal/polyclonal_collection.py +++ b/polyclonal/polyclonal_collection.py @@ -1561,9 +1561,11 @@ def __init__( if harmonize_to is None: harmonize_to = models_df.iloc[0]["model"] - models_df["model"] = [ - m.epitope_harmonized_model(harmonize_to)[0] for m in models_df["model"] - ] + models_df = models_df.assign( + model=[ + m.epitope_harmonized_model(harmonize_to)[0] for m in models_df["model"] + ] + ) super().__init__( models_df, region_col=region_col, default_avg_to_plot=default_avg_to_plot