From c3b07d253be208ee2b4163560440a5878cbb6490 Mon Sep 17 00:00:00 2001
From: jbloom <jbloom@fredhutch.org>
Date: Thu, 11 Dec 2025 16:38:41 -0800
Subject: [PATCH 1/6] Fixe bug in `PolyclonalAverage` when sequential integer
 sites are being used

See [here](https://github.com/dms-vep/MERS-Spike-EMC2012-DMS/issues/21)
---
 CHANGELOG.rst            | 4 ++++
 polyclonal/__init__.py   | 2 +-
 polyclonal/polyclonal.py | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 5b18141..66a642a 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,6 +6,10 @@ All notable changes to this project will be documented in this file.
 
 The format is based on `Keep a Changelog <https://keepachangelog.com>`_.
 
+6.17
+----
+- Fixed bug in ``PolyclonalAverage`` when sequential integer sites are being used, see `here <https://github.com/dms-vep/MERS-Spike-EMC2012-DMS/issues/21>`_.
+
 6.16
 ----
 - Compute standard deviations for ``PolyclonalCollection`` using population rather than sample standard deviations. This changes the values of these standard deviations (makes them smaller), makes them zero rather than NaN when only one model being averaged, and fixes problem with ``PolyclonalCollection`` plots when only a single model.
diff --git a/polyclonal/__init__.py b/polyclonal/__init__.py
index 8be3166..9649787 100644
--- a/polyclonal/__init__.py
+++ b/polyclonal/__init__.py
@@ -31,7 +31,7 @@
 
 __author__ = "`the Bloom lab <https://jbloomlab.org>`_"
 __email__ = "jbloom@fredhutch.org"
-__version__ = "6.16"
+__version__ = "6.17"
 __url__ = "https://github.com/jbloomlab/polyclonal"
 
 from polyclonal.alphabets import AAS
diff --git a/polyclonal/polyclonal.py b/polyclonal/polyclonal.py
index 9f02f20..7e0b624 100644
--- a/polyclonal/polyclonal.py
+++ b/polyclonal/polyclonal.py
@@ -3297,7 +3297,7 @@ def epitope_harmonized_model(self, ref_poly):
             alphabet=self.alphabet,
             epitope_colors=ref_poly.epitope_colors,
             data_mut_escape_overlap="exact_match",  # should be exact match in self
-            sites=None if self.sequential_integer_sites else self.sites,
+            sites=self.sites,
         )
         assert ref_poly.epitopes == harmonized_model.epitopes
         return harmonized_model, harmonize_df

From b348381e3f0c102c46fcce9125e92dbf1c2f8f92 Mon Sep 17 00:00:00 2001
From: jbloom <jbloom@fredhutch.org>
Date: Thu, 11 Dec 2025 19:47:33 -0800
Subject: [PATCH 2/6] test on Python 3.12 rather than 3.11

---
 .github/workflows/test.yaml | 2 +-
 CHANGELOG.rst               | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 5cf3657..0e59430 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -20,7 +20,7 @@ jobs:
       - name: install python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.12"
 
       - name: install package and dependencies
         run: pip install -e . && pip install -r test_requirements.txt
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 66a642a..6008132 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -9,6 +9,7 @@ The format is based on `Keep a Changelog <https://keepachangelog.com>`_.
 6.17
 ----
 - Fixed bug in ``PolyclonalAverage`` when sequential integer sites are being used, see `here <https://github.com/dms-vep/MERS-Spike-EMC2012-DMS/issues/21>`_.
+- Test on Python 3.12 rather than 3.11.
 
 6.16
 ----

From 3e38b55a2b31bcda52064dede33b58077229a39f Mon Sep 17 00:00:00 2001
From: jbloom <jbloom@fredhutch.org>
Date: Thu, 11 Dec 2025 19:47:48 -0800
Subject: [PATCH 3/6] make `pdb_utils` pass doctest

---
 polyclonal/pdb_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/polyclonal/pdb_utils.py b/polyclonal/pdb_utils.py
index c80f731..65609ad 100644
--- a/polyclonal/pdb_utils.py
+++ b/polyclonal/pdb_utils.py
@@ -240,13 +240,13 @@ def reassign_b_factor(
     Now spot check some key lines in the output PDB.
     Chain A has all sites with B factors (last entry) re-assigned to 0:
 
-    >>> print(pdb_text[0].strip())
+    >>> print(pdb_text[0].strip())  # doctest: +NORMALIZE_WHITESPACE
     ATOM      1  N   SER A  19     -31.455  49.474   2.505  1.00  0.00           N
 
     Chain E has sites 333 and 334 with B-factors assigned to values in `df`, and
     other sites (such as 335) assigned to -1:
 
-    >>> print('\n'.join(line.strip() for line in pdb_text[5010: 5025]))
+    >>> print('\n'.join(line.strip() for line in pdb_text[5010: 5025]))  # doctest: +NORMALIZE_WHITESPACE
     ATOM   5010  O   THR E 333     -34.954  13.568  46.370  1.00  0.50           O
     ATOM   5011  CB  THR E 333     -33.695  14.409  48.627  1.00  0.50           C
     ATOM   5012  OG1 THR E 333     -34.797  14.149  49.507  1.00  0.50           O

From 5cb9ec9f68d677cdc13837f47cf4057d84462a88 Mon Sep 17 00:00:00 2001
From: jbloom <jbloom@fredhutch.org>
Date: Fri, 12 Dec 2025 04:37:59 -0800
Subject: [PATCH 4/6] do not mutate `models_df` in `PolyclonalAverage`, make
 copy

---
 CHANGELOG.rst                       | 3 ++-
 polyclonal/polyclonal_collection.py | 8 +++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 6008132..384261c 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -8,7 +8,8 @@ The format is based on `Keep a Changelog <https://keepachangelog.com>`_.
 
 6.17
 ----
-- Fixed bug in ``PolyclonalAverage`` when sequential integer sites are being used, see `here <https://github.com/dms-vep/MERS-Spike-EMC2012-DMS/issues/21>`_.
+- Fixed bug in ``PolyclonalAverage`` when sequential integer sites are being used, see `here <https://github.com/dms-vep/MERS-Spike-EMC2012-DMS/issues/21>`_:
+  + Do not mutate the input ``models_df`` in ``PolyclonalAverage``; make a copy
 - Test on Python 3.12 rather than 3.11.
 
 6.16
diff --git a/polyclonal/polyclonal_collection.py b/polyclonal/polyclonal_collection.py
index 3bdda23..4598cb7 100644
--- a/polyclonal/polyclonal_collection.py
+++ b/polyclonal/polyclonal_collection.py
@@ -1561,9 +1561,11 @@ def __init__(
         if harmonize_to is None:
             harmonize_to = models_df.iloc[0]["model"]
 
-        models_df["model"] = [
-            m.epitope_harmonized_model(harmonize_to)[0] for m in models_df["model"]
-        ]
+        models_df = models_df.assign(
+            model=[
+                m.epitope_harmonized_model(harmonize_to)[0] for m in models_df["model"]
+            ]
+        )
 
         super().__init__(
             models_df, region_col=region_col, default_avg_to_plot=default_avg_to_plot

From b99e9b939d744c9d419c1c3f498dbaf0d00c5b4b Mon Sep 17 00:00:00 2001
From: jbloom <jbloom@fredhutch.org>
Date: Fri, 12 Dec 2025 05:06:42 -0800
Subject: [PATCH 5/6] epitope harmonization returns deepcopy of original model
 if just one epitope, reverted earlier change to how sequential sites handled

---
 CHANGELOG.rst            |  3 ++-
 polyclonal/polyclonal.py | 11 +++++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 384261c..ee6c00e 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -8,8 +8,9 @@ The format is based on `Keep a Changelog <https://keepachangelog.com>`_.
 
 6.17
 ----
-- Fixed bug in ``PolyclonalAverage`` when sequential integer sites are being used, see `here <https://github.com/dms-vep/MERS-Spike-EMC2012-DMS/issues/21>`_:
+- Fixed bug in ``PolyclonalAverage`` due to epitope harmonization when sequential integer sites are being used, see `here <https://github.com/dms-vep/MERS-Spike-EMC2012-DMS/issues/21>`_. This fix may only work when just one epitope is being used, with multiple epitopes there still may be issues with how the sites are assigned:
   + Do not mutate the input ``models_df`` in ``PolyclonalAverage``; make a copy
+  + When there is just one epitope, return a deepcopy of self when harmonizing epitopes
 - Test on Python 3.12 rather than 3.11.
 
 6.16
diff --git a/polyclonal/polyclonal.py b/polyclonal/polyclonal.py
index 7e0b624..a4b9beb 100644
--- a/polyclonal/polyclonal.py
+++ b/polyclonal/polyclonal.py
@@ -661,7 +661,10 @@ class Polyclonal:
       self_initial_epitope self_harmonized_epitope ref_epitope  correlation
     0                   e1                      e1          e1          1.0
     1                   e2                      e2          e2          1.0
-    >>> assert model.mut_escape_df.equals(model_harmonized.mut_escape_df)
+    >>> if not model.mut_escape_df.equals(model_harmonized.mut_escape_df):
+    ...     raise ValueError(
+    ...         f"{model.mut_escape_df=}\n{model_harmonized.mut_escape_df=}"
+    ...     )
 
     >>> inverted_harmonized, harmonize_df = inverted_model.epitope_harmonized_model(
     ...     ref_model
@@ -3259,6 +3262,10 @@ def epitope_harmonized_model(self, ref_poly):
                 f"cannot harmonize 1-to-1:\n{corr_df=}\n{harmonize_df=}"
             )
 
+        # if only one epitope, do not need to do anything more
+        if len(self.epitopes) == 1:
+            return copy.deepcopy(self), harmonize_df
+
         map_dict = harmonize_df.set_index("self_initial_epitope")[
             "self_harmonized_epitope"
         ].to_dict()
@@ -3297,7 +3304,7 @@ def epitope_harmonized_model(self, ref_poly):
             alphabet=self.alphabet,
             epitope_colors=ref_poly.epitope_colors,
             data_mut_escape_overlap="exact_match",  # should be exact match in self
-            sites=self.sites,
+            sites=None if self.sequential_integer_sites else self.sites,
         )
         assert ref_poly.epitopes == harmonized_model.epitopes
         return harmonized_model, harmonize_df

From 82e75482cc0ea0a8760bf0641c9442e637624d96 Mon Sep 17 00:00:00 2001
From: jbloom <jbloom@fredhutch.org>
Date: Fri, 12 Dec 2025 05:38:13 -0800
Subject: [PATCH 6/6] increase test tolerance in `reference_site_numbering` to
 accommodate numerical precision variation

---
 notebooks/reference_site_numbering.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notebooks/reference_site_numbering.ipynb b/notebooks/reference_site_numbering.ipynb
index 55750ca..8d41a21 100644
--- a/notebooks/reference_site_numbering.ipynb
+++ b/notebooks/reference_site_numbering.ipynb
@@ -1200,7 +1200,7 @@
     "pd.testing.assert_frame_equal(\n",
     "    mut_escape,\n",
     "    mut_escape_sequential,\n",
-    "    atol=1.5,\n",
+    "    atol=2.5,\n",
     ")\n",
     "assert 0.99 < mut_escape[\"escape\"].corr(mut_escape_sequential[\"escape\"])"
    ]