From a2c58959ed8165150abef2a5d0ad779efc89c2b1 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 19 Mar 2026 15:49:20 -0500 Subject: [PATCH 01/49] Install postgres dependencies for tests --- .github/workflows/build.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 3828057a..75f5dbcc 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -36,12 +36,17 @@ jobs: mamba install -y -q pip wheel pip install uv + - name: Install Postgres for testing + shell: bash -l {0} + run: | + mamba install -y -q postgresql + - name: Install dependencies shell: bash -l {0} run: | uv pip install -r requirements.txt + uv pip install testing.postgresql - # We have two cores so we can speed up the testing with xdist - name: Install pytest packages shell: bash -l {0} run: | From 0e697d490e80fe9d55221557274ba3e678faf05d Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 17 Feb 2026 15:45:31 -0600 Subject: [PATCH 02/49] Rename test modules using snakecase --- tests/{test_ppdbBigQuery.py => test_ppdb_bigquery.py} | 0 tests/{test_ppdbSql.py => test_ppdb_sql.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/{test_ppdbBigQuery.py => test_ppdb_bigquery.py} (100%) rename tests/{test_ppdbSql.py => test_ppdb_sql.py} (100%) diff --git a/tests/test_ppdbBigQuery.py b/tests/test_ppdb_bigquery.py similarity index 100% rename from tests/test_ppdbBigQuery.py rename to tests/test_ppdb_bigquery.py diff --git a/tests/test_ppdbSql.py b/tests/test_ppdb_sql.py similarity index 100% rename from tests/test_ppdbSql.py rename to tests/test_ppdb_sql.py From 074b4b7254808a88563a4e1104d4db7d0921e68b Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 10 Feb 2026 17:33:21 -0600 Subject: [PATCH 03/49] Write update records to JSON file when storing replica chunks --- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 72 +++-- .../lsst/dax/ppdb/bigquery/update_records.py 
| 115 ++++++++ tests/test_ppdb_bigquery.py | 246 +++++++++++++++++- 3 files changed, 408 insertions(+), 25 deletions(-) create mode 100644 python/lsst/dax/ppdb/bigquery/update_records.py diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index 877bb229..871531a6 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -46,6 +46,7 @@ from ..sql import PpdbSqlBase, PpdbSqlBaseConfig from .manifest import Manifest, TableStats from .ppdb_replica_chunk_extended import ChunkStatus, PpdbReplicaChunkExtended +from .update_records import UpdateRecords __all__ = ["ConfigValidationError", "PpdbBigQuery", "PpdbBigQueryConfig"] @@ -178,22 +179,11 @@ def store( # Docstring is inherited. _LOG.info("Processing %s", replica_chunk.id) - # TODO: APDB does not generate ApdbUpdateRecords yet, but we will - # eventually have to add support for it. - if update_records: - raise NotImplementedError("PpdbBigQuery does not support record updates yet.") - try: - chunk_dir = self._get_chunk_path(replica_chunk) - - if chunk_dir.exists(): - if not self.delete_existing_dirs: - raise FileExistsError(f"Directory already exists for {replica_chunk.id}: {chunk_dir}") - _LOG.warning("Overwriting existing directory for %s: %s", replica_chunk.id, chunk_dir) - shutil.rmtree(chunk_dir) + chunk_dir = self._create_chunk_dir(replica_chunk) - chunk_dir.mkdir(parents=True) - _LOG.info("Created directory for %s: %s", replica_chunk.id, chunk_dir) + if update_records: + self._handle_updates(replica_chunk, update_records, chunk_dir) table_dict = { ApdbTables.DiaObject.value: objects, @@ -261,15 +251,32 @@ def store( _LOG.info("Done processing %s", replica_chunk.id) - def _get_chunk_path(self, chunk: ReplicaChunk) -> Path: + def _create_chunk_dir(self, chunk: ReplicaChunk) -> Path: + """Create the directory for the replica chunk based on its last update + time and ID. 
+ + Returns + ------- + chunk_dir + Path to the created directory for the replica chunk. + """ last_update_time = chunk.last_update_time.to_datetime() assert isinstance(last_update_time, datetime.datetime) - path = Path( + chunk_dir = Path( self.replication_path, chunk.last_update_time.strftime("%Y/%m/%d"), str(chunk.id), ) - return path + if chunk_dir.exists(): + if not self.delete_existing_dirs: + raise FileExistsError(f"Directory already exists for {chunk.id}: {chunk_dir}") + _LOG.warning("Overwriting existing directory for %s: %s", chunk.id, chunk_dir) + shutil.rmtree(chunk_dir) + + chunk_dir.mkdir(parents=True) + _LOG.info("Created directory for %s: %s", chunk.id, chunk_dir) + + return chunk_dir def get_replica_chunks(self, start_chunk_id: int | None = None) -> Sequence[PpdbReplicaChunk] | None: # Docstring is inherited. @@ -567,3 +574,34 @@ def validate_config(cls, config: PpdbBigQueryConfig) -> None: check_dataset_exists(config.project_id, config.dataset_id) except Exception as e: raise ConfigValidationError("Failed to validate BigQuery dataset") from e + + def _handle_updates( + self, replica_chunk: ReplicaChunk, apdb_update_records: Collection[ApdbUpdateRecord], chunk_dir: Path + ) -> None: + """Handle updates to existing records in the PPDB. + + Parameters + ---------- + replica_chunk : `ReplicaChunk` + The replica chunk associated with the updates. + update_records : `~collections.abc.Collection` [ `ApdbUpdateRecord` ] + Collection of update records to process. + + Notes + ----- + Serializes the ApdbUpdateRecord objects into a dictionary structure + for processing. 
+ """ + update_records = UpdateRecords( + replica_chunk_id=replica_chunk.id, + records=apdb_update_records, + record_count=len(apdb_update_records), + ) + update_records.to_json_file(chunk_dir / "update_records.json") + + _LOG.info( + "Saved %d update records for %s to %s", + update_records.record_count, + replica_chunk.id, + chunk_dir / "update_records.json", + ) diff --git a/python/lsst/dax/ppdb/bigquery/update_records.py b/python/lsst/dax/ppdb/bigquery/update_records.py new file mode 100644 index 00000000..56e651be --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/update_records.py @@ -0,0 +1,115 @@ +# This file is part of dax_ppdb +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from pydantic import BaseModel, field_serializer, field_validator + +from lsst.dax.apdb.apdbUpdateRecord import ApdbUpdateRecord + + +class UpdateRecords(BaseModel): + """Data model for APDB update records.""" + + replica_chunk_id: int + """Identifier of the replica chunk to which these update records belong.""" + + record_count: int + """Number of update records included in this object.""" + + records: list[ApdbUpdateRecord] + """List of APDB update records included in this object.""" + + @field_serializer("records") + def serialize_update_records( + self, + records: list[ApdbUpdateRecord], + ) -> list[dict[str, Any]]: + """Serialize the ``ApdbUpdateRecord`` objects to JSON. + + Parameters + ---------- + records : `list` [ `ApdbUpdateRecord` ] + The list of APDB update records to serialize. + + Returns + ------- + serialized_records : `list` [ `dict` [ `str`, `Any` ]] + The serialized JSON data. + """ + serialized_records: list[dict[str, Any]] = [] + + for update_record in records: + record_dict: dict[str, Any] = json.loads(update_record.to_json()) + record_dict["update_time_ns"] = update_record.update_time_ns + record_dict["update_order"] = update_record.update_order + serialized_records.append(record_dict) + + return serialized_records + + @field_validator("records", mode="before") + @classmethod + def deserialize_update_records( + cls, + records: list[dict[str, Any]] | list[ApdbUpdateRecord], + ) -> list[ApdbUpdateRecord]: + """Deserialize the JSON data to ``ApdbUpdateRecord`` objects. + + Parameters + ---------- + records : `list` [ `dict` [ `str`, `Any` ] | `ApdbUpdateRecord` ] + The list of serialized JSON data or already deserialized + ApdbUpdateRecord objects. + + Returns + ------- + update_records : `list` [ `ApdbUpdateRecord` ] + The list of APDB update records. 
+ """ + if records and isinstance(records[0], ApdbUpdateRecord): + return records + deserialized_records: list[ApdbUpdateRecord] = [] + for record_dict in records: + record_copy = record_dict.copy() + update_time_ns = record_copy.pop("update_time_ns") + update_order = record_copy.pop("update_order") + json_str = json.dumps(record_copy) + update_record = ApdbUpdateRecord.from_json( + update_time_ns, + update_order, + json_str, + ) + deserialized_records.append(update_record) + return deserialized_records + + def to_json_file(self, path: Path) -> None: + with open(path, "w") as f: + json.dump(self.model_dump(), f, indent=2, default=str) + + @classmethod + def from_json_file(cls, path: Path) -> UpdateRecords: + with open(path) as f: + data = json.load(f) + return cls.model_validate(data) diff --git a/tests/test_ppdb_bigquery.py b/tests/test_ppdb_bigquery.py index 198f0bca..6aa64f52 100644 --- a/tests/test_ppdb_bigquery.py +++ b/tests/test_ppdb_bigquery.py @@ -26,10 +26,17 @@ import unittest from typing import Any -from lsst.dax.apdb import ApdbConfig +from lsst.dax.apdb import ( + Apdb, + ApdbConfig, + ApdbReplica, + apdbUpdateRecord, +) from lsst.dax.apdb.sql import ApdbSql -from lsst.dax.ppdb import PpdbConfig +from lsst.dax.ppdb import Ppdb, PpdbConfig from lsst.dax.ppdb.bigquery import PpdbBigQuery +from lsst.dax.ppdb.bigquery.update_records import UpdateRecords +from lsst.dax.ppdb.replicator import Replicator from lsst.dax.ppdb.tests import PpdbTest try: @@ -50,8 +57,10 @@ } -class SqliteTestCase(PpdbTest, unittest.TestCase): - """A test case for the PpdbBigQuery class using a SQLite backend.""" +class _SqliteMixin: + """Mixin class to provide Sqlite-specific setup/teardown and instance + creation. 
+ """ def setUp(self) -> None: self.tempdir = tempfile.mkdtemp() @@ -86,9 +95,10 @@ def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: return ApdbSql.init_database(**kw) # type: ignore[arg-type] -@unittest.skipUnless(testing is not None, "testing.postgresql module not found") -class PostgresTestCase(PpdbTest, unittest.TestCase): - """A test case for the PpdbBigQuery class using a Postgres backend.""" +class _PostgresMixin: + """Mixin class to provide Postgres-specific setup/teardown and instance + creation. + """ postgresql: Any @@ -119,7 +129,7 @@ def make_instance(self, **kwargs: Any) -> PpdbConfig: kw = { **TEST_CONFIG, "db_url": self.server.url(), - "db_schema": None, + "db_schema": "ppdb_test", "felis_path": TEST_SCHEMA, "replication_dir": self.tempdir, } @@ -136,3 +146,223 @@ def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: } kw.update(kwargs) return ApdbSql.init_database(**kw) # type: ignore[arg-type] + + +class SqliteTestCase(_SqliteMixin, PpdbTest, unittest.TestCase): + """A test case for the PpdbBigQuery class using a SQLite backend.""" + + +@unittest.skipUnless(testing is not None, "testing.postgresql module not found") +class PostgresTestCase(_PostgresMixin, PpdbTest, unittest.TestCase): + """A test case for the PpdbBigQuery class using a Postgres backend.""" + + +class UpdateRecordsTestCase(_PostgresMixin, PpdbTest, unittest.TestCase): + """A test case for the PpdbBigQuery class update functionality using a + Postgres backend. + """ + + include_update_records = True + + def setUp(self): + super().setUp() + + # Make APDB instance and fill it with test data. + apdb_config = self.make_apdb_instance() + apdb = Apdb.from_config(apdb_config) + self._fill_apdb(apdb) + apdb_replica = ApdbReplica.from_config(apdb_config) + + # Make PPDB instance. + ppdb_config = self.make_instance() + self.ppdb = Ppdb.from_config(ppdb_config) + assert isinstance(self.ppdb, PpdbBigQuery) + + # Replicate those to PPDB. 
+ replicator = Replicator( + apdb_replica, self.ppdb, update=False, min_wait_time=0, max_wait_time=0, check_interval=0 + ) + + # Copy chunks. + replicator.run(exit_on_empty=True) + + def test_update_records_json_serialization(self) -> None: + """Test that the APDB update records are correctly saved to a JSON file + in the replication output and can be read back correctly as + UpdateRecords objects. + """ + update_records_path = self.ppdb.replication_path / "2021/03/01/1614600000" / "update_records.json" + self.assertTrue(update_records_path.exists(), "Update records file not found in replication output") + + update_records = UpdateRecords.from_json_file(update_records_path) + print("\n" + str(update_records)) + + self.assertEqual( + update_records.replica_chunk_id, + 1614600000, + "Unexpected replica chunk ID in deserialized update records", + ) + + self.assertEqual(update_records.record_count, 3, "Unexpected number of update records deserialized") + + self.assertEqual( + len(update_records.records), 3, "Unexpected number of update records in the deserialized object" + ) + + for record in update_records.records: + self.assertIsInstance( + record, + apdbUpdateRecord.ApdbUpdateRecord, + "Deserialized record is not an instance of ApdbUpdateRecord", + ) + + update_record = update_records.records[0] + self.assertIsInstance( + update_record, + apdbUpdateRecord.ApdbReassignDiaSourceToSSObjectRecord, + "Deserialized record is not an instance of ApdbReassignDiaSourceToSSObjectRecord", + ) + assert isinstance(update_record, apdbUpdateRecord.ApdbReassignDiaSourceToSSObjectRecord) + self.assertEqual( + update_record.diaSourceId, + 700, + "Unexpected diaSourceId in deserialized ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertEqual( + update_record.ssObjectId, + 1, + "Unexpected ssObjectId in deserialized ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertEqual( + update_record.update_time_ns, + 1614600037000000000, + "Unexpected update_time_ns in deserialized 
ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertEqual( + update_record.update_order, + 0, + "Unexpected update_order in deserialized ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertEqual( + update_record.midpointMjdTai, + 60000.0, + "Unexpected midpointMjdTai in deserialized ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertEqual( + update_record.ssObjectReassocTimeMjdTai, + 59274.50042824074, + "Unexpected ssObjectReassocTimeMjdTai in deserialized ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertNotEqual( + update_record.ra, + 0.0, + "Unexpected ra in deserialized ApdbReassignDiaSourceToSSObjectRecord, should not be 0.0", + ) + self.assertNotEqual( + update_record.dec, + 0.0, + "Unexpected dec in deserialized ApdbReassignDiaSourceToSSObjectRecord, should not be 0.0", + ) + + update_record = update_records.records[1] + self.assertIsInstance( + update_record, + apdbUpdateRecord.ApdbCloseDiaObjectValidityRecord, + "Deserialized record is not an instance of ApdbCloseDiaObjectValidityRecord", + ) + self.assertEqual( + update_record.diaObjectId, + 200, + "Unexpected diaObjectId in deserialized ApdbCloseDiaObjectValidityRecord", + ) + self.assertNotEqual( + update_record.ra, + 0.0, + "Unexpected ra in deserialized ApdbCloseDiaObjectValidityRecord, should not be 0.0", + ) + self.assertNotEqual( + update_record.dec, + 0.0, + "Unexpected dec in deserialized ApdbCloseDiaObjectValidityRecord, should not be 0.0", + ) + self.assertEqual( + update_record.update_time_ns, + 1614600037000000000, + "Unexpected update_time_ns in deserialized ApdbCloseDiaObjectValidityRecord", + ) + self.assertEqual( + update_record.update_order, + 1, + "Unexpected update_order in deserialized ApdbCloseDiaObjectValidityRecord", + ) + self.assertEqual( + update_record.validityEndMjdTai, + 59274.50042824074, + "Unexpected validityEndMjdTai in deserialized ApdbCloseDiaObjectValidityRecord", + ) + self.assertIsNone( + update_record.nDiaSources, + "Unexpected nDiaSources in 
deserialized ApdbCloseDiaObjectValidityRecord, expected None", + ) + + update_record = update_records.records[2] + self.assertIsInstance( + update_record, + apdbUpdateRecord.ApdbWithdrawDiaForcedSourceRecord, + "Deserialized record is not an instance of ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.diaObjectId, + 200, + "Unexpected diaObjectId in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.visit, + 7, + "Unexpected visit in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.detector, + 1, + "Unexpected detector in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertNotEqual( + update_record.ra, + 0.0, + "Unexpected ra in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", + ) + self.assertNotEqual( + update_record.dec, + 0.0, + "Unexpected dec in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", + ) + self.assertEqual( + update_record.midpointMjdTai, + 60000.0, + "Unexpected midpointMjdTai in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.update_time_ns, + 1614600037000000000, + "Unexpected update_time_ns in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.update_order, + 2, + "Unexpected update_order in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.timeWithdrawnMjdTai, + 59274.50042824074, + "Unexpected timeWithdrawnMjdTai in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertNotEqual( + update_record.ra, + 0.0, + "Unexpected ra in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", + ) + self.assertNotEqual( + update_record.dec, + 0.0, + "Unexpected dec in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", + ) From 1cb349534c026abd54119912fd66e56240fd9fb8 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 10 Feb 2026 17:42:25 
-0600 Subject: [PATCH 04/49] Separate APDB test functionality into a mixin class --- python/lsst/dax/ppdb/tests/_ppdb.py | 90 +++++++++++++++-------------- tests/test_ppdb_bigquery.py | 4 +- 2 files changed, 50 insertions(+), 44 deletions(-) diff --git a/python/lsst/dax/ppdb/tests/_ppdb.py b/python/lsst/dax/ppdb/tests/_ppdb.py index 06a84a37..2535d187 100644 --- a/python/lsst/dax/ppdb/tests/_ppdb.py +++ b/python/lsst/dax/ppdb/tests/_ppdb.py @@ -21,7 +21,7 @@ from __future__ import annotations -__all__ = ["PpdbTest"] +__all__ = ["ApdbMixin", "PpdbTest"] import unittest from abc import ABC, abstractmethod @@ -68,50 +68,14 @@ def _make_region(xyz: tuple[float, float, float] = (1.0, 1.0, -1.0)) -> Region: return region -class PpdbTest(TestCaseMixin, ABC): - """Base class for Ppdb tests that can be specialized for concrete - implementation. - - This can only be used as a mixin class for a unittest.TestCase and it - calls various assert methods. +class ApdbMixin: + """Mixin class containing APDB setuup and record generation for PPDB + testing. """ include_update_records = False """If True then test replication of ApdbUpdateRecords.""" - @abstractmethod - def make_instance(self, **kwargs: Any) -> PpdbConfig: - """Make database instance and return configuration for it. - - Parameters - ---------- - **kwargs : `Any` - Instance-specific parameters for the PPDB database. - """ - raise NotImplementedError() - - @abstractmethod - def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: - """Make APDB instance and return configuration for it, APDB must have - replication enabled. - - Parameters - ---------- - **kwargs : `Any` - Instance-specific parameters for the APDB. - """ - raise NotImplementedError() - - def test_empty_db(self) -> None: - """Test for instantiation a database and making queries on empty - database. 
- """ - config = self.make_instance() - ppdb = Ppdb.from_config(config) - chunks = ppdb.get_replica_chunks() - if chunks is not None: - self.assertEqual(len(chunks), 0) - def _fill_apdb(self, apdb: Apdb) -> None: """Populate APDB with some data to replicate.""" visit_time = astropy.time.Time("2021-01-01T00:01:00", format="isot", scale="tai") @@ -135,7 +99,7 @@ def _fill_apdb(self, apdb: Apdb) -> None: (astropy.time.Time("2021-03-01T00:02:00", format="isot", scale="tai"), objects2), ] - # Time when apdates are applied. + # Time when updates are applied. update_time = astropy.time.Time("2021-03-01T12:00:00") update_records = [] @@ -147,7 +111,7 @@ def _fill_apdb(self, apdb: Apdb) -> None: start_id += nobj if self.include_update_records and visit == (len(visits) - 1): - # Generate few update records. + # Generate a few update records. update_records = self._make_update_records(sources, fsources, update_time) if self.include_update_records: @@ -218,6 +182,48 @@ def _check_chunks( self.assertEqual(ppdb_chunks[i].last_update_time, apdb_chunks[i].last_update_time) self.assertEqual(ppdb_chunks[i].unique_id, apdb_chunks[i].unique_id) + +class PpdbTest(TestCaseMixin, ApdbMixin, ABC): + """Base class for Ppdb tests that can be specialized for concrete + implementation. + + This can only be used as a mixin class for a unittest.TestCase and it + calls various assert methods. + """ + + @abstractmethod + def make_instance(self, **kwargs: Any) -> PpdbConfig: + """Make database instance and return configuration for it. + + Parameters + ---------- + **kwargs : `Any` + Instance-specific parameters for the PPDB database. + """ + raise NotImplementedError() + + @abstractmethod + def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: + """Make APDB instance and return configuration for it, APDB must have + replication enabled. + + Parameters + ---------- + **kwargs : `Any` + Instance-specific parameters for the APDB. 
+ """ + raise NotImplementedError() + + def test_empty_db(self) -> None: + """Test for instantiation a database and making queries on empty + database. + """ + config = self.make_instance() + ppdb = Ppdb.from_config(config) + chunks = ppdb.get_replica_chunks() + if chunks is not None: + self.assertEqual(len(chunks), 0) + def test_replication_single(self) -> None: """Test replication from APDB to PPDB using a single chunk option.""" apdb_config = self.make_apdb_instance() diff --git a/tests/test_ppdb_bigquery.py b/tests/test_ppdb_bigquery.py index 6aa64f52..af520e5a 100644 --- a/tests/test_ppdb_bigquery.py +++ b/tests/test_ppdb_bigquery.py @@ -37,7 +37,7 @@ from lsst.dax.ppdb.bigquery import PpdbBigQuery from lsst.dax.ppdb.bigquery.update_records import UpdateRecords from lsst.dax.ppdb.replicator import Replicator -from lsst.dax.ppdb.tests import PpdbTest +from lsst.dax.ppdb.tests import ApdbMixin, PpdbTest try: import testing.postgresql @@ -157,7 +157,7 @@ class PostgresTestCase(_PostgresMixin, PpdbTest, unittest.TestCase): """A test case for the PpdbBigQuery class using a Postgres backend.""" -class UpdateRecordsTestCase(_PostgresMixin, PpdbTest, unittest.TestCase): +class UpdateRecordsTestCase(_PostgresMixin, ApdbMixin, unittest.TestCase): """A test case for the PpdbBigQuery class update functionality using a Postgres backend. 
""" From a4d221778d3ff48d9c304070e76ee1d4ca87d46b Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 11 Feb 2026 16:45:51 -0600 Subject: [PATCH 05/49] Use dax_ppdbx_gcp ticket for development --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4f213f08..ba5c456c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,5 +7,5 @@ lsst-dax-apdb @ git+https://github.com/lsst/dax_apdb@main lsst-utils @ git+https://github.com/lsst/utils@main lsst-resources[s3] @ git+https://github.com/lsst/resources@main lsst-felis @ git+https://github.com/lsst/felis@main -lsst-dax-ppdbx-gcp @ git+https://github.com/lsst-dm/dax_ppdbx_gcp@main +lsst-dax-ppdbx-gcp @ git+https://github.com/lsst-dm/dax_ppdbx_gcp@tickets/DM-54070 lsst-sdm-schemas @ git+https://github.com/lsst/sdm_schemas@main From eb0436733978058885612213d54ffaf0a8a7335b Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 11 Feb 2026 16:47:01 -0600 Subject: [PATCH 06/49] WIP: Add test of GCS upload --- tests/test_ppdb_bigquery.py | 76 ++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/tests/test_ppdb_bigquery.py b/tests/test_ppdb_bigquery.py index af520e5a..408a1307 100644 --- a/tests/test_ppdb_bigquery.py +++ b/tests/test_ppdb_bigquery.py @@ -21,11 +21,15 @@ import gc import os +import posixpath import shutil import tempfile import unittest +import uuid from typing import Any +import pytest + from lsst.dax.apdb import ( Apdb, ApdbConfig, @@ -50,8 +54,8 @@ "db_drop": True, "validate_config": False, "delete_existing_dirs": True, - "bucket_name": "test_bucket", - "object_prefix": "test_prefix", + "bucket_name": "ppdb-test", + "object_prefix": "data/test", "dataset_id": "test_dataset", "project_id": "test_project", } @@ -157,6 +161,12 @@ class PostgresTestCase(_PostgresMixin, PpdbTest, unittest.TestCase): """A test case for the PpdbBigQuery class using a Postgres backend.""" +def 
generate_test_bucket_name(test_prefix: str = "ppdb-test") -> str: + """Generate a unique bucket name for testing.""" + test_id = uuid.uuid4().hex[:16] + return f"{test_prefix}-{test_id}" + + class UpdateRecordsTestCase(_PostgresMixin, ApdbMixin, unittest.TestCase): """A test case for the PpdbBigQuery class update functionality using a Postgres backend. @@ -174,8 +184,8 @@ def setUp(self): apdb_replica = ApdbReplica.from_config(apdb_config) # Make PPDB instance. - ppdb_config = self.make_instance() - self.ppdb = Ppdb.from_config(ppdb_config) + self.ppdb_config = self.make_instance() + self.ppdb = Ppdb.from_config(self.ppdb_config) assert isinstance(self.ppdb, PpdbBigQuery) # Replicate those to PPDB. @@ -186,7 +196,7 @@ def setUp(self): # Copy chunks. replicator.run(exit_on_empty=True) - def test_update_records_json_serialization(self) -> None: + def test_json_serialization(self) -> None: """Test that the APDB update records are correctly saved to a JSON file in the replication output and can be read back correctly as UpdateRecords objects. @@ -366,3 +376,59 @@ def test_update_records_json_serialization(self) -> None: 0.0, "Unexpected dec in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", ) + + @pytest.mark.skipif( + pytest.importorskip("lsst.dax.ppdbx.gcp", reason="GCP support not installed") is None, + reason="GCP support is not installed", + ) + def test_gcs_upload(self) -> None: + """Test that the replication output, including the update records JSON + file, is correctly uploaded to Google Cloud Storage after replication. + + This will only run if there is GCP support installed. + """ + print("\nTesting GCS upload of replication output...") + + from lsst.dax.ppdb.bigquery.chunk_uploader import ChunkUploader + + # Patch the ChunkUploader to print the message that would be published + # to the Pub/Sub topic, because there is no support for that service in + # a test environment. 
+ class DummyChunkUploader(ChunkUploader): + def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_id: int) -> None: + message = { + "dataset": self.dataset_id, + "chunk_id": str(chunk_id), + "folder": f"gs://{posixpath.join(bucket_name, chunk_prefix)}", + } + print(f"Dummy publish to Pub/Sub topic: {message}") + + # Configure the uploader to use a unique object prefix to avoid + # conflicts + ppdb_config_copy = self.ppdb_config.model_copy() + ppdb_config_copy.bucket_name = generate_test_bucket_name("ppdb-test-gcs-upload") + + from lsst.dax.ppdbx.gcp.gcs import StorageClient + + # Create the test GCS bucket + storage_client = StorageClient(ppdb_config_copy.bucket_name) + try: + storage_client.create_bucket() + except Exception as e: + self.fail(f"Failed to create test GCS bucket: {e}") + + uploader = DummyChunkUploader( + ppdb_config_copy, + wait_interval=0, + exit_on_empty=True, + exit_on_error=True, + ) + + print(f"Uploader will copy files to {uploader.bucket_name}/{uploader.prefix}/") + uploader.run() + + # Delete the test GCS bucket + try: + storage_client.delete_bucket(force=True) + except Exception as e: + self.fail(f"Failed to delete test GCS bucket: {e}") From ebfad2939366a62ca3b20185898da9ed07e34b0a Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 11 Feb 2026 17:41:12 -0600 Subject: [PATCH 07/49] Upload the JSON file with APDB record updates when present --- .../lsst/dax/ppdb/bigquery/chunk_uploader.py | 36 ++++++++----- python/lsst/dax/ppdb/bigquery/manifest.py | 15 ++++-- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 11 ++-- tests/test_ppdb_bigquery.py | 52 ++++++++++++------- 4 files changed, 76 insertions(+), 38 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/chunk_uploader.py b/python/lsst/dax/ppdb/bigquery/chunk_uploader.py index 9efe0e22..d23a123c 100644 --- a/python/lsst/dax/ppdb/bigquery/chunk_uploader.py +++ b/python/lsst/dax/ppdb/bigquery/chunk_uploader.py @@ -237,15 +237,27 @@ def _process_chunk(self, 
replica_chunk: PpdbReplicaChunkExtended) -> None: ) # Make a list of local parquet files to upload. - parquet_files = list(chunk_dir.glob("*.parquet")) + upload_file_list = list(chunk_dir.glob("*.parquet")) + + # Include the update records file if the manifest indicates it should + # exist + if manifest.includes_update_records: + update_records_file = chunk_dir / "update_records.json" + if not update_records_file.exists(): + raise ChunkUploadError( + chunk_id, + f"Manifest indicates update records are included but file does not exist: " + f"{update_records_file}", + ) + upload_file_list.append(update_records_file) # Check if the chunk is expected to be empty. is_empty = manifest.is_empty_chunk() - if not parquet_files and not is_empty: + if not upload_file_list and not is_empty: # There is a mismatch between the manifest and the actual files. # Some processing error may have occurred when exporting. - raise ChunkUploadError(chunk_id, f"No parquet files found in {chunk_dir} for non-empty chunk") + raise ChunkUploadError(chunk_id, f"No files found to upload in {chunk_dir} for non-empty chunk") # Check that all expected parquet files from the manifest are present. for table_name, table_stats in manifest.table_data.items(): @@ -258,19 +270,16 @@ def _process_chunk(self, replica_chunk: PpdbReplicaChunkExtended) -> None: ) try: - # 1) Upload parquet files, which will happen only for non-empty - # chunks. - if parquet_files: - gcs_names = {path: posixpath.join(gcs_prefix, path.name) for path in parquet_files} + # 1) Upload the files to GCS for non-empty chunks. 
+ if upload_file_list: + gcs_names = {path: posixpath.join(gcs_prefix, path.name) for path in upload_file_list} try: - _LOG.info( - "Uploading %d parquet files to GCS under prefix: %s", len(gcs_names), gcs_prefix - ) + _LOG.info("Uploading %d files to GCS under prefix: %s", len(gcs_names), gcs_prefix) with Timer( "upload_files_time", _MON, tags={"prefix": str(gcs_prefix), "chunk_id": str(chunk_id)} ) as timer: self.storage.upload_files(gcs_names) - total_bytes = sum(p.stat().st_size for p in parquet_files) + total_bytes = sum(p.stat().st_size for p in upload_file_list) timer.add_values(file_count=len(gcs_names), total_bytes=total_bytes) except* UploadError as eg: raise ChunkUploadError(chunk_id, f"{len(eg.exceptions)} upload(s) failed") from eg @@ -284,7 +293,8 @@ def _process_chunk(self, replica_chunk: PpdbReplicaChunkExtended) -> None: except UploadError as e: raise ChunkUploadError(chunk_id, "Manifest upload failed") from e - # 3) Update DB status, but not for empty chunks. + # 3) Update status in the database, but not for empty chunks. + # They have already been marked as skipped during export. if not is_empty: try: self._bq.store_chunk(replica_chunk.with_new_status(ChunkStatus.UPLOADED), True) @@ -294,7 +304,7 @@ def _process_chunk(self, replica_chunk: PpdbReplicaChunkExtended) -> None: ) from e # 4) Publish Pub/Sub staging message to trigger BigQuery load, but - # not for empty chunks. (Empty chunks cannot be staged.) + # not for empty chunks. (Empty chunks do not need to be staged.) 
if not is_empty: try: self._post_to_stage_chunk_topic(self.bucket_name, gcs_prefix, chunk_id) diff --git a/python/lsst/dax/ppdb/bigquery/manifest.py b/python/lsst/dax/ppdb/bigquery/manifest.py index b53c6f5a..da0fa456 100644 --- a/python/lsst/dax/ppdb/bigquery/manifest.py +++ b/python/lsst/dax/ppdb/bigquery/manifest.py @@ -79,6 +79,10 @@ class Manifest(BaseModel): """Name of the compression format used for artifacts (e.g., "gzip", "zstd", "snappy", etc.).""" + includes_update_records: bool = False + """Whether the exported data includes update records (e.g., in a separate + file) or not (`bool`).""" + @property def filename(self) -> str: """Generate the filename for this manifest based on the replica chunk @@ -118,12 +122,15 @@ def from_json_file(cls, file_path: Path) -> Manifest: def is_empty_chunk(self) -> bool: """Check if the manifest represents an empty replica chunk in which - all tables have zero rows. + all tables have zero rows and no update records are included. Returns ------- bool - `True` if all tables have zero rows, indicating an empty chunk, - `False` otherwise. + `True` if all tables have zero rows and no update records are + included, indicating an empty chunk, `False` otherwise. 
""" - return all(table.row_count == 0 for table in self.table_data.values()) + return ( + all(table.row_count == 0 for table in self.table_data.values()) + and not self.includes_update_records + ) diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index 871531a6..bf421015 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -151,7 +151,10 @@ def metadata(self) -> ApdbMetadata: return self._metadata def _generate_manifest( - self, replica_chunk: ReplicaChunk, table_dict: dict[str, ApdbTableData] + self, + replica_chunk: ReplicaChunk, + table_dict: dict[str, ApdbTableData], + update_records: Collection[ApdbUpdateRecord], ) -> Manifest: """Generate the manifest data for the replica chunk.""" return Manifest( @@ -164,6 +167,7 @@ def _generate_manifest( table_name: TableStats(row_count=len(data.rows())) for table_name, data in table_dict.items() }, compression_format=self.parq_compression, + includes_update_records=bool(update_records), ) def store( @@ -217,7 +221,7 @@ def store( # Create manifest for the replica chunk. try: - manifest = self._generate_manifest(replica_chunk, table_dict) + manifest = self._generate_manifest(replica_chunk, table_dict, update_records) _LOG.info("Generated manifest for %s: %s", replica_chunk.id, manifest.model_dump_json()) except Exception: _LOG.exception("Failed to generate manifest for %d", replica_chunk.id) @@ -578,7 +582,8 @@ def validate_config(cls, config: PpdbBigQueryConfig) -> None: def _handle_updates( self, replica_chunk: ReplicaChunk, apdb_update_records: Collection[ApdbUpdateRecord], chunk_dir: Path ) -> None: - """Handle updates to existing records in the PPDB. + """Handle updates to existing records in the PPDB by writing a JSON + file with the update information for the replica chunk. 
Parameters ---------- diff --git a/tests/test_ppdb_bigquery.py b/tests/test_ppdb_bigquery.py index 408a1307..21ee600d 100644 --- a/tests/test_ppdb_bigquery.py +++ b/tests/test_ppdb_bigquery.py @@ -20,6 +20,7 @@ # along with this program. If not, see . import gc +import json import os import posixpath import shutil @@ -180,7 +181,7 @@ def setUp(self): # Make APDB instance and fill it with test data. apdb_config = self.make_apdb_instance() apdb = Apdb.from_config(apdb_config) - self._fill_apdb(apdb) + self._fill_apdb(apdb) # FIXME: Only include replica chunks with the updates apdb_replica = ApdbReplica.from_config(apdb_config) # Make PPDB instance. @@ -378,22 +379,30 @@ def test_json_serialization(self) -> None: ) @pytest.mark.skipif( - pytest.importorskip("lsst.dax.ppdbx.gcp", reason="GCP support not installed") is None, - reason="GCP support is not installed", + pytest.importorskip("lsst.dax.ppdbx.gcp", reason="dax_ppdbx_gcp is not installed") is None, + reason="", ) - def test_gcs_upload(self) -> None: - """Test that the replication output, including the update records JSON - file, is correctly uploaded to Google Cloud Storage after replication. + def test_chunk_uploader(self) -> None: + """Test that the update records are correctly uploaded to Google Cloud + Storage after replication. - This will only run if there is GCP support installed. + This will only run if ``dax_ppdbx_gcp`` is installed, which provides + Google Cloud support. Imports are inlined so that the module can run + without it. 
""" + from lsst.dax.ppdb.bigquery.chunk_uploader import ChunkUploader + from lsst.dax.ppdbx.gcp.gcs import StorageClient + print("\nTesting GCS upload of replication output...") - from lsst.dax.ppdb.bigquery.chunk_uploader import ChunkUploader + # Change the configuration to use a unique test bucket name to avoid + # conflicts + ppdb_config_copy = self.ppdb_config.model_copy() + ppdb_config_copy.bucket_name = generate_test_bucket_name("ppdb-test-gcs-upload") # Patch the ChunkUploader to print the message that would be published - # to the Pub/Sub topic, because there is no support for that service in - # a test environment. + # to the Pub/Sub topic instead of publishing, because there is no + # support for that service in the test environment. class DummyChunkUploader(ChunkUploader): def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_id: int) -> None: message = { @@ -403,13 +412,6 @@ def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_ } print(f"Dummy publish to Pub/Sub topic: {message}") - # Configure the uploader to use a unique object prefix to avoid - # conflicts - ppdb_config_copy = self.ppdb_config.model_copy() - ppdb_config_copy.bucket_name = generate_test_bucket_name("ppdb-test-gcs-upload") - - from lsst.dax.ppdbx.gcp.gcs import StorageClient - # Create the test GCS bucket storage_client = StorageClient(ppdb_config_copy.bucket_name) try: @@ -417,16 +419,30 @@ def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_ except Exception as e: self.fail(f"Failed to create test GCS bucket: {e}") + # Configure and run the uploader uploader = DummyChunkUploader( ppdb_config_copy, wait_interval=0, exit_on_empty=True, exit_on_error=True, ) - print(f"Uploader will copy files to {uploader.bucket_name}/{uploader.prefix}/") uploader.run() + # Retrieve the update records file + update_records_files = storage_client.list_files("**/update_records.json") + self.assertEqual( + 
len(update_records_files), + 1, + f"Expected exactly one update_records.json file in GCS, found " + f"{len(update_records_files)}: {update_records_files}", + ) + update_records_str = storage_client.read_as_string(update_records_files[0]) + + # Print the contents of the update records file for debugging + update_records_json = json.loads(update_records_str) + print(f"Contents of update_records.json in GCS:\n{json.dumps(update_records_json, indent=2)}") + # Delete the test GCS bucket try: storage_client.delete_bucket(force=True) From 6d7dca2cac3feb31f5ace9eb729fe9f323d1b1f3 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 13 Feb 2026 17:58:36 -0600 Subject: [PATCH 08/49] WIP: Add support for expanding update records (modules need to be renamed to camelcase) --- .../lsst/dax/ppdb/bigquery/update_handler.py | 290 ++++++++++++ .../lsst/dax/ppdb/bigquery/update_records.py | 17 +- tests/test_update_handler.py | 446 ++++++++++++++++++ 3 files changed, 748 insertions(+), 5 deletions(-) create mode 100644 python/lsst/dax/ppdb/bigquery/update_handler.py create mode 100644 tests/test_update_handler.py diff --git a/python/lsst/dax/ppdb/bigquery/update_handler.py b/python/lsst/dax/ppdb/bigquery/update_handler.py new file mode 100644 index 00000000..adf0e1fb --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/update_handler.py @@ -0,0 +1,290 @@ +# This file is part of dax_ppdb +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +from typing import Any + +from pydantic import BaseModel, Field + +from lsst.dax.apdb.apdbUpdateRecord import ApdbUpdateRecord + +from .update_records import UpdateRecords + + +# TODO: Move to an expandedUpdateRecord.py module +class ExpandedUpdateRecord(BaseModel): + """ + A single normalized (expanded) update row. + + This model represents one field-level update after expanding an + original logical update event into one row per updated field. + It is the canonical shape loaded into the BigQuery updates table. + """ + + table_name: str = Field( + ..., + min_length=1, + description=( + "Logical target table for the update (e.g., 'DiaObject', " + "'DiaSource'). This determines which production table " + "the update will be applied to." + ), + ) + + record_id: int = Field( + ..., + description=( + "Canonical primary key of the record being modified as an integer. " + "For composite keys, a single integer representation must be used." + ), + ) + + field_name: str = Field( + ..., + min_length=1, + description=( + "Name of the target column being updated within the logical table identified by 'table_name'." + ), + ) + + value_json: Any = Field( + ..., + description=( + "JSON-serializable new value for the field, including explicit " + "None to represent setting the column to NULL. This value must " + "be compatible with the BigQuery JSON type and later castable " + "to the target column type during MERGE." + ), + ) + + replica_chunk_id: int = Field( + ..., + ge=0, + description=( + "Source replica chunk identifier associated with this update. 
" + "Used as part of the deterministic ordering rule when resolving " + "multiple updates to the same (record_id, field_name)." + ), + ) + + update_order: int | None = Field( + default=None, + ge=0, + description=( + "Ordering value within the replica chunk or update batch, " + "if provided by the source system. Nullable if not available. " + "Used to break ties between updates within the same chunk." + ), + ) + + update_time_ns: int | None = Field( + default=None, + ge=0, + description=( + "Source event timestamp in nanoseconds since the epoch, " + "if provided. Nullable if not available. Used as an additional " + "ordering signal during deduplication." + ), + ) + + +# Move to `updateRecordExpander.py` to follow camelcase convention +class UpdateRecordExpander: + """Expand APDB update records into individual field-level updates for + BigQuery. + """ + + _UPDATE_FIELD_MAPPING = { + "reassign_diasource_to_diaobject": ["diaObjectId"], + "reassign_diasource_to_ssobject": ["ssObjectId", "ssObjectReassocTimeMjdTai"], + "withdraw_diasource": ["timeWithdrawnMjdTai"], + "withdraw_diaforcedsource": ["timeWithdrawnMjdTai"], + "close_diaobject_validity": ["validityEndMjdTai", "nDiaSources"], + "update_n_dia_sources": ["nDiaSources"], + } + + _RECORD_ID_FIELD_MAPPING = { + "reassign_diasource_to_diaobject": "diaSourceId", + "reassign_diasource_to_ssobject": "diaSourceId", + "withdraw_diasource": "diaSourceId", + "withdraw_diaforcedsource": ["diaObjectId", "visit", "detector"], + "close_diaobject_validity": "diaObjectId", + "update_n_dia_sources": "diaObjectId", + } + + @classmethod + def get_update_fields(cls, update_type: str) -> list[str]: + """Get the names of fields to update for a given update type. + + Parameters + ---------- + update_type : `str` + The type of update record. + + Returns + ------- + field_names : `list` [ `str` ] + List of field names that should be updated for this update type. + + Raises + ------ + ValueError + If the update_type is not recognized. 
+ """ + if update_type not in cls._UPDATE_FIELD_MAPPING: + raise ValueError(f"Unknown update_type: {update_type}") + + return cls._UPDATE_FIELD_MAPPING[update_type] + + @classmethod + def get_record_id_field(cls, update_type: str) -> str | list[str]: + """Get the field name(s) that serve as the record ID for a given update + type. + + Parameters + ---------- + update_type : `str` + The type of update record. + + Returns + ------- + field_name : `str` or `list` [ `str` ] + Name of the field that contains the record ID for this update type, + or list of field names for composite keys. + + Raises + ------ + ValueError + If the update_type is not recognized. + """ + if update_type not in cls._RECORD_ID_FIELD_MAPPING: + raise ValueError(f"Unknown update_type: {update_type}") + + return cls._RECORD_ID_FIELD_MAPPING[update_type] + + @classmethod + def expand_single_record( + cls, update_record: ApdbUpdateRecord, replica_chunk_id: int + ) -> list[ExpandedUpdateRecord]: + """Expand a single APDB update record into ExpandedUpdateRecord + objects. + + Parameters + ---------- + update_record : `ApdbUpdateRecord` + A single APDB update record to expand. + replica_chunk_id : `int` + The replica chunk ID associated with this update record. + + Returns + ------- + expanded_records : `list` [ `ExpandedUpdateRecord` ] + List of ExpandedUpdateRecord objects, one per field being updated. 
+ """ + update_type = update_record.update_type + field_names = cls.get_update_fields(update_type) + + # Get the target table from the update record + table_name = update_record.apdb_table.name + + # Get the record ID + record_id = cls._generate_record_id(update_record) + + expanded_records = [] + for field_name in field_names: + if not hasattr(update_record, field_name): + raise ValueError( + f"Update record of type {update_type} is missing expected field {field_name}" + ) + + value = getattr(update_record, field_name) + + expanded_record = ExpandedUpdateRecord( + table_name=table_name, + record_id=record_id, + field_name=field_name, + value_json=value, + replica_chunk_id=replica_chunk_id, + update_order=update_record.update_order, + update_time_ns=update_record.update_time_ns, + ) + expanded_records.append(expanded_record) + + return expanded_records + + @classmethod + def _generate_record_id(cls, update_record: ApdbUpdateRecord) -> int: + """Generate a record ID integer from an update record. + + Parameters + ---------- + update_record : `ApdbUpdateRecord` + The update record to generate an ID for. + + Returns + ------- + record_id : `int` + Integer representation of the record's primary key. 
+ """ + update_type = update_record.update_type + id_field = cls.get_record_id_field(update_type) + + if isinstance(id_field, list): + # Handle composite key (e.g., DiaForcedSource) + key_values = [] + for field in id_field: + if not hasattr(update_record, field): + raise ValueError( + f"Update record of type {update_type} is missing expected ID field {field}" + ) + key_values.append(getattr(update_record, field)) + # Create a hash of the composite key components + return hash(tuple(key_values)) + else: + # Handle single field key + if not hasattr(update_record, id_field): + raise ValueError( + f"Update record of type {update_type} is missing expected ID field {id_field}" + ) + return int(getattr(update_record, id_field)) + + @classmethod + def expand_updates(cls, update_records: UpdateRecords) -> list[ExpandedUpdateRecord]: + """Expand the APDB update records into a list of individual updates. + + Parameters + ---------- + update_records : `UpdateRecords` + The APDB update records to expand. + + Returns + ------- + expanded_updates : `list` [ `ExpandedUpdateRecord` ] + A list of individual updates derived from the input update records. 
+ """ + expanded_updates = [] + + for update_record in update_records.records: + expanded_records = cls.expand_single_record(update_record, update_records.replica_chunk_id) + expanded_updates.extend(expanded_records) + + return expanded_updates diff --git a/python/lsst/dax/ppdb/bigquery/update_records.py b/python/lsst/dax/ppdb/bigquery/update_records.py index 56e651be..47c63f01 100644 --- a/python/lsst/dax/ppdb/bigquery/update_records.py +++ b/python/lsst/dax/ppdb/bigquery/update_records.py @@ -29,7 +29,11 @@ from lsst.dax.apdb.apdbUpdateRecord import ApdbUpdateRecord +DEFAULT_FILENAME = "update_records.json" +"""Default filename for the update records JSON file.""" + +# Move to `updateRecords.py` to follow camelcase convention class UpdateRecords(BaseModel): """Data model for APDB update records.""" @@ -43,7 +47,7 @@ class UpdateRecords(BaseModel): """List of APDB update records included in this object.""" @field_serializer("records") - def serialize_update_records( + def serialize_records( self, records: list[ApdbUpdateRecord], ) -> list[dict[str, Any]]: @@ -60,18 +64,16 @@ def serialize_update_records( The serialized JSON data. 
""" serialized_records: list[dict[str, Any]] = [] - for update_record in records: record_dict: dict[str, Any] = json.loads(update_record.to_json()) record_dict["update_time_ns"] = update_record.update_time_ns record_dict["update_order"] = update_record.update_order serialized_records.append(record_dict) - return serialized_records @field_validator("records", mode="before") @classmethod - def deserialize_update_records( + def deserialize_records( cls, records: list[dict[str, Any]] | list[ApdbUpdateRecord], ) -> list[ApdbUpdateRecord]: @@ -104,7 +106,7 @@ def deserialize_update_records( deserialized_records.append(update_record) return deserialized_records - def to_json_file(self, path: Path) -> None: + def write_json_file(self, path: Path) -> None: with open(path, "w") as f: json.dump(self.model_dump(), f, indent=2, default=str) @@ -113,3 +115,8 @@ def from_json_file(cls, path: Path) -> UpdateRecords: with open(path) as f: data = json.load(f) return cls.model_validate(data) + + @classmethod + def from_json_string(cls, json_str: str) -> UpdateRecords: + data = json.loads(json_str) + return cls.model_validate(data) diff --git a/tests/test_update_handler.py b/tests/test_update_handler.py new file mode 100644 index 00000000..02ea0162 --- /dev/null +++ b/tests/test_update_handler.py @@ -0,0 +1,446 @@ +# This file is part of dax_ppdb. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import datetime +import unittest + +import astropy.time + +from lsst.dax.apdb import ( + ApdbCloseDiaObjectValidityRecord, + ApdbReassignDiaSourceToDiaObjectRecord, + ApdbReassignDiaSourceToSSObjectRecord, + ApdbUpdateNDiaSourcesRecord, + ApdbWithdrawDiaForcedSourceRecord, + ApdbWithdrawDiaSourceRecord, +) +from lsst.dax.ppdb.bigquery.update_handler import ExpandedUpdateRecord, UpdateRecordExpander +from lsst.dax.ppdb.bigquery.update_records import UpdateRecords + + +# Move to `test_updateRecordExpander.py` to follow camelcase convention +class TestUpdateRecordExpander(unittest.TestCase): + """Test UpdateRecordExpander functionality.""" + + def setUp(self) -> None: + """Set up test fixtures.""" + # Test time for consistent timestamps + self.update_time = astropy.time.Time("2021-03-01T12:00:00", format="isot", scale="tai") + self.update_time_ns = int(self.update_time.unix_tai * 1e9) + + # Test replica chunk ID + self.replica_chunk_id = 12345 + + def _create_test_update_records(self) -> UpdateRecords: + """Create test UpdateRecords with sample ApdbUpdateRecord instances. + + Based on patterns from _ppdb.py _make_update_records method. 
+ """ + records = [] + + # Reassign DIASource to different DIAObject + records.append( + ApdbReassignDiaSourceToDiaObjectRecord( + update_time_ns=self.update_time_ns, + update_order=0, + diaSourceId=100001, + diaObjectId=300001, + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + ) + + # Reassign DIASource to SSObject + records.append( + ApdbReassignDiaSourceToSSObjectRecord( + update_time_ns=self.update_time_ns, + update_order=1, + diaSourceId=100002, + ssObjectId=2001, + ssObjectReassocTimeMjdTai=float(self.update_time.tai.mjd), + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + ) + + # Withdraw DIASource + records.append( + ApdbWithdrawDiaSourceRecord( + update_time_ns=self.update_time_ns, + update_order=2, + diaSourceId=100003, + timeWithdrawnMjdTai=self.update_time.tai.mjd, + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + ) + + # Withdraw DIAForcedSource + records.append( + ApdbWithdrawDiaForcedSourceRecord( + update_time_ns=self.update_time_ns, + update_order=3, + diaObjectId=200001, + visit=12345, + detector=42, + timeWithdrawnMjdTai=self.update_time.tai.mjd, + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + ) + + # Close DIAObject validity interval + records.append( + ApdbCloseDiaObjectValidityRecord( + update_time_ns=self.update_time_ns, + update_order=4, + diaObjectId=200001, + validityEndMjdTai=self.update_time.tai.mjd, + nDiaSources=5, + ra=45.0, + dec=-30.0, + ) + ) + + # Update DIAObject nDiaSources count + records.append( + ApdbUpdateNDiaSourcesRecord( + update_time_ns=self.update_time_ns, + update_order=5, + diaObjectId=200002, + nDiaSources=10, + ra=45.0, + dec=-30.0, + ) + ) + + return UpdateRecords( + replica_chunk_id=self.replica_chunk_id, + record_count=len(records), + records=records, + file_created_at=datetime.datetime.now(datetime.UTC), + ) + + def test_get_update_fields(self) -> None: + """Test get_update_fields class method.""" + # Test known update types + self.assertEqual( + 
UpdateRecordExpander.get_update_fields("reassign_diasource_to_diaobject"), ["diaObjectId"] + ) + self.assertEqual( + UpdateRecordExpander.get_update_fields("reassign_diasource_to_ssobject"), + ["ssObjectId", "ssObjectReassocTimeMjdTai"], + ) + self.assertEqual( + UpdateRecordExpander.get_update_fields("withdraw_diasource"), ["timeWithdrawnMjdTai"] + ) + self.assertEqual( + UpdateRecordExpander.get_update_fields("withdraw_diaforcedsource"), ["timeWithdrawnMjdTai"] + ) + self.assertEqual( + UpdateRecordExpander.get_update_fields("close_diaobject_validity"), + ["validityEndMjdTai", "nDiaSources"], + ) + self.assertEqual(UpdateRecordExpander.get_update_fields("update_n_dia_sources"), ["nDiaSources"]) + + # Test unknown update type + with self.assertRaises(ValueError) as cm: + UpdateRecordExpander.get_update_fields("unknown_update_type") + self.assertIn("Unknown update_type: unknown_update_type", str(cm.exception)) + + def test_get_record_id_field(self) -> None: + """Test get_record_id_field class method.""" + # Test known update types + self.assertEqual( + UpdateRecordExpander.get_record_id_field("reassign_diasource_to_diaobject"), "diaSourceId" + ) + self.assertEqual( + UpdateRecordExpander.get_record_id_field("reassign_diasource_to_ssobject"), "diaSourceId" + ) + self.assertEqual(UpdateRecordExpander.get_record_id_field("withdraw_diasource"), "diaSourceId") + self.assertEqual( + UpdateRecordExpander.get_record_id_field("withdraw_diaforcedsource"), + ["diaObjectId", "visit", "detector"], + ) + self.assertEqual(UpdateRecordExpander.get_record_id_field("close_diaobject_validity"), "diaObjectId") + self.assertEqual(UpdateRecordExpander.get_record_id_field("update_n_dia_sources"), "diaObjectId") + + # Test unknown update type + with self.assertRaises(ValueError) as cm: + UpdateRecordExpander.get_record_id_field("unknown_update_type") + self.assertIn("Unknown update_type: unknown_update_type", str(cm.exception)) + + def test_expand_single_record_reassign_to_diaobject(self) 
-> None: + """Test expand_single_record with ApdbReassignDiaSourceToDiaObjectRecord.""" + record = ApdbReassignDiaSourceToDiaObjectRecord( + update_time_ns=self.update_time_ns, + update_order=0, + diaSourceId=100001, + diaObjectId=300001, + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + + expanded = UpdateRecordExpander.expand_single_record(record, self.replica_chunk_id) + + # Should expand to 1 record (diaObjectId) + self.assertEqual(len(expanded), 1) + + expanded_record = expanded[0] + self.assertIsInstance(expanded_record, ExpandedUpdateRecord) + self.assertEqual(expanded_record.table_name, "DiaSource") + self.assertEqual(expanded_record.record_id, 100001) + self.assertEqual(expanded_record.field_name, "diaObjectId") + self.assertEqual(expanded_record.value_json, 300001) + self.assertEqual(expanded_record.replica_chunk_id, self.replica_chunk_id) + self.assertEqual(expanded_record.update_order, 0) + self.assertEqual(expanded_record.update_time_ns, self.update_time_ns) + """Test expand_single_record with + ApdbReassignDiaSourceToSSObjectRecord. 
+ """ + record = ApdbReassignDiaSourceToSSObjectRecord( + update_time_ns=self.update_time_ns, + update_order=0, + diaSourceId=100001, + ssObjectId=2001, + ssObjectReassocTimeMjdTai=float(self.update_time.tai.mjd), + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + + expanded = UpdateRecordExpander.expand_single_record(record, self.replica_chunk_id) + + # Should expand to 2 records (ssObjectId and ssObjectReassocTimeMjdTai) + self.assertEqual(len(expanded), 2) + + # Check first expanded record (ssObjectId) + first_record = expanded[0] + self.assertIsInstance(first_record, ExpandedUpdateRecord) + self.assertEqual(first_record.table_name, "DiaSource") + self.assertEqual(first_record.record_id, 100001) + self.assertEqual(first_record.field_name, "ssObjectId") + self.assertEqual(first_record.value_json, 2001) + self.assertEqual(first_record.replica_chunk_id, self.replica_chunk_id) + self.assertEqual(first_record.update_order, 0) + self.assertEqual(first_record.update_time_ns, self.update_time_ns) + + # Check second expanded record (ssObjectReassocTimeMjdTai) + second_record = expanded[1] + self.assertEqual(second_record.table_name, "DiaSource") + self.assertEqual(second_record.record_id, 100001) + self.assertEqual(second_record.field_name, "ssObjectReassocTimeMjdTai") + self.assertEqual(second_record.value_json, float(self.update_time.tai.mjd)) + + def test_expand_single_record_withdraw_diasource(self) -> None: + """Test expand_single_record with ApdbWithdrawDiaSourceRecord.""" + record = ApdbWithdrawDiaSourceRecord( + update_time_ns=self.update_time_ns, + update_order=2, + diaSourceId=100003, + timeWithdrawnMjdTai=self.update_time.tai.mjd, + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + + expanded = UpdateRecordExpander.expand_single_record(record, self.replica_chunk_id) + + # Should expand to 1 record (timeWithdrawnMjdTai) + self.assertEqual(len(expanded), 1) + + expanded_record = expanded[0] + self.assertEqual(expanded_record.table_name, "DiaSource") + 
self.assertEqual(expanded_record.record_id, 100003) + self.assertEqual(expanded_record.field_name, "timeWithdrawnMjdTai") + self.assertEqual(expanded_record.value_json, self.update_time.tai.mjd) + + def test_expand_single_record_reassign_to_ssobject(self) -> None: + """Test expand_single_record with ApdbCloseDiaObjectValidityRecord.""" + record = ApdbCloseDiaObjectValidityRecord( + update_time_ns=self.update_time_ns, + update_order=1, + diaObjectId=200001, + validityEndMjdTai=self.update_time.tai.mjd, + nDiaSources=5, + ra=45.0, + dec=-30.0, + ) + + expanded = UpdateRecordExpander.expand_single_record(record, self.replica_chunk_id) + + # Should expand to 2 records (validityEndMjdTai and nDiaSources) + self.assertEqual(len(expanded), 2) + + # Check first expanded record (validityEndMjdTai) + first_record = expanded[0] + self.assertEqual(first_record.table_name, "DiaObject") + self.assertEqual(first_record.record_id, 200001) + self.assertEqual(first_record.field_name, "validityEndMjdTai") + self.assertEqual(first_record.value_json, self.update_time.tai.mjd) + + # Check second expanded record (nDiaSources) + second_record = expanded[1] + self.assertEqual(second_record.table_name, "DiaObject") + self.assertEqual(second_record.record_id, 200001) + self.assertEqual(second_record.field_name, "nDiaSources") + self.assertEqual(second_record.value_json, 5) + + def test_expand_single_record_update_n_dia_sources(self) -> None: + """Test expand_single_record with ApdbUpdateNDiaSourcesRecord.""" + record = ApdbUpdateNDiaSourcesRecord( + update_time_ns=self.update_time_ns, + update_order=5, + diaObjectId=200002, + nDiaSources=10, + ra=45.0, + dec=-30.0, + ) + + expanded = UpdateRecordExpander.expand_single_record(record, self.replica_chunk_id) + + # Should expand to 1 record (nDiaSources) + self.assertEqual(len(expanded), 1) + + expanded_record = expanded[0] + self.assertEqual(expanded_record.table_name, "DiaObject") + self.assertEqual(expanded_record.record_id, 200002) + 
self.assertEqual(expanded_record.field_name, "nDiaSources") + self.assertEqual(expanded_record.value_json, 10) + + def test_expand_single_record_close_validity(self) -> None: + """Test expand_single_record with ApdbCloseDiaObjectValidityRecord.""" + record = ApdbCloseDiaObjectValidityRecord( + update_time_ns=self.update_time_ns, + update_order=4, + diaObjectId=200001, + validityEndMjdTai=self.update_time.tai.mjd, + nDiaSources=5, + ra=45.0, + dec=-30.0, + ) + + expanded = UpdateRecordExpander.expand_single_record(record, self.replica_chunk_id) + + # Should expand to 2 records (validityEndMjdTai and nDiaSources) + self.assertEqual(len(expanded), 2) + + # Check first expanded record (validityEndMjdTai) + first_record = expanded[0] + self.assertIsInstance(first_record, ExpandedUpdateRecord) + self.assertEqual(first_record.table_name, "DiaObject") + self.assertEqual(first_record.record_id, 200001) + self.assertEqual(first_record.field_name, "validityEndMjdTai") + self.assertEqual(first_record.value_json, self.update_time.tai.mjd) + + # Check second expanded record (nDiaSources) + second_record = expanded[1] + self.assertEqual(second_record.table_name, "DiaObject") + self.assertEqual(second_record.record_id, 200001) + self.assertEqual(second_record.field_name, "nDiaSources") + self.assertEqual(second_record.value_json, 5) + + def test_expand_single_record_withdraw_forcedsource(self) -> None: + """Test expand_single_record with ApdbWithdrawDiaForcedSourceRecord.""" + record = ApdbWithdrawDiaForcedSourceRecord( + update_time_ns=self.update_time_ns, + update_order=2, + diaObjectId=200001, + visit=12345, + detector=42, + timeWithdrawnMjdTai=self.update_time.tai.mjd, + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + + expanded = UpdateRecordExpander.expand_single_record(record, self.replica_chunk_id) + + # Should expand to 1 record (timeWithdrawnMjdTai) + self.assertEqual(len(expanded), 1) + + expanded_record = expanded[0] + self.assertEqual(expanded_record.table_name, 
"DiaForcedSource") + # The record ID should be a hash of the composite key (diaObjectId, + # visit, detector) + expected_record_id = hash((200001, 12345, 42)) + self.assertEqual(expanded_record.record_id, expected_record_id) + self.assertEqual(expanded_record.field_name, "timeWithdrawnMjdTai") + self.assertEqual(expanded_record.value_json, self.update_time.tai.mjd) + + def test_expand_updates_full_integration(self) -> None: + """Test the full expand_updates method with multiple record types.""" + update_records = self._create_test_update_records() + + expanded = UpdateRecordExpander.expand_updates(update_records) + + # Should have 8 total expanded records: + # - 1 from ApdbReassignDiaSourceToDiaObjectRecord + # - 2 from ApdbReassignDiaSourceToSSObjectRecord + # - 1 from ApdbWithdrawDiaSourceRecord + # - 1 from ApdbWithdrawDiaForcedSourceRecord + # - 2 from ApdbCloseDiaObjectValidityRecord + # - 1 from ApdbUpdateNDiaSourcesRecord + self.assertEqual(len(expanded), 8) + + # Verify all expanded records have correct replica_chunk_id + for record in expanded: + self.assertEqual(record.replica_chunk_id, self.replica_chunk_id) + self.assertIsInstance(record.update_time_ns, int) + self.assertIsInstance(record.update_order, int) + + # Check that we have the expected table names + table_names = {record.table_name for record in expanded} + expected_tables = {"DiaSource", "DiaObject", "DiaForcedSource"} + self.assertEqual(table_names, expected_tables) + + # Check that we have the expected field names + field_names = {record.field_name for record in expanded} + expected_fields = { + "diaObjectId", # from reassign to diaobject + "ssObjectId", + "ssObjectReassocTimeMjdTai", # from reassign to ssobject + "timeWithdrawnMjdTai", # from withdraw diasource and withdraw forced source + "validityEndMjdTai", + "nDiaSources", # from close validity and update n dia sources + } + self.assertEqual(field_names, expected_fields) + + def test_expand_updates_empty_records(self) -> None: + """Test 
expand_updates with empty records list.""" + empty_update_records = UpdateRecords( + replica_chunk_id=self.replica_chunk_id, + record_count=0, + records=[], + file_created_at=datetime.datetime.now(datetime.UTC), + ) + + expanded = UpdateRecordExpander.expand_updates(empty_update_records) + self.assertEqual(len(expanded), 0) + + +if __name__ == "__main__": + unittest.main() From dcbf99267363d0353f4c06f9863afcc054f93688 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 13 Feb 2026 17:58:56 -0600 Subject: [PATCH 09/49] WIP: Updates to ppdb_bigquery and test --- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 2 +- tests/test_ppdb_bigquery.py | 39 ++++++++++++------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index bf421015..adc13673 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -602,7 +602,7 @@ def _handle_updates( records=apdb_update_records, record_count=len(apdb_update_records), ) - update_records.to_json_file(chunk_dir / "update_records.json") + update_records.write_json_file(chunk_dir / "update_records.json") _LOG.info( "Saved %d update records for %s to %s", diff --git a/tests/test_ppdb_bigquery.py b/tests/test_ppdb_bigquery.py index 21ee600d..dd4d0954 100644 --- a/tests/test_ppdb_bigquery.py +++ b/tests/test_ppdb_bigquery.py @@ -169,8 +169,8 @@ def generate_test_bucket_name(test_prefix: str = "ppdb-test") -> str: class UpdateRecordsTestCase(_PostgresMixin, ApdbMixin, unittest.TestCase): - """A test case for the PpdbBigQuery class update functionality using a - Postgres backend. + """A test case for the handling of APDB record updates by PpdbBigQuery and + related classes including the ChunkUploader. 
""" include_update_records = True @@ -189,18 +189,16 @@ def setUp(self): self.ppdb = Ppdb.from_config(self.ppdb_config) assert isinstance(self.ppdb, PpdbBigQuery) - # Replicate those to PPDB. + # Replicate APDB replica chunks to the PPDB. replicator = Replicator( apdb_replica, self.ppdb, update=False, min_wait_time=0, max_wait_time=0, check_interval=0 ) - - # Copy chunks. replicator.run(exit_on_empty=True) def test_json_serialization(self) -> None: """Test that the APDB update records are correctly saved to a JSON file - in the replication output and can be read back correctly as - UpdateRecords objects. + in the replication output and can be read back as valid UpdateRecords + objects. """ update_records_path = self.ppdb.replication_path / "2021/03/01/1614600000" / "update_records.json" self.assertTrue(update_records_path.exists(), "Update records file not found in replication output") @@ -385,16 +383,10 @@ def test_json_serialization(self) -> None: def test_chunk_uploader(self) -> None: """Test that the update records are correctly uploaded to Google Cloud Storage after replication. - - This will only run if ``dax_ppdbx_gcp`` is installed, which provides - Google Cloud support. Imports are inlined so that the module can run - without it. """ from lsst.dax.ppdb.bigquery.chunk_uploader import ChunkUploader from lsst.dax.ppdbx.gcp.gcs import StorageClient - print("\nTesting GCS upload of replication output...") - # Change the configuration to use a unique test bucket name to avoid # conflicts ppdb_config_copy = self.ppdb_config.model_copy() @@ -443,6 +435,27 @@ def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_ update_records_json = json.loads(update_records_str) print(f"Contents of update_records.json in GCS:\n{json.dumps(update_records_json, indent=2)}") + # Load the update records into the data model and perform a few basic + # checks (test_json_serialization already tests this in detail, so we + # just check a few key fields here). 
+ update_records = UpdateRecords.model_validate(update_records_json) + self.assertEqual( + update_records.replica_chunk_id, + 1614600000, + "Unexpected replica chunk ID in update records file from GCS", + ) + self.assertEqual( + update_records.record_count, + 3, + f"Expected record_count of 3 in update records file from GCS, found " + f"{update_records.record_count}", + ) + self.assertEqual( + len(update_records.records), + 3, + f"Expected 3 update records in the file from GCS, found {len(update_records.records)}", + ) + # Delete the test GCS bucket try: storage_client.delete_bucket(force=True) From 2312aeefa0067a9306f17e47b5612b0f59150e58 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 17 Feb 2026 15:36:49 -0600 Subject: [PATCH 10/49] Create new package for handling APDB updates --- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 2 +- .../dax/ppdb/bigquery/updates/__init__.py | 23 +++++++++++++++++++ .../bigquery/{ => updates}/update_handler.py | 0 .../bigquery/{ => updates}/update_records.py | 0 tests/test_ppdb_bigquery.py | 2 +- tests/test_update_handler.py | 3 +-- 6 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 python/lsst/dax/ppdb/bigquery/updates/__init__.py rename python/lsst/dax/ppdb/bigquery/{ => updates}/update_handler.py (100%) rename python/lsst/dax/ppdb/bigquery/{ => updates}/update_records.py (100%) diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index adc13673..a6c72fa9 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -46,7 +46,7 @@ from ..sql import PpdbSqlBase, PpdbSqlBaseConfig from .manifest import Manifest, TableStats from .ppdb_replica_chunk_extended import ChunkStatus, PpdbReplicaChunkExtended -from .update_records import UpdateRecords +from .updates.update_records import UpdateRecords __all__ = ["ConfigValidationError", "PpdbBigQuery", "PpdbBigQueryConfig"] diff --git 
a/python/lsst/dax/ppdb/bigquery/updates/__init__.py b/python/lsst/dax/ppdb/bigquery/updates/__init__.py new file mode 100644 index 00000000..dd3a25f1 --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/updates/__init__.py @@ -0,0 +1,23 @@ +# This file is part of dax_ppdb +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +from .update_records import UpdateRecords +from .update_handler import ExpandedUpdateRecord, UpdateRecordExpander diff --git a/python/lsst/dax/ppdb/bigquery/update_handler.py b/python/lsst/dax/ppdb/bigquery/updates/update_handler.py similarity index 100% rename from python/lsst/dax/ppdb/bigquery/update_handler.py rename to python/lsst/dax/ppdb/bigquery/updates/update_handler.py diff --git a/python/lsst/dax/ppdb/bigquery/update_records.py b/python/lsst/dax/ppdb/bigquery/updates/update_records.py similarity index 100% rename from python/lsst/dax/ppdb/bigquery/update_records.py rename to python/lsst/dax/ppdb/bigquery/updates/update_records.py diff --git a/tests/test_ppdb_bigquery.py b/tests/test_ppdb_bigquery.py index dd4d0954..5bf253bd 100644 --- a/tests/test_ppdb_bigquery.py +++ b/tests/test_ppdb_bigquery.py @@ -40,7 +40,7 @@ from lsst.dax.apdb.sql import ApdbSql from lsst.dax.ppdb import Ppdb, PpdbConfig from lsst.dax.ppdb.bigquery import PpdbBigQuery -from lsst.dax.ppdb.bigquery.update_records import UpdateRecords +from lsst.dax.ppdb.bigquery.updates import UpdateRecords from lsst.dax.ppdb.replicator import Replicator from lsst.dax.ppdb.tests import ApdbMixin, PpdbTest diff --git a/tests/test_update_handler.py b/tests/test_update_handler.py index 02ea0162..06abc73f 100644 --- a/tests/test_update_handler.py +++ b/tests/test_update_handler.py @@ -32,8 +32,7 @@ ApdbWithdrawDiaForcedSourceRecord, ApdbWithdrawDiaSourceRecord, ) -from lsst.dax.ppdb.bigquery.update_handler import ExpandedUpdateRecord, UpdateRecordExpander -from lsst.dax.ppdb.bigquery.update_records import UpdateRecords +from lsst.dax.ppdb.bigquery.updates import ExpandedUpdateRecord, UpdateRecordExpander, UpdateRecords # Move to `test_updateRecordExpander.py` to follow camelcase convention From e307d47eed39be8987bffd3c703f283aa4888dfd Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 17 Feb 2026 15:43:06 -0600 Subject: [PATCH 11/49] Add expanded_update_record module --- 
.../updates/expanded_update_record.py | 76 +++++++++++++++++ .../ppdb/bigquery/updates/update_handler.py | 83 +------------------ 2 files changed, 77 insertions(+), 82 deletions(-) create mode 100644 python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py diff --git a/python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py b/python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py new file mode 100644 index 00000000..fa5df388 --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py @@ -0,0 +1,76 @@ +# This file is part of dax_ppdb +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +from typing import Any + +from pydantic import BaseModel, Field + + +class ExpandedUpdateRecord(BaseModel): + """ + A single normalized (expanded) update row. + + This model represents one field-level update after expanding an + original logical update event into one row per updated field. + It is the canonical shape loaded into the BigQuery updates table. 
+ """ + + table_name: str = Field( + ..., + min_length=1, + description=("Logical target table for the update (e.g., 'DiaObject', 'DiaSource')."), + ) + + record_id: int = Field( + ..., + description=("Canonical identifier of the record being modified."), + ) + + field_name: str = Field( + ..., + min_length=1, + description=("Name of the target column being updated."), + ) + + value_json: Any = Field( + ..., + description=("JSON-serializable new value for the field."), + ) + + replica_chunk_id: int = Field( + ..., + ge=0, + description=("Source replica chunk identifier associated with this update."), + ) + + update_order: int | None = Field( + default=None, + ge=0, + description=("Ordering value within the replica chunk or update batch."), + ) + + update_time_ns: int | None = Field( + default=None, + ge=0, + description=("Source event timestamp in nanoseconds since the epoch."), + ) diff --git a/python/lsst/dax/ppdb/bigquery/updates/update_handler.py b/python/lsst/dax/ppdb/bigquery/updates/update_handler.py index adf0e1fb..15191ca5 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/update_handler.py +++ b/python/lsst/dax/ppdb/bigquery/updates/update_handler.py @@ -21,93 +21,12 @@ from __future__ import annotations -from typing import Any - -from pydantic import BaseModel, Field - from lsst.dax.apdb.apdbUpdateRecord import ApdbUpdateRecord +from .expanded_update_record import ExpandedUpdateRecord from .update_records import UpdateRecords -# TODO: Move to an expandedUpdateRecord.py module -class ExpandedUpdateRecord(BaseModel): - """ - A single normalized (expanded) update row. - - This model represents one field-level update after expanding an - original logical update event into one row per updated field. - It is the canonical shape loaded into the BigQuery updates table. - """ - - table_name: str = Field( - ..., - min_length=1, - description=( - "Logical target table for the update (e.g., 'DiaObject', " - "'DiaSource'). 
This determines which production table " - "the update will be applied to." - ), - ) - - record_id: int = Field( - ..., - description=( - "Canonical primary key of the record being modified as an integer. " - "For composite keys, a single integer representation must be used." - ), - ) - - field_name: str = Field( - ..., - min_length=1, - description=( - "Name of the target column being updated within the logical table identified by 'table_name'." - ), - ) - - value_json: Any = Field( - ..., - description=( - "JSON-serializable new value for the field, including explicit " - "None to represent setting the column to NULL. This value must " - "be compatible with the BigQuery JSON type and later castable " - "to the target column type during MERGE." - ), - ) - - replica_chunk_id: int = Field( - ..., - ge=0, - description=( - "Source replica chunk identifier associated with this update. " - "Used as part of the deterministic ordering rule when resolving " - "multiple updates to the same (record_id, field_name)." - ), - ) - - update_order: int | None = Field( - default=None, - ge=0, - description=( - "Ordering value within the replica chunk or update batch, " - "if provided by the source system. Nullable if not available. " - "Used to break ties between updates within the same chunk." - ), - ) - - update_time_ns: int | None = Field( - default=None, - ge=0, - description=( - "Source event timestamp in nanoseconds since the epoch, " - "if provided. Nullable if not available. Used as an additional " - "ordering signal during deduplication." - ), - ) - - -# Move to `updateRecordExpander.py` to follow camelcase convention class UpdateRecordExpander: """Expand APDB update records into individual field-level updates for BigQuery. 
From 021af3d8eef93018e1b287afe0fb88b728aeb8c5 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 17 Feb 2026 15:51:35 -0600 Subject: [PATCH 12/49] Rename update_handler module to update_record_expander --- python/lsst/dax/ppdb/bigquery/updates/__init__.py | 3 ++- .../updates/{update_handler.py => update_record_expander.py} | 0 .../{test_update_handler.py => test_update_record_expander.py} | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) rename python/lsst/dax/ppdb/bigquery/updates/{update_handler.py => update_record_expander.py} (100%) rename tests/{test_update_handler.py => test_update_record_expander.py} (99%) diff --git a/python/lsst/dax/ppdb/bigquery/updates/__init__.py b/python/lsst/dax/ppdb/bigquery/updates/__init__.py index dd3a25f1..35cd37ae 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/__init__.py +++ b/python/lsst/dax/ppdb/bigquery/updates/__init__.py @@ -20,4 +20,5 @@ # along with this program. If not, see . from .update_records import UpdateRecords -from .update_handler import ExpandedUpdateRecord, UpdateRecordExpander +from .expanded_update_record import ExpandedUpdateRecord +from .update_record_expander import UpdateRecordExpander diff --git a/python/lsst/dax/ppdb/bigquery/updates/update_handler.py b/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py similarity index 100% rename from python/lsst/dax/ppdb/bigquery/updates/update_handler.py rename to python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py diff --git a/tests/test_update_handler.py b/tests/test_update_record_expander.py similarity index 99% rename from tests/test_update_handler.py rename to tests/test_update_record_expander.py index 06abc73f..54050e89 100644 --- a/tests/test_update_handler.py +++ b/tests/test_update_record_expander.py @@ -35,7 +35,6 @@ from lsst.dax.ppdb.bigquery.updates import ExpandedUpdateRecord, UpdateRecordExpander, UpdateRecords -# Move to `test_updateRecordExpander.py` to follow camelcase convention class 
TestUpdateRecordExpander(unittest.TestCase): """Test UpdateRecordExpander functionality.""" From 465cf32e2d18ebc769ea70a9d3f7cd46b6a6167d Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 17 Feb 2026 16:33:12 -0600 Subject: [PATCH 13/49] Model record_id as a list of integers --- .../updates/expanded_update_record.py | 8 +- .../updates/update_record_expander.py | 74 ++++++++++++------- tests/test_update_record_expander.py | 61 ++++++++------- 3 files changed, 89 insertions(+), 54 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py b/python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py index fa5df388..d23c87c2 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py +++ b/python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py @@ -41,9 +41,13 @@ class ExpandedUpdateRecord(BaseModel): description=("Logical target table for the update (e.g., 'DiaObject', 'DiaSource')."), ) - record_id: int = Field( + record_id: list[int] = Field( ..., - description=("Canonical identifier of the record being modified."), + description=( + "Identifier of the record being updated. For update types with a single record ID, this " + "will be a list of one element. For updates on records with a composite key " + "(e.g., DiaForcedSource), this will include all components of the key, in order." 
+ ), ) field_name: str = Field( diff --git a/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py b/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py index 15191ca5..18c632c9 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py +++ b/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py @@ -75,7 +75,7 @@ def get_update_fields(cls, update_type: str) -> list[str]: return cls._UPDATE_FIELD_MAPPING[update_type] @classmethod - def get_record_id_field(cls, update_type: str) -> str | list[str]: + def get_record_id_field_names(cls, update_type: str) -> str | list[str]: """Get the field name(s) that serve as the record ID for a given update type. @@ -100,6 +100,32 @@ def get_record_id_field(cls, update_type: str) -> str | list[str]: return cls._RECORD_ID_FIELD_MAPPING[update_type] + @classmethod + def get_record_id_field(cls, update_type: str) -> str | list[str]: + """Get the field name(s) that serve as the record ID for a given update + type. + + This method is an alias for get_record_id_field_names for backward + compatibility. + + Parameters + ---------- + update_type : `str` + The type of update record. + + Returns + ------- + field_name : `str` or `list` [ `str` ] + Name of the field that contains the record ID for this update type, + or list of field names for composite keys. + + Raises + ------ + ValueError + If the update_type is not recognized. 
+ """ + return cls.get_record_id_field_names(update_type) + @classmethod def expand_single_record( cls, update_record: ApdbUpdateRecord, replica_chunk_id: int @@ -126,7 +152,7 @@ def expand_single_record( table_name = update_record.apdb_table.name # Get the record ID - record_id = cls._generate_record_id(update_record) + record_id = cls._get_record_id(update_record) expanded_records = [] for field_name in field_names: @@ -151,8 +177,8 @@ def expand_single_record( return expanded_records @classmethod - def _generate_record_id(cls, update_record: ApdbUpdateRecord) -> int: - """Generate a record ID integer from an update record. + def _get_record_id(cls, update_record: ApdbUpdateRecord) -> list[int]: + """Generate a record ID from an update record. Parameters ---------- @@ -161,30 +187,28 @@ def _generate_record_id(cls, update_record: ApdbUpdateRecord) -> int: Returns ------- - record_id : `int` - Integer representation of the record's primary key. + record_id : `list` [ `int` ] + The record ID as a list of integers. For simple keys, a + single-element list. For composite keys, a multi-element list. 
""" update_type = update_record.update_type - id_field = cls.get_record_id_field(update_type) - - if isinstance(id_field, list): - # Handle composite key (e.g., DiaForcedSource) - key_values = [] - for field in id_field: - if not hasattr(update_record, field): - raise ValueError( - f"Update record of type {update_type} is missing expected ID field {field}" - ) - key_values.append(getattr(update_record, field)) - # Create a hash of the composite key components - return hash(tuple(key_values)) + id_fields = cls.get_record_id_field_names(update_type) + + # Handle both single field (string) and composite fields (list) + if isinstance(id_fields, str): + # Single field key + field = id_fields + if not hasattr(update_record, field): + raise ValueError(f"Update record of type {update_type} is missing expected ID field {field}") + return [int(getattr(update_record, field))] else: - # Handle single field key - if not hasattr(update_record, id_field): - raise ValueError( - f"Update record of type {update_type} is missing expected ID field {id_field}" - ) - return int(getattr(update_record, id_field)) + # Composite key (list of fields) + record_id = [] + for field in id_fields: + if not hasattr(update_record, field): + raise ValueError(f"Update record of type {update_type} is missing expected ID field {field}") + record_id.append(int(getattr(update_record, field))) + return record_id @classmethod def expand_updates(cls, update_records: UpdateRecords) -> list[ExpandedUpdateRecord]: diff --git a/tests/test_update_record_expander.py b/tests/test_update_record_expander.py index 54050e89..b9f487a4 100644 --- a/tests/test_update_record_expander.py +++ b/tests/test_update_record_expander.py @@ -191,7 +191,9 @@ def test_get_record_id_field(self) -> None: self.assertIn("Unknown update_type: unknown_update_type", str(cm.exception)) def test_expand_single_record_reassign_to_diaobject(self) -> None: - """Test expand_single_record with ApdbReassignDiaSourceToDiaObjectRecord.""" + """Test 
expand_single_record with + ApdbReassignDiaSourceToDiaObjectRecord. + """ record = ApdbReassignDiaSourceToDiaObjectRecord( update_time_ns=self.update_time_ns, update_order=0, @@ -210,7 +212,7 @@ def test_expand_single_record_reassign_to_diaobject(self) -> None: expanded_record = expanded[0] self.assertIsInstance(expanded_record, ExpandedUpdateRecord) self.assertEqual(expanded_record.table_name, "DiaSource") - self.assertEqual(expanded_record.record_id, 100001) + self.assertEqual(expanded_record.record_id, [100001]) self.assertEqual(expanded_record.field_name, "diaObjectId") self.assertEqual(expanded_record.value_json, 300001) self.assertEqual(expanded_record.replica_chunk_id, self.replica_chunk_id) @@ -239,7 +241,7 @@ def test_expand_single_record_reassign_to_diaobject(self) -> None: first_record = expanded[0] self.assertIsInstance(first_record, ExpandedUpdateRecord) self.assertEqual(first_record.table_name, "DiaSource") - self.assertEqual(first_record.record_id, 100001) + self.assertEqual(first_record.record_id, [100001]) self.assertEqual(first_record.field_name, "ssObjectId") self.assertEqual(first_record.value_json, 2001) self.assertEqual(first_record.replica_chunk_id, self.replica_chunk_id) @@ -249,7 +251,7 @@ def test_expand_single_record_reassign_to_diaobject(self) -> None: # Check second expanded record (ssObjectReassocTimeMjdTai) second_record = expanded[1] self.assertEqual(second_record.table_name, "DiaSource") - self.assertEqual(second_record.record_id, 100001) + self.assertEqual(second_record.record_id, [100001]) self.assertEqual(second_record.field_name, "ssObjectReassocTimeMjdTai") self.assertEqual(second_record.value_json, float(self.update_time.tai.mjd)) @@ -272,40 +274,45 @@ def test_expand_single_record_withdraw_diasource(self) -> None: expanded_record = expanded[0] self.assertEqual(expanded_record.table_name, "DiaSource") - self.assertEqual(expanded_record.record_id, 100003) + self.assertEqual(expanded_record.record_id, [100003]) 
self.assertEqual(expanded_record.field_name, "timeWithdrawnMjdTai") self.assertEqual(expanded_record.value_json, self.update_time.tai.mjd) def test_expand_single_record_reassign_to_ssobject(self) -> None: - """Test expand_single_record with ApdbCloseDiaObjectValidityRecord.""" - record = ApdbCloseDiaObjectValidityRecord( + """Test expand_single_record with ApdbReassignDiaSourceToSSObjectRecord.""" + record = ApdbReassignDiaSourceToSSObjectRecord( update_time_ns=self.update_time_ns, update_order=1, - diaObjectId=200001, - validityEndMjdTai=self.update_time.tai.mjd, - nDiaSources=5, + diaSourceId=100002, + ssObjectId=2001, + ssObjectReassocTimeMjdTai=float(self.update_time.tai.mjd), ra=45.0, dec=-30.0, + midpointMjdTai=60000.0, ) expanded = UpdateRecordExpander.expand_single_record(record, self.replica_chunk_id) - # Should expand to 2 records (validityEndMjdTai and nDiaSources) + # Should expand to 2 records (ssObjectId and ssObjectReassocTimeMjdTai) self.assertEqual(len(expanded), 2) - # Check first expanded record (validityEndMjdTai) + # Check first expanded record (ssObjectId) first_record = expanded[0] - self.assertEqual(first_record.table_name, "DiaObject") - self.assertEqual(first_record.record_id, 200001) - self.assertEqual(first_record.field_name, "validityEndMjdTai") - self.assertEqual(first_record.value_json, self.update_time.tai.mjd) + self.assertIsInstance(first_record, ExpandedUpdateRecord) + self.assertEqual(first_record.table_name, "DiaSource") + self.assertEqual(first_record.record_id, [100002]) + self.assertEqual(first_record.field_name, "ssObjectId") + self.assertEqual(first_record.value_json, 2001) + self.assertEqual(first_record.replica_chunk_id, self.replica_chunk_id) + self.assertEqual(first_record.update_order, 1) + self.assertEqual(first_record.update_time_ns, self.update_time_ns) - # Check second expanded record (nDiaSources) + # Check second expanded record (ssObjectReassocTimeMjdTai) second_record = expanded[1] - 
self.assertEqual(second_record.table_name, "DiaObject") - self.assertEqual(second_record.record_id, 200001) - self.assertEqual(second_record.field_name, "nDiaSources") - self.assertEqual(second_record.value_json, 5) + self.assertEqual(second_record.table_name, "DiaSource") + self.assertEqual(second_record.record_id, [100002]) + self.assertEqual(second_record.field_name, "ssObjectReassocTimeMjdTai") + self.assertEqual(second_record.value_json, float(self.update_time.tai.mjd)) def test_expand_single_record_update_n_dia_sources(self) -> None: """Test expand_single_record with ApdbUpdateNDiaSourcesRecord.""" @@ -325,7 +332,7 @@ def test_expand_single_record_update_n_dia_sources(self) -> None: expanded_record = expanded[0] self.assertEqual(expanded_record.table_name, "DiaObject") - self.assertEqual(expanded_record.record_id, 200002) + self.assertEqual(expanded_record.record_id, [200002]) self.assertEqual(expanded_record.field_name, "nDiaSources") self.assertEqual(expanded_record.value_json, 10) @@ -350,14 +357,14 @@ def test_expand_single_record_close_validity(self) -> None: first_record = expanded[0] self.assertIsInstance(first_record, ExpandedUpdateRecord) self.assertEqual(first_record.table_name, "DiaObject") - self.assertEqual(first_record.record_id, 200001) + self.assertEqual(first_record.record_id, [200001]) self.assertEqual(first_record.field_name, "validityEndMjdTai") self.assertEqual(first_record.value_json, self.update_time.tai.mjd) # Check second expanded record (nDiaSources) second_record = expanded[1] self.assertEqual(second_record.table_name, "DiaObject") - self.assertEqual(second_record.record_id, 200001) + self.assertEqual(second_record.record_id, [200001]) self.assertEqual(second_record.field_name, "nDiaSources") self.assertEqual(second_record.value_json, 5) @@ -382,9 +389,9 @@ def test_expand_single_record_withdraw_forcedsource(self) -> None: expanded_record = expanded[0] self.assertEqual(expanded_record.table_name, "DiaForcedSource") - # The record ID 
should be a hash of the composite key (diaObjectId, - # visit, detector) - expected_record_id = hash((200001, 12345, 42)) + # The record ID should be a list of the composite key components + # [diaObjectId, visit, detector] for BigQuery compatibility + expected_record_id = [200001, 12345, 42] self.assertEqual(expanded_record.record_id, expected_record_id) self.assertEqual(expanded_record.field_name, "timeWithdrawnMjdTai") self.assertEqual(expanded_record.value_json, self.update_time.tai.mjd) From 4a9df5d54bb9b137029e9a693bdd98daf62990ca Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 17 Feb 2026 17:52:51 -0600 Subject: [PATCH 14/49] Add insertion of update records into BigQuery --- .../dax/ppdb/bigquery/updates/__init__.py | 1 + .../updates/expanded_update_record.py | 2 +- .../updates/update_record_expander.py | 6 +- .../ppdb/bigquery/updates/updates_table.py | 154 ++++++++++++++++ python/lsst/dax/ppdb/tests/_updates.py | 130 +++++++++++++ tests/test_update_record_expander.py | 174 +++--------------- tests/test_updates_table.py | 165 +++++++++++++++++ 7 files changed, 481 insertions(+), 151 deletions(-) create mode 100644 python/lsst/dax/ppdb/bigquery/updates/updates_table.py create mode 100644 python/lsst/dax/ppdb/tests/_updates.py create mode 100644 tests/test_updates_table.py diff --git a/python/lsst/dax/ppdb/bigquery/updates/__init__.py b/python/lsst/dax/ppdb/bigquery/updates/__init__.py index 35cd37ae..a21b1add 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/__init__.py +++ b/python/lsst/dax/ppdb/bigquery/updates/__init__.py @@ -22,3 +22,4 @@ from .update_records import UpdateRecords from .expanded_update_record import ExpandedUpdateRecord from .update_record_expander import UpdateRecordExpander +from .updates_table import UpdatesTable diff --git a/python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py b/python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py index d23c87c2..d59c9785 100644 --- 
a/python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py +++ b/python/lsst/dax/ppdb/bigquery/updates/expanded_update_record.py @@ -1,4 +1,4 @@ -# This file is part of dax_ppdb +# This file is part of dax_ppdb. # # Developed for the LSST Data Management System. # This product includes software developed by the LSST Project diff --git a/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py b/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py index 18c632c9..e9b6e911 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py +++ b/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py @@ -1,4 +1,4 @@ -# This file is part of dax_ppdb +# This file is part of dax_ppdb. # # Developed for the LSST Data Management System. # This product includes software developed by the LSST Project @@ -206,7 +206,9 @@ def _get_record_id(cls, update_record: ApdbUpdateRecord) -> list[int]: record_id = [] for field in id_fields: if not hasattr(update_record, field): - raise ValueError(f"Update record of type {update_type} is missing expected ID field {field}") + raise ValueError( + f"Update record of type {update_type} is missing expected ID field {field}" + ) record_id.append(int(getattr(update_record, field))) return record_id diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_table.py b/python/lsst/dax/ppdb/bigquery/updates/updates_table.py new file mode 100644 index 00000000..c39d1592 --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_table.py @@ -0,0 +1,154 @@ +# This file is part of dax_ppdb. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +from collections.abc import Iterable +from typing import Any + +from google.cloud import bigquery + +from .expanded_update_record import ExpandedUpdateRecord + + +class UpdatesTable: + """ + Manage a BigQuery updates table for `ExpandedUpdateRecord` rows. + + This class is responsible for creating the updates table with the correct + schema and appending expanded update records into it. + """ + + def __init__(self, client: bigquery.Client, table_fqn: str) -> None: + """ + Parameters + ---------- + client + BigQuery client. + table_fqn + Fully-qualified table name in the form ``"project.dataset.table"``. + """ + self._client: bigquery.Client = client + self._table_fqn: str = table_fqn + + @property + def table_fqn(self) -> str: + """ + Fully-qualified BigQuery table name. + + Returns + ------- + str + Table name in the form ``"project.dataset.table"``. + """ + return self._table_fqn + + def create(self) -> bigquery.Table: + """ + Create the updates table. + + Returns + ------- + google.cloud.bigquery.Table + The created table. + + Raises + ------ + google.api_core.exceptions.Conflict + If the table already exists. 
+ + Notes + ----- + Schema: + + - table_name: STRING (REQUIRED) + - record_id: ARRAY (REQUIRED) + - field_name: STRING (REQUIRED) + - value_json: JSON (REQUIRED) + - replica_chunk_id: INT64 (REQUIRED) + - update_order: INT64 (NULLABLE) + - update_time_ns: INT64 (NULLABLE) + """ + schema: list[bigquery.SchemaField] = [ + bigquery.SchemaField("table_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("record_id", "INT64", mode="REPEATED"), + bigquery.SchemaField("field_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("value_json", "JSON", mode="REQUIRED"), + bigquery.SchemaField("replica_chunk_id", "INT64", mode="REQUIRED"), + bigquery.SchemaField("update_order", "INT64", mode="NULLABLE"), + bigquery.SchemaField("update_time_ns", "INT64", mode="NULLABLE"), + ] + + table = bigquery.Table(self._table_fqn, schema=schema) + return self._client.create_table(table) + + def append(self, records: Iterable[ExpandedUpdateRecord]) -> bigquery.LoadJob: + """ + Append `ExpandedUpdateRecord` rows into the updates table. + + Parameters + ---------- + records + Iterable of update records to append. + + Returns + ------- + google.cloud.bigquery.LoadJob + Completed BigQuery load job. + + Raises + ------ + RuntimeError + If the BigQuery load job completes with errors. + + Notes + ----- + This uses a batch load via `Client.load_table_from_json` (not streaming + inserts). The table must already exist. 
+ """ + rows: list[dict[str, Any]] = [ + { + "table_name": r.table_name, + "record_id": r.record_id, + "field_name": r.field_name, + "value_json": r.value_json, + "replica_chunk_id": r.replica_chunk_id, + "update_order": r.update_order, + "update_time_ns": r.update_time_ns, + } + for r in records + ] + + print("Appending rows to BigQuery:", rows) # Debug print to verify the data being loaded + + job = self._client.load_table_from_json( + rows, + self._table_fqn, + job_config=bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_APPEND, + ), + ) + job.result() + + if job.errors: + raise RuntimeError(f"BigQuery load failed: {job.errors}") + + return job diff --git a/python/lsst/dax/ppdb/tests/_updates.py b/python/lsst/dax/ppdb/tests/_updates.py new file mode 100644 index 00000000..52069359 --- /dev/null +++ b/python/lsst/dax/ppdb/tests/_updates.py @@ -0,0 +1,130 @@ +# This file is part of dax_ppdb. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import datetime + +from lsst.dax.apdb import ( + ApdbCloseDiaObjectValidityRecord, + ApdbReassignDiaSourceToDiaObjectRecord, + ApdbReassignDiaSourceToSSObjectRecord, + ApdbUpdateNDiaSourcesRecord, + ApdbWithdrawDiaForcedSourceRecord, + ApdbWithdrawDiaSourceRecord, +) + +from ..bigquery.updates import UpdateRecords + + +def _create_test_update_records() -> UpdateRecords: + """Create test UpdateRecords with sample ApdbUpdateRecord instances.""" + records = [] + + # Hardcoded test values + test_update_time_ns = 1640995200000000000 # 2022-01-01 00:00:00 UTC in nanoseconds + test_mjd_tai = 59580.0 # Corresponding MJD TAI for 2022-01-01 + test_replica_chunk_id = 12345 + + # Reassign DIASource to different DIAObject + records.append( + ApdbReassignDiaSourceToDiaObjectRecord( + update_time_ns=test_update_time_ns, + update_order=0, + diaSourceId=100001, + diaObjectId=300001, + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + ) + + # Reassign DIASource to SSObject + records.append( + ApdbReassignDiaSourceToSSObjectRecord( + update_time_ns=test_update_time_ns, + update_order=1, + diaSourceId=100002, + ssObjectId=2001, + ssObjectReassocTimeMjdTai=test_mjd_tai, + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + ) + + # Withdraw DIASource + records.append( + ApdbWithdrawDiaSourceRecord( + update_time_ns=test_update_time_ns, + update_order=2, + diaSourceId=100003, + timeWithdrawnMjdTai=test_mjd_tai, + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + ) + + # Withdraw DIAForcedSource + records.append( + ApdbWithdrawDiaForcedSourceRecord( + update_time_ns=test_update_time_ns, + update_order=3, + diaObjectId=200001, + visit=12345, + detector=42, + timeWithdrawnMjdTai=test_mjd_tai, + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + ) + + # Close DIAObject validity interval + records.append( + ApdbCloseDiaObjectValidityRecord( + update_time_ns=test_update_time_ns, + update_order=4, + diaObjectId=200001, + validityEndMjdTai=test_mjd_tai, + nDiaSources=5, + 
ra=45.0, + dec=-30.0, + ) + ) + + # Update DIAObject nDiaSources count + records.append( + ApdbUpdateNDiaSourcesRecord( + update_time_ns=test_update_time_ns, + update_order=5, + diaObjectId=200002, + nDiaSources=10, + ra=45.0, + dec=-30.0, + ) + ) + + return UpdateRecords( + replica_chunk_id=test_replica_chunk_id, + record_count=len(records), + records=records, + file_created_at=datetime.datetime.now(datetime.UTC), + ) diff --git a/tests/test_update_record_expander.py b/tests/test_update_record_expander.py index b9f487a4..6da65484 100644 --- a/tests/test_update_record_expander.py +++ b/tests/test_update_record_expander.py @@ -33,9 +33,10 @@ ApdbWithdrawDiaSourceRecord, ) from lsst.dax.ppdb.bigquery.updates import ExpandedUpdateRecord, UpdateRecordExpander, UpdateRecords +from lsst.dax.ppdb.tests._updates import _create_test_update_records -class TestUpdateRecordExpander(unittest.TestCase): +class UpdateRecordExpanderTestCase(unittest.TestCase): """Test UpdateRecordExpander functionality.""" def setUp(self) -> None: @@ -47,100 +48,6 @@ def setUp(self) -> None: # Test replica chunk ID self.replica_chunk_id = 12345 - def _create_test_update_records(self) -> UpdateRecords: - """Create test UpdateRecords with sample ApdbUpdateRecord instances. - - Based on patterns from _ppdb.py _make_update_records method. 
- """ - records = [] - - # Reassign DIASource to different DIAObject - records.append( - ApdbReassignDiaSourceToDiaObjectRecord( - update_time_ns=self.update_time_ns, - update_order=0, - diaSourceId=100001, - diaObjectId=300001, - ra=45.0, - dec=-30.0, - midpointMjdTai=60000.0, - ) - ) - - # Reassign DIASource to SSObject - records.append( - ApdbReassignDiaSourceToSSObjectRecord( - update_time_ns=self.update_time_ns, - update_order=1, - diaSourceId=100002, - ssObjectId=2001, - ssObjectReassocTimeMjdTai=float(self.update_time.tai.mjd), - ra=45.0, - dec=-30.0, - midpointMjdTai=60000.0, - ) - ) - - # Withdraw DIASource - records.append( - ApdbWithdrawDiaSourceRecord( - update_time_ns=self.update_time_ns, - update_order=2, - diaSourceId=100003, - timeWithdrawnMjdTai=self.update_time.tai.mjd, - ra=45.0, - dec=-30.0, - midpointMjdTai=60000.0, - ) - ) - - # Withdraw DIAForcedSource - records.append( - ApdbWithdrawDiaForcedSourceRecord( - update_time_ns=self.update_time_ns, - update_order=3, - diaObjectId=200001, - visit=12345, - detector=42, - timeWithdrawnMjdTai=self.update_time.tai.mjd, - ra=45.0, - dec=-30.0, - midpointMjdTai=60000.0, - ) - ) - - # Close DIAObject validity interval - records.append( - ApdbCloseDiaObjectValidityRecord( - update_time_ns=self.update_time_ns, - update_order=4, - diaObjectId=200001, - validityEndMjdTai=self.update_time.tai.mjd, - nDiaSources=5, - ra=45.0, - dec=-30.0, - ) - ) - - # Update DIAObject nDiaSources count - records.append( - ApdbUpdateNDiaSourcesRecord( - update_time_ns=self.update_time_ns, - update_order=5, - diaObjectId=200002, - nDiaSources=10, - ra=45.0, - dec=-30.0, - ) - ) - - return UpdateRecords( - replica_chunk_id=self.replica_chunk_id, - record_count=len(records), - records=records, - file_created_at=datetime.datetime.now(datetime.UTC), - ) - def test_get_update_fields(self) -> None: """Test get_update_fields class method.""" # Test known update types @@ -168,29 +75,34 @@ def test_get_update_fields(self) -> None: 
UpdateRecordExpander.get_update_fields("unknown_update_type") self.assertIn("Unknown update_type: unknown_update_type", str(cm.exception)) - def test_get_record_id_field(self) -> None: + def test_get_record_id_field_names(self) -> None: """Test get_record_id_field class method.""" - # Test known update types self.assertEqual( - UpdateRecordExpander.get_record_id_field("reassign_diasource_to_diaobject"), "diaSourceId" + UpdateRecordExpander.get_record_id_field_names("reassign_diasource_to_diaobject"), ["diaSourceId"] + ) + self.assertEqual( + UpdateRecordExpander.get_record_id_field_names("reassign_diasource_to_ssobject"), ["diaSourceId"] ) self.assertEqual( - UpdateRecordExpander.get_record_id_field("reassign_diasource_to_ssobject"), "diaSourceId" + UpdateRecordExpander.get_record_id_field_names("withdraw_diasource"), ["diaSourceId"] ) - self.assertEqual(UpdateRecordExpander.get_record_id_field("withdraw_diasource"), "diaSourceId") self.assertEqual( - UpdateRecordExpander.get_record_id_field("withdraw_diaforcedsource"), + UpdateRecordExpander.get_record_id_field_names("withdraw_diaforcedsource"), ["diaObjectId", "visit", "detector"], ) - self.assertEqual(UpdateRecordExpander.get_record_id_field("close_diaobject_validity"), "diaObjectId") - self.assertEqual(UpdateRecordExpander.get_record_id_field("update_n_dia_sources"), "diaObjectId") + self.assertEqual( + UpdateRecordExpander.get_record_id_field_names("close_diaobject_validity"), ["diaObjectId"] + ) + self.assertEqual( + UpdateRecordExpander.get_record_id_field_names("update_n_dia_sources"), ["diaObjectId"] + ) # Test unknown update type with self.assertRaises(ValueError) as cm: - UpdateRecordExpander.get_record_id_field("unknown_update_type") + UpdateRecordExpander.get_record_id_field_names("unknown_update_type") self.assertIn("Unknown update_type: unknown_update_type", str(cm.exception)) - def test_expand_single_record_reassign_to_diaobject(self) -> None: + def test_reassign_diasource_to_diaobject(self) -> 
None: """Test expand_single_record with ApdbReassignDiaSourceToDiaObjectRecord. """ @@ -218,6 +130,8 @@ def test_expand_single_record_reassign_to_diaobject(self) -> None: self.assertEqual(expanded_record.replica_chunk_id, self.replica_chunk_id) self.assertEqual(expanded_record.update_order, 0) self.assertEqual(expanded_record.update_time_ns, self.update_time_ns) + + def test_reassign_diasource_to_ssobject(self) -> None: """Test expand_single_record with ApdbReassignDiaSourceToSSObjectRecord. """ @@ -255,7 +169,7 @@ def test_expand_single_record_reassign_to_diaobject(self) -> None: self.assertEqual(second_record.field_name, "ssObjectReassocTimeMjdTai") self.assertEqual(second_record.value_json, float(self.update_time.tai.mjd)) - def test_expand_single_record_withdraw_diasource(self) -> None: + def test_withdraw_diasource(self) -> None: """Test expand_single_record with ApdbWithdrawDiaSourceRecord.""" record = ApdbWithdrawDiaSourceRecord( update_time_ns=self.update_time_ns, @@ -278,43 +192,7 @@ def test_expand_single_record_withdraw_diasource(self) -> None: self.assertEqual(expanded_record.field_name, "timeWithdrawnMjdTai") self.assertEqual(expanded_record.value_json, self.update_time.tai.mjd) - def test_expand_single_record_reassign_to_ssobject(self) -> None: - """Test expand_single_record with ApdbReassignDiaSourceToSSObjectRecord.""" - record = ApdbReassignDiaSourceToSSObjectRecord( - update_time_ns=self.update_time_ns, - update_order=1, - diaSourceId=100002, - ssObjectId=2001, - ssObjectReassocTimeMjdTai=float(self.update_time.tai.mjd), - ra=45.0, - dec=-30.0, - midpointMjdTai=60000.0, - ) - - expanded = UpdateRecordExpander.expand_single_record(record, self.replica_chunk_id) - - # Should expand to 2 records (ssObjectId and ssObjectReassocTimeMjdTai) - self.assertEqual(len(expanded), 2) - - # Check first expanded record (ssObjectId) - first_record = expanded[0] - self.assertIsInstance(first_record, ExpandedUpdateRecord) - self.assertEqual(first_record.table_name, 
"DiaSource") - self.assertEqual(first_record.record_id, [100002]) - self.assertEqual(first_record.field_name, "ssObjectId") - self.assertEqual(first_record.value_json, 2001) - self.assertEqual(first_record.replica_chunk_id, self.replica_chunk_id) - self.assertEqual(first_record.update_order, 1) - self.assertEqual(first_record.update_time_ns, self.update_time_ns) - - # Check second expanded record (ssObjectReassocTimeMjdTai) - second_record = expanded[1] - self.assertEqual(second_record.table_name, "DiaSource") - self.assertEqual(second_record.record_id, [100002]) - self.assertEqual(second_record.field_name, "ssObjectReassocTimeMjdTai") - self.assertEqual(second_record.value_json, float(self.update_time.tai.mjd)) - - def test_expand_single_record_update_n_dia_sources(self) -> None: + def test_update_n_dia_sources(self) -> None: """Test expand_single_record with ApdbUpdateNDiaSourcesRecord.""" record = ApdbUpdateNDiaSourcesRecord( update_time_ns=self.update_time_ns, @@ -336,7 +214,7 @@ def test_expand_single_record_update_n_dia_sources(self) -> None: self.assertEqual(expanded_record.field_name, "nDiaSources") self.assertEqual(expanded_record.value_json, 10) - def test_expand_single_record_close_validity(self) -> None: + def test_close_diaobject_validity(self) -> None: """Test expand_single_record with ApdbCloseDiaObjectValidityRecord.""" record = ApdbCloseDiaObjectValidityRecord( update_time_ns=self.update_time_ns, @@ -368,7 +246,7 @@ def test_expand_single_record_close_validity(self) -> None: self.assertEqual(second_record.field_name, "nDiaSources") self.assertEqual(second_record.value_json, 5) - def test_expand_single_record_withdraw_forcedsource(self) -> None: + def test_withdraw_diaforcedsource(self) -> None: """Test expand_single_record with ApdbWithdrawDiaForcedSourceRecord.""" record = ApdbWithdrawDiaForcedSourceRecord( update_time_ns=self.update_time_ns, @@ -396,9 +274,9 @@ def test_expand_single_record_withdraw_forcedsource(self) -> None: 
self.assertEqual(expanded_record.field_name, "timeWithdrawnMjdTai") self.assertEqual(expanded_record.value_json, self.update_time.tai.mjd) - def test_expand_updates_full_integration(self) -> None: + def test_update_records_all(self) -> None: """Test the full expand_updates method with multiple record types.""" - update_records = self._create_test_update_records() + update_records = _create_test_update_records() expanded = UpdateRecordExpander.expand_updates(update_records) @@ -434,7 +312,7 @@ def test_expand_updates_full_integration(self) -> None: } self.assertEqual(field_names, expected_fields) - def test_expand_updates_empty_records(self) -> None: + def test_empty_records(self) -> None: """Test expand_updates with empty records list.""" empty_update_records = UpdateRecords( replica_chunk_id=self.replica_chunk_id, diff --git a/tests/test_updates_table.py b/tests/test_updates_table.py new file mode 100644 index 00000000..67372af6 --- /dev/null +++ b/tests/test_updates_table.py @@ -0,0 +1,165 @@ +# This file is part of dax_ppdb. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import unittest +import uuid + +try: + from google.cloud import bigquery +except ImportError: + bigquery = None + +from lsst.dax.ppdb.bigquery.updates import UpdateRecordExpander, UpdatesTable +from lsst.dax.ppdb.tests._updates import _create_test_update_records + + +@unittest.skipIf(bigquery is None, "google-cloud-bigquery not available") +class TestUpdatesTable(unittest.TestCase): + """Test UpdatesTable functionality.""" + + def setUp(self) -> None: + """Set up test fixtures.""" + # Create BigQuery client + self.client = bigquery.Client() + + # Create unique dataset name for this test run + self.dataset_id = f"test_updates_{uuid.uuid4().hex[:8]}" + self.project_id = self.client.project + self.table_name = "updates" + self.table_fqn = f"{self.project_id}.{self.dataset_id}.{self.table_name}" + + # Create the test dataset + dataset = bigquery.Dataset(f"{self.project_id}.{self.dataset_id}") + # Set a short expiration for cleanup safety (1 hour) + dataset.default_table_expiration_ms = 3600000 # 1 hour + self.dataset = self.client.create_dataset(dataset) + + # Create UpdatesTable instance + self.updates_table = UpdatesTable(self.client, self.table_fqn) + + def tearDown(self) -> None: + """Clean up test fixtures.""" + # Always clean up the test dataset, whether test passed or failed + try: + self.client.delete_dataset(self.dataset_id, delete_contents=True, not_found_ok=True) + except Exception: + # If deletion fails, at least the expiration will clean it up + pass + + def test_table_fqn_property(self) -> None: + """Test the table_fqn property.""" + self.assertEqual(self.updates_table.table_fqn, self.table_fqn) + + def test_create_table(self) -> None: + """Test creating the updates table.""" + table = self.updates_table.create() + + # Verify table was created successfully + self.assertEqual(table.table_id, self.table_name) + self.assertEqual(table.dataset_id, self.dataset_id) + + # Verify schema is correct + expected_fields = { + "table_name": ("STRING", "REQUIRED"), 
+ "record_id": ("INTEGER", "REPEATED"), + "field_name": ("STRING", "REQUIRED"), + "value_json": ("JSON", "REQUIRED"), + "replica_chunk_id": ("INTEGER", "REQUIRED"), + "update_order": ("INTEGER", "NULLABLE"), + "update_time_ns": ("INTEGER", "NULLABLE"), + } + + actual_fields = {field.name: (field.field_type, field.mode) for field in table.schema} + self.assertEqual(actual_fields, expected_fields) + + def test_create_table_already_exists(self) -> None: + """Test creating a table that already exists raises an error.""" + # Create table first time - should succeed + self.updates_table.create() + + # Try to create again - should raise Conflict + with self.assertRaises(Exception) as cm: + self.updates_table.create() + + # Check that it's a conflict-type error + self.assertIn("already exists", str(cm.exception).lower()) + + def test_append_records(self) -> None: + """Test appending ExpandedUpdateRecord objects to the table.""" + # Create the table first + self.updates_table.create() + + # Get test update records and expand them + update_records = _create_test_update_records() + expanded_records = UpdateRecordExpander.expand_updates(update_records) + + # Append the records + job = self.updates_table.append(expanded_records) + + # Verify the job completed successfully + self.assertIsNone(job.errors) + + # Verify records were inserted by querying the table + query = f"SELECT COUNT(*) as count FROM `{self.table_fqn}`" + result = list(self.client.query(query).result()) + record_count = result[0].count + + # Should have 8 total expanded records based on the test data + # (1 + 2 + 1 + 1 + 2 + 1 from each update record type) + self.assertEqual(record_count, 8) + + # Verify some specific data was inserted correctly + query = f""" + SELECT table_name, record_id, field_name, replica_chunk_id + FROM `{self.table_fqn}` + """ + # WHERE table_name = 'DiaForcedSource' + results = list(self.client.query(query).result()) + + print(results) # Debug print to see what was inserted + # Should 
have one DiaForcedSource record + # self.assertEqual(len(results), 1) + # row = results[0] + # self.assertEqual(row.table_name, "DiaForcedSource") + # self.assertEqual(row.record_id, [200001, 12345, 42]) # Composite key as list + # self.assertEqual(row.field_name, "timeWithdrawnMjdTai") + # self.assertEqual(row.replica_chunk_id, self.replica_chunk_id) + + def test_append_empty_records(self) -> None: + """Test appending empty list of records.""" + # Create the table first + self.updates_table.create() + + # Append empty list + job = self.updates_table.append([]) + + # Verify the job completed successfully + self.assertIsNone(job.errors) + + # Verify no records were inserted + query = f"SELECT COUNT(*) as count FROM `{self.table_fqn}`" + result = list(self.client.query(query).result()) + record_count = result[0].count + self.assertEqual(record_count, 0) + + +if __name__ == "__main__": + unittest.main() From db06460fb149f2eba183201e85777d34ef1562cc Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 18 Feb 2026 14:23:41 -0600 Subject: [PATCH 15/49] Add preliminary implementation of update records table dedup in BQ --- .../updates/update_record_expander.py | 24 ++++------ .../ppdb/bigquery/updates/update_records.py | 1 - .../ppdb/bigquery/updates/updates_table.py | 38 +++++++++++++-- python/lsst/dax/ppdb/tests/_updates.py | 26 ++++++++++ tests/test_update_record_expander.py | 20 ++++---- tests/test_updates_table.py | 48 +++++++++++++++++-- 6 files changed, 121 insertions(+), 36 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py b/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py index e9b6e911..55d1ba33 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py +++ b/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py @@ -42,12 +42,12 @@ class UpdateRecordExpander: } _RECORD_ID_FIELD_MAPPING = { - "reassign_diasource_to_diaobject": "diaSourceId", - 
"reassign_diasource_to_ssobject": "diaSourceId", - "withdraw_diasource": "diaSourceId", + "reassign_diasource_to_diaobject": ["diaSourceId"], + "reassign_diasource_to_ssobject": ["diaSourceId"], + "withdraw_diasource": ["diaSourceId"], "withdraw_diaforcedsource": ["diaObjectId", "visit", "detector"], - "close_diaobject_validity": "diaObjectId", - "update_n_dia_sources": "diaObjectId", + "close_diaobject_validity": ["diaObjectId"], + "update_n_dia_sources": ["diaObjectId"], } @classmethod @@ -75,7 +75,7 @@ def get_update_fields(cls, update_type: str) -> list[str]: return cls._UPDATE_FIELD_MAPPING[update_type] @classmethod - def get_record_id_field_names(cls, update_type: str) -> str | list[str]: + def get_record_id_fields(cls, update_type: str) -> str | list[str]: """Get the field name(s) that serve as the record ID for a given update type. @@ -105,9 +105,6 @@ def get_record_id_field(cls, update_type: str) -> str | list[str]: """Get the field name(s) that serve as the record ID for a given update type. - This method is an alias for get_record_id_field_names for backward - compatibility. - Parameters ---------- update_type : `str` @@ -115,16 +112,15 @@ def get_record_id_field(cls, update_type: str) -> str | list[str]: Returns ------- - field_name : `str` or `list` [ `str` ] - Name of the field that contains the record ID for this update type, - or list of field names for composite keys. + field_name : `list` [ `str` ] + List of the fields that contain the record ID for this update type. Raises ------ ValueError If the update_type is not recognized. """ - return cls.get_record_id_field_names(update_type) + return cls.get_record_id_fields(update_type) @classmethod def expand_single_record( @@ -192,7 +188,7 @@ def _get_record_id(cls, update_record: ApdbUpdateRecord) -> list[int]: single-element list. For composite keys, a multi-element list. 
""" update_type = update_record.update_type - id_fields = cls.get_record_id_field_names(update_type) + id_fields = cls.get_record_id_fields(update_type) # Handle both single field (string) and composite fields (list) if isinstance(id_fields, str): diff --git a/python/lsst/dax/ppdb/bigquery/updates/update_records.py b/python/lsst/dax/ppdb/bigquery/updates/update_records.py index 47c63f01..ae986fb3 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/update_records.py +++ b/python/lsst/dax/ppdb/bigquery/updates/update_records.py @@ -33,7 +33,6 @@ """Default filename for the update records JSON file.""" -# Move to `updateRecords.py` to follow camelcase convention class UpdateRecords(BaseModel): """Data model for APDB update records.""" diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_table.py b/python/lsst/dax/ppdb/bigquery/updates/updates_table.py index c39d1592..8cc51e0c 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_table.py +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_table.py @@ -30,11 +30,8 @@ class UpdatesTable: - """ - Manage a BigQuery updates table for `ExpandedUpdateRecord` rows. - - This class is responsible for creating the updates table with the correct - schema and appending expanded update records into it. + """Manage the table in BigQuery used for inserting and deduplicating + expanded update records which contain one update per row. """ def __init__(self, client: bigquery.Client, table_fqn: str) -> None: @@ -152,3 +149,34 @@ def append(self, records: Iterable[ExpandedUpdateRecord]) -> bigquery.LoadJob: raise RuntimeError(f"BigQuery load failed: {job.errors}") return job + + def deduplicate_to(self, target_table_fqn: str) -> bigquery.QueryJob: + """ + Deduplicate this table's records to a target table. + + Keeps the record with the latest update_time_ns for each unique + combination of (table_name, record_id, field_name). 
+ """ + query = f""" + CREATE OR REPLACE TABLE `{target_table_fqn}` + AS + SELECT * EXCEPT(row_num) + FROM ( + SELECT *, + ROW_NUMBER() OVER ( + PARTITION BY table_name, + ARRAY_TO_STRING( + ARRAY(SELECT CAST(elem AS STRING) FROM UNNEST(record_id) AS elem), + ',' + ), + field_name + ORDER BY update_time_ns DESC + ) as row_num + FROM `{self._table_fqn}` + ) + WHERE row_num = 1 + """ + + job = self._client.query(query) + job.result() + return job diff --git a/python/lsst/dax/ppdb/tests/_updates.py b/python/lsst/dax/ppdb/tests/_updates.py index 52069359..a5a62002 100644 --- a/python/lsst/dax/ppdb/tests/_updates.py +++ b/python/lsst/dax/ppdb/tests/_updates.py @@ -122,6 +122,32 @@ def _create_test_update_records() -> UpdateRecords: ) ) + # Add duplicate records for testing deduplication + # Duplicate of the first record but with later timestamp (should be kept) + records.append( + ApdbReassignDiaSourceToDiaObjectRecord( + update_time_ns=test_update_time_ns + 1000000000, # 1 second later + update_order=0, + diaSourceId=100001, + diaObjectId=400001, # Different target object + ra=45.0, + dec=-30.0, + midpointMjdTai=60000.0, + ) + ) + + # Duplicate of the nDiaSources update but with earlier timestamp (should be discarded) + records.append( + ApdbUpdateNDiaSourcesRecord( + update_time_ns=test_update_time_ns - 1000000000, # 1 second earlier + update_order=5, + diaObjectId=200002, + nDiaSources=8, # Different value but older timestamp + ra=45.0, + dec=-30.0, + ) + ) + return UpdateRecords( replica_chunk_id=test_replica_chunk_id, record_count=len(records), diff --git a/tests/test_update_record_expander.py b/tests/test_update_record_expander.py index 6da65484..8818565e 100644 --- a/tests/test_update_record_expander.py +++ b/tests/test_update_record_expander.py @@ -78,28 +78,24 @@ def test_get_update_fields(self) -> None: def test_get_record_id_field_names(self) -> None: """Test get_record_id_field class method.""" self.assertEqual( - 
UpdateRecordExpander.get_record_id_field_names("reassign_diasource_to_diaobject"), ["diaSourceId"] + UpdateRecordExpander.get_record_id_fields("reassign_diasource_to_diaobject"), ["diaSourceId"] ) self.assertEqual( - UpdateRecordExpander.get_record_id_field_names("reassign_diasource_to_ssobject"), ["diaSourceId"] + UpdateRecordExpander.get_record_id_fields("reassign_diasource_to_ssobject"), ["diaSourceId"] ) + self.assertEqual(UpdateRecordExpander.get_record_id_fields("withdraw_diasource"), ["diaSourceId"]) self.assertEqual( - UpdateRecordExpander.get_record_id_field_names("withdraw_diasource"), ["diaSourceId"] - ) - self.assertEqual( - UpdateRecordExpander.get_record_id_field_names("withdraw_diaforcedsource"), + UpdateRecordExpander.get_record_id_fields("withdraw_diaforcedsource"), ["diaObjectId", "visit", "detector"], ) self.assertEqual( - UpdateRecordExpander.get_record_id_field_names("close_diaobject_validity"), ["diaObjectId"] - ) - self.assertEqual( - UpdateRecordExpander.get_record_id_field_names("update_n_dia_sources"), ["diaObjectId"] + UpdateRecordExpander.get_record_id_fields("close_diaobject_validity"), ["diaObjectId"] ) + self.assertEqual(UpdateRecordExpander.get_record_id_fields("update_n_dia_sources"), ["diaObjectId"]) # Test unknown update type with self.assertRaises(ValueError) as cm: - UpdateRecordExpander.get_record_id_field_names("unknown_update_type") + UpdateRecordExpander.get_record_id_fields("unknown_update_type") self.assertIn("Unknown update_type: unknown_update_type", str(cm.exception)) def test_reassign_diasource_to_diaobject(self) -> None: @@ -287,7 +283,7 @@ def test_update_records_all(self) -> None: # - 1 from ApdbWithdrawDiaForcedSourceRecord # - 2 from ApdbCloseDiaObjectValidityRecord # - 1 from ApdbUpdateNDiaSourcesRecord - self.assertEqual(len(expanded), 8) + self.assertEqual(len(expanded), 10) # Verify all expanded records have correct replica_chunk_id for record in expanded: diff --git a/tests/test_updates_table.py 
b/tests/test_updates_table.py index 67372af6..9672500c 100644 --- a/tests/test_updates_table.py +++ b/tests/test_updates_table.py @@ -122,9 +122,9 @@ def test_append_records(self) -> None: result = list(self.client.query(query).result()) record_count = result[0].count - # Should have 8 total expanded records based on the test data - # (1 + 2 + 1 + 1 + 2 + 1 from each update record type) - self.assertEqual(record_count, 8) + # Should have 10 total expanded records based on the test data + # (1 + 2 + 1 + 1 + 2 + 1 from original records + 2 duplicates) + self.assertEqual(record_count, 10) # Verify some specific data was inserted correctly query = f""" @@ -139,7 +139,7 @@ def test_append_records(self) -> None: # self.assertEqual(len(results), 1) # row = results[0] # self.assertEqual(row.table_name, "DiaForcedSource") - # self.assertEqual(row.record_id, [200001, 12345, 42]) # Composite key as list + # self.assertEqual(row.record_id, [200001, 12345, 42]) # self.assertEqual(row.field_name, "timeWithdrawnMjdTai") # self.assertEqual(row.replica_chunk_id, self.replica_chunk_id) @@ -160,6 +160,46 @@ def test_append_empty_records(self) -> None: record_count = result[0].count self.assertEqual(record_count, 0) + def test_deduplicate_records(self) -> None: + """Test deduplication functionality.""" + # Create the source table + self.updates_table.create() + + # Get test records (which now include duplicates) and expand them + update_records = _create_test_update_records() + expanded_records = UpdateRecordExpander.expand_updates(update_records) + + # Append all records (including duplicates) + self.updates_table.append(expanded_records) + + # Count original records + query = f"SELECT COUNT(*) as count FROM `{self.table_fqn}`" + original_count = list(self.client.query(query).result())[0].count + + # Create deduplicated table + dedup_table_fqn = f"{self.table_fqn}_dedup" + self.updates_table.deduplicate_to(dedup_table_fqn) + + # Count deduplicated records + query = f"SELECT COUNT(*) 
as count FROM `{dedup_table_fqn}`" + dedup_count = list(self.client.query(query).result())[0].count + + # Should have fewer records after deduplication + self.assertLess(dedup_count, original_count) + + # Verify specific deduplication behavior: + # Check that the later timestamp record is kept for diaSourceId=100001 + query = f""" + SELECT value_json + FROM `{dedup_table_fqn}` + WHERE ARRAY_TO_STRING( + ARRAY(SELECT CAST(elem AS STRING) FROM UNNEST(record_id) AS elem), ',' + ) = '100001' AND field_name = 'diaObjectId' + """ + result = list(self.client.query(query).result()) + self.assertEqual(len(result), 1) + self.assertEqual(result[0].value_json, 400001) # Should be the later update + if __name__ == "__main__": unittest.main() From f3a1924b0a0163bf388a18ca0650996ccb5ae96e Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 18 Feb 2026 15:42:43 -0600 Subject: [PATCH 16/49] Use a hashed value of record ID for deduplication --- .../updates/update_record_expander.py | 21 ++++++++++++++ .../ppdb/bigquery/updates/updates_table.py | 28 +++++++++++++++---- tests/test_updates_table.py | 9 +++--- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py b/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py index 55d1ba33..f5c95fdf 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py +++ b/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py @@ -21,6 +21,8 @@ from __future__ import annotations +import hashlib + from lsst.dax.apdb.apdbUpdateRecord import ApdbUpdateRecord from .expanded_update_record import ExpandedUpdateRecord @@ -100,6 +102,23 @@ def get_record_id_fields(cls, update_type: str) -> str | list[str]: return cls._RECORD_ID_FIELD_MAPPING[update_type] + @classmethod + def _compute_record_id_hash(cls, record_id: list[int]) -> str: + """Compute MD5 hash of a record_id list for deduplication. 
+ + Parameters + ---------- + record_id : list[int] + The record ID as a list of integers. + + Returns + ------- + str + Full 32-character hexadecimal MD5 hash of the record_id list. + """ + record_id_str = ",".join(str(x) for x in record_id) + return hashlib.md5(record_id_str.encode()).hexdigest() + @classmethod def get_record_id_field(cls, update_type: str) -> str | list[str]: """Get the field name(s) that serve as the record ID for a given update @@ -149,6 +168,7 @@ def expand_single_record( # Get the record ID record_id = cls._get_record_id(update_record) + record_id_hash = cls._compute_record_id_hash(record_id) expanded_records = [] for field_name in field_names: @@ -162,6 +182,7 @@ expanded_record = ExpandedUpdateRecord( table_name=table_name, record_id=record_id, + record_id_hash=record_id_hash, field_name=field_name, value_json=value, replica_chunk_id=replica_chunk_id, diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_table.py b/python/lsst/dax/ppdb/bigquery/updates/updates_table.py index 8cc51e0c..5553d342 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_table.py +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_table.py @@ -21,6 +21,7 @@ from __future__ import annotations +import hashlib from collections.abc import Iterable from typing import Any @@ -46,6 +47,23 @@ def __init__(self, client: bigquery.Client, table_fqn: str) -> None: self._client: bigquery.Client = client self._table_fqn: str = table_fqn + @staticmethod + def _compute_record_id_hash(record_id: list[int]) -> str: + """Compute MD5 hash of a record_id list for deduplication. + + Parameters + ---------- + record_id : list[int] + The record ID as a list of integers. + + Returns + ------- + str + Full 32-character hexadecimal MD5 hash of the record_id list. 
+ """ + record_id_str = ",".join(str(x) for x in record_id) + return hashlib.md5(record_id_str.encode()).hexdigest() + @property def table_fqn(self) -> str: """ @@ -78,6 +96,7 @@ def create(self) -> bigquery.Table: - table_name: STRING (REQUIRED) - record_id: ARRAY (REQUIRED) + - record_id_hash: STRING (REQUIRED) - field_name: STRING (REQUIRED) - value_json: JSON (REQUIRED) - replica_chunk_id: INT64 (REQUIRED) @@ -87,6 +106,7 @@ def create(self) -> bigquery.Table: schema: list[bigquery.SchemaField] = [ bigquery.SchemaField("table_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("record_id", "INT64", mode="REPEATED"), + bigquery.SchemaField("record_id_hash", "STRING", mode="REQUIRED"), bigquery.SchemaField("field_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("value_json", "JSON", mode="REQUIRED"), bigquery.SchemaField("replica_chunk_id", "INT64", mode="REQUIRED"), @@ -125,6 +145,7 @@ def append(self, records: Iterable[ExpandedUpdateRecord]) -> bigquery.LoadJob: { "table_name": r.table_name, "record_id": r.record_id, + "record_id_hash": self._compute_record_id_hash(r.record_id), "field_name": r.field_name, "value_json": r.value_json, "replica_chunk_id": r.replica_chunk_id, @@ -164,12 +185,7 @@ def deduplicate_to(self, target_table_fqn: str) -> bigquery.QueryJob: FROM ( SELECT *, ROW_NUMBER() OVER ( - PARTITION BY table_name, - ARRAY_TO_STRING( - ARRAY(SELECT CAST(elem AS STRING) FROM UNNEST(record_id) AS elem), - ',' - ), - field_name + PARTITION BY table_name, record_id_hash, field_name ORDER BY update_time_ns DESC ) as row_num FROM `{self._table_fqn}` diff --git a/tests/test_updates_table.py b/tests/test_updates_table.py index 9672500c..8d3d821f 100644 --- a/tests/test_updates_table.py +++ b/tests/test_updates_table.py @@ -80,6 +80,7 @@ def test_create_table(self) -> None: expected_fields = { "table_name": ("STRING", "REQUIRED"), "record_id": ("INTEGER", "REPEATED"), + "record_id_hash": ("STRING", "REQUIRED"), "field_name": ("STRING", "REQUIRED"), 
"value_json": ("JSON", "REQUIRED"), "replica_chunk_id": ("INTEGER", "REQUIRED"), @@ -187,14 +188,12 @@ def test_deduplicate_records(self) -> None: # Should have fewer records after deduplication self.assertLess(dedup_count, original_count) - # Verify specific deduplication behavior: - # Check that the later timestamp record is kept for diaSourceId=100001 + # Verify specific deduplication behavior + record_id_hash = UpdatesTable._compute_record_id_hash([100001]) query = f""" SELECT value_json FROM `{dedup_table_fqn}` - WHERE ARRAY_TO_STRING( - ARRAY(SELECT CAST(elem AS STRING) FROM UNNEST(record_id) AS elem), ',' - ) = '100001' AND field_name = 'diaObjectId' + WHERE record_id_hash = '{record_id_hash}' AND field_name = 'diaObjectId' """ result = list(self.client.query(query).result()) self.assertEqual(len(result), 1) From be9175f4aba2aee8f6ffc8edfee3f9ae240f5b3b Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 18 Feb 2026 16:53:41 -0600 Subject: [PATCH 17/49] WIP on update merge implementation --- pyproject.toml | 2 +- .../dax/ppdb/bigquery/updates/__init__.py | 1 + .../dax/ppdb/bigquery/updates/sql/__init__.py | 0 .../updates/sql/merge_diaobject_updates.sql | 46 +++++++ .../ppdb/bigquery/updates/updates_merger.py | 93 +++++++++++++ tests/test_updates_merger.py | 126 ++++++++++++++++++ 6 files changed, 267 insertions(+), 1 deletion(-) create mode 100644 python/lsst/dax/ppdb/bigquery/updates/sql/__init__.py create mode 100644 python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaobject_updates.sql create mode 100644 python/lsst/dax/ppdb/bigquery/updates/updates_merger.py create mode 100644 tests/test_updates_merger.py diff --git a/pyproject.toml b/pyproject.toml index 5f082778..c5860c60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ where = ["python"] zip-safe = true [tool.setuptools.package-data] -"lsst.dax.ppdb" = ["py.typed"] +"lsst.dax.ppdb" = ["py.typed", "bigquery/updates/sql/*.sql"] [tool.setuptools.dynamic] version = { attr = 
"lsst_versions.get_lsst_version" } diff --git a/python/lsst/dax/ppdb/bigquery/updates/__init__.py b/python/lsst/dax/ppdb/bigquery/updates/__init__.py index a21b1add..342c6069 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/__init__.py +++ b/python/lsst/dax/ppdb/bigquery/updates/__init__.py @@ -23,3 +23,4 @@ from .expanded_update_record import ExpandedUpdateRecord from .update_record_expander import UpdateRecordExpander from .updates_table import UpdatesTable +from .updates_merger import UpdatesMerger, DiaObjectUpdatesMerger diff --git a/python/lsst/dax/ppdb/bigquery/updates/sql/__init__.py b/python/lsst/dax/ppdb/bigquery/updates/sql/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaobject_updates.sql b/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaobject_updates.sql new file mode 100644 index 00000000..143f86a5 --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaobject_updates.sql @@ -0,0 +1,46 @@ +-- merge_diaobject_updates.sql +-- +-- Query parameters: +-- @updates_table STRING -- table FQN, e.g. "project.dataset.prod_next" +-- @target_dataset STRING -- dataset FQN, e.g. "project.dataset" +-- +-- Do NOT include backticks in parameter values. 
+ +DECLARE sql STRING; + +SET sql = """ +MERGE `{target_dataset}.DiaObject` T +USING ( + WITH patch AS ( + SELECT + record_id[OFFSET(0)] AS diaObjectId, + + ANY_VALUE( + CASE WHEN field_name = 'validityEndMjdTai' + THEN CAST(JSON_VALUE(value_json) AS FLOAT64) + END + ) AS validityEndMjdTai_value, + COUNTIF(field_name = 'validityEndMjdTai') > 0 AS validityEndMjdTai_present, + + ANY_VALUE( + CASE WHEN field_name = 'nDiaSources' + THEN CAST(JSON_VALUE(value_json) AS INT64) + END + ) AS nDiaSources_value, + COUNTIF(field_name = 'nDiaSources') > 0 AS nDiaSources_present + + FROM `{updates_table}` + WHERE table_name = 'DiaObject' + AND field_name IN ('validityEndMjdTai', 'nDiaSources') + GROUP BY diaObjectId + ) + SELECT * FROM patch +) P +ON T.diaObjectId = P.diaObjectId +WHEN MATCHED THEN +UPDATE SET + validityEndMjdTai = IF(P.validityEndMjdTai_present, P.validityEndMjdTai_value, T.validityEndMjdTai), + nDiaSources = IF(P.nDiaSources_present, P.nDiaSources_value, T.nDiaSources) +"""; + +EXECUTE IMMEDIATE sql; \ No newline at end of file diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py new file mode 100644 index 00000000..61bd2927 --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py @@ -0,0 +1,93 @@ +# This file is part of dax_ppdb +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +from abc import ABC + +from google.cloud import bigquery + +from lsst.resources import ResourcePath + +_SQL_RESOURCE_PACKAGE = "lsst.dax.ppdb.bigquery.updates.sql" + + +class UpdatesMerger(ABC): + """Abstract base class for merging expanded update records into target + tables in BigQuery. + """ + + TABLE_NAME: str + """Logical name of the target table this merger applies to + (e.g., 'DiaObject').""" + + SQL_RESOURCE_NAME: str + """Base name of the SQL file (without .sql extension) containing the MERGE + statement for this merger. The SQL file must be located in the + `lsst.dax.ppdb.bigquery.updates.sql` package.""" + + def __init__(self, client: bigquery.Client) -> None: + """ + Parameters + ---------- + client + BigQuery client. + """ + self._client: bigquery.Client = client + + def merge(self, *, updates_table_fqn: str, target_dataset_fqn: str) -> bigquery.QueryJob: + """ + Apply updates from the updates table specified by `updates_table_fqn` + to the target table in the `target_dataset_fqn` dataset. + + Parameters + ---------- + updates_table_fqn + Fully-qualified BigQuery table name containing updates. + target_dataset_fqn + Fully-qualified BigQuery dataset name containing the target table. + + Returns + ------- + google.cloud.bigquery.job.QueryJob + The completed BigQuery job. 
+ """ + try: + sql_resource_path = f"resource://{_SQL_RESOURCE_PACKAGE}/{self.SQL_RESOURCE_NAME}.sql" + print(f"Reading SQL from resource: {sql_resource_path}") + sql_text = ResourcePath(sql_resource_path).read().decode("utf-8") + except Exception as e: + raise RuntimeError(f"Failed to read SQL resource at {sql_resource_path}") from e + + sql_text = sql_text.format(updates_table=updates_table_fqn, target_dataset=target_dataset_fqn) + + job = self._client.query(sql_text) + job.result() + + return job + + +class DiaObjectUpdatesMerger(UpdatesMerger): + """Merger for DiaObject updates.""" + + TABLE_NAME = "DiaObject" + + SQL_RESOURCE_NAME = "merge_diaobject_updates" diff --git a/tests/test_updates_merger.py b/tests/test_updates_merger.py new file mode 100644 index 00000000..381b0adc --- /dev/null +++ b/tests/test_updates_merger.py @@ -0,0 +1,126 @@ +# This file is part of dax_ppdb. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import io +import json +import unittest +import uuid + +try: + from google.cloud import bigquery +except ImportError: + bigquery = None + +from lsst.dax.ppdb.bigquery.updates import DiaObjectUpdatesMerger, UpdateRecordExpander, UpdatesTable +from lsst.dax.ppdb.tests._updates import _create_test_update_records + + +@unittest.skipIf(bigquery is None, "google-cloud-bigquery not available") +class TestUpdatesMerger(unittest.TestCase): + """Test UpdatesMerger functionality.""" + + def setUp(self): + self.client = bigquery.Client() + self.dataset_id = f"test_merger_{uuid.uuid4().hex[:8]}" + self.project_id = self.client.project + self.updates_table_fqn = f"{self.project_id}.{self.dataset_id}.updates" + self.target_dataset_fqn = f"{self.project_id}.{self.dataset_id}" + dataset = bigquery.Dataset(f"{self.project_id}.{self.dataset_id}") + dataset.default_table_expiration_ms = 3600000 + self.client.create_dataset(dataset) + + def tearDown(self): + try: + self.client.delete_dataset(self.dataset_id, delete_contents=True, not_found_ok=True) + except Exception: + pass + + def _create_target_table(self): + schema = [ + bigquery.SchemaField("diaObjectId", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("validityEndMjdTai", "FLOAT", mode="NULLABLE"), + bigquery.SchemaField("nDiaSources", "INTEGER", mode="NULLABLE"), + ] + table_fqn = f"{self.target_dataset_fqn}.DiaObject" + table = bigquery.Table(table_fqn, schema=schema) + self.client.create_table(table) + rows = [ + {"diaObjectId": 200001, "validityEndMjdTai": None, "nDiaSources": 3}, + {"diaObjectId": 200002, "validityEndMjdTai": None, "nDiaSources": 7}, + {"diaObjectId": 200003, "validityEndMjdTai": 59000.0, "nDiaSources": 2}, + ] + buf = self._json_rows_to_buf(rows) + job = self.client.load_table_from_file( + buf, + table_fqn, + job_config=bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON), + ) + job.result() + + def _json_rows_to_buf(self, rows): + buf = io.StringIO() + for row in rows: 
+ buf.write(json.dumps(row) + "\n") + buf.seek(0) + return buf + + def test_merge_diaobject(self): + self._create_target_table() + updates_table = UpdatesTable(self.client, self.updates_table_fqn) + updates_table.create() + update_records = _create_test_update_records() + expanded = UpdateRecordExpander.expand_updates(update_records) + updates_table.append(expanded) + dedup_fqn = f"{self.updates_table_fqn}_dedup" + updates_table.deduplicate_to(dedup_fqn) + table_fqn = f"{self.target_dataset_fqn}.DiaObject" + query = f"SELECT * FROM `{table_fqn}` ORDER BY diaObjectId" + before = {r.diaObjectId: r for r in self.client.query(query).result()} + print("Before merge:", before) + merger = DiaObjectUpdatesMerger(self.client) + merger.merge(updates_table_fqn=dedup_fqn, target_dataset_fqn=self.target_dataset_fqn) + after = {r.diaObjectId: r for r in self.client.query(query).result()} + print("After merge:", after) + self.assertEqual(after[200001].validityEndMjdTai, 59580.0) + self.assertEqual(after[200001].nDiaSources, 5) + self.assertIsNone(after[200002].validityEndMjdTai) + self.assertEqual(after[200002].nDiaSources, 10) + self.assertEqual(after[200003].validityEndMjdTai, before[200003].validityEndMjdTai) + self.assertEqual(after[200003].nDiaSources, before[200003].nDiaSources) + + def test_merge_no_updates(self): + self._create_target_table() + updates_table = UpdatesTable(self.client, self.updates_table_fqn) + updates_table.create() + dedup_fqn = f"{self.updates_table_fqn}_dedup" + updates_table.deduplicate_to(dedup_fqn) + table_fqn = f"{self.target_dataset_fqn}.DiaObject" + before = {r.diaObjectId: r for r in self.client.query(f"SELECT * FROM `{table_fqn}`").result()} + merger = DiaObjectUpdatesMerger(self.client) + merger.merge(updates_table_fqn=dedup_fqn, target_dataset_fqn=self.target_dataset_fqn) + after = {r.diaObjectId: r for r in self.client.query(f"SELECT * FROM `{table_fqn}`").result()} + for obj_id in before: + self.assertEqual(before[obj_id].validityEndMjdTai, 
after[obj_id].validityEndMjdTai) + self.assertEqual(before[obj_id].nDiaSources, after[obj_id].nDiaSources) + + +if __name__ == "__main__": + unittest.main() From 20e3b2b47fce963a9b0590220631686547611095 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 19 Feb 2026 16:17:56 -0600 Subject: [PATCH 18/49] Add google-cloud-bigquery requirement --- pyproject.toml | 1 + requirements.txt | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c5860c60..3f1c500d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ test = [ "pytest-openfiles >= 0.5.0" ] gcp = [ + "google-cloud-bigquery", "lsst-dax-ppdbx-gcp" ] diff --git a/requirements.txt b/requirements.txt index ba5c456c..f40c2d15 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,5 +7,6 @@ lsst-dax-apdb @ git+https://github.com/lsst/dax_apdb@main lsst-utils @ git+https://github.com/lsst/utils@main lsst-resources[s3] @ git+https://github.com/lsst/resources@main lsst-felis @ git+https://github.com/lsst/felis@main -lsst-dax-ppdbx-gcp @ git+https://github.com/lsst-dm/dax_ppdbx_gcp@tickets/DM-54070 lsst-sdm-schemas @ git+https://github.com/lsst/sdm_schemas@main +lsst-dax-ppdbx-gcp @ git+https://github.com/lsst-dm/dax_ppdbx_gcp@tickets/DM-54070 +google-cloud-bigquery From ecca19b8ab6cd48851183dee30ed9c5b30a0eba4 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 20 Feb 2026 16:57:03 -0600 Subject: [PATCH 19/49] Rearrange tests to guard against missing google deps --- pyproject.toml | 2 +- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 4 +- python/lsst/dax/ppdb/tests/_bigquery.py | 140 ++++++ python/lsst/dax/ppdb/tests/_ppdb.py | 5 +- python/lsst/dax/ppdb/tests/config/__init__.py | 0 .../lsst/dax/ppdb/tests}/config/schema.yaml | 0 tests/test_ppdb_bigquery.py | 428 +----------------- tests/test_ppdb_sql.py | 15 +- tests/test_update_record_expander.py | 16 +- tests/test_update_records.py | 346 ++++++++++++++ tests/test_updates_merger.py | 12 
+- tests/test_updates_table.py | 14 +- 12 files changed, 534 insertions(+), 448 deletions(-) create mode 100644 python/lsst/dax/ppdb/tests/_bigquery.py create mode 100644 python/lsst/dax/ppdb/tests/config/__init__.py rename {tests => python/lsst/dax/ppdb/tests}/config/schema.yaml (100%) create mode 100644 tests/test_update_records.py diff --git a/pyproject.toml b/pyproject.toml index 3f1c500d..930ac8f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ where = ["python"] zip-safe = true [tool.setuptools.package-data] -"lsst.dax.ppdb" = ["py.typed", "bigquery/updates/sql/*.sql"] +"lsst.dax.ppdb" = ["py.typed", "bigquery/updates/sql/*.sql", "tests/config/*.yaml"] [tool.setuptools.dynamic] version = { attr = "lsst_versions.get_lsst_version" } diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index a6c72fa9..82d2205c 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -46,7 +46,6 @@ from ..sql import PpdbSqlBase, PpdbSqlBaseConfig from .manifest import Manifest, TableStats from .ppdb_replica_chunk_extended import ChunkStatus, PpdbReplicaChunkExtended -from .updates.update_records import UpdateRecords __all__ = ["ConfigValidationError", "PpdbBigQuery", "PpdbBigQueryConfig"] @@ -597,6 +596,9 @@ def _handle_updates( Serializes the ApdbUpdateRecord objects into a dictionary structure for processing. """ + # Import inlined here to avoid triggering google cloud imports + from .updates.update_records import UpdateRecords + update_records = UpdateRecords( replica_chunk_id=replica_chunk.id, records=apdb_update_records, diff --git a/python/lsst/dax/ppdb/tests/_bigquery.py b/python/lsst/dax/ppdb/tests/_bigquery.py new file mode 100644 index 00000000..1861396e --- /dev/null +++ b/python/lsst/dax/ppdb/tests/_bigquery.py @@ -0,0 +1,140 @@ +# This file is part of dax_ppdb. +# +# Developed for the LSST Data Management System. 
+# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import gc +import shutil +import tempfile +from typing import Any + +from lsst.dax.apdb import ( + ApdbConfig, +) +from lsst.dax.apdb.sql import ApdbSql +from lsst.dax.ppdb import PpdbConfig +from lsst.dax.ppdb.bigquery import PpdbBigQuery + +try: + import testing.postgresql +except ImportError: + testing = None + +from lsst.dax.ppdb.tests import TEST_SCHEMA_RESOURCE_PATH + +TEST_CONFIG = { + "db_drop": True, + "validate_config": False, + "delete_existing_dirs": True, + "bucket_name": "ppdb-test", + "object_prefix": "data/test", + "dataset_id": "test_dataset", + "project_id": "test_project", +} + + +class _SqliteMixin: + """Mixin class to provide Sqlite-specific setup/teardown and instance + creation. 
+ """ + + def setUp(self) -> None: + self.tempdir = tempfile.mkdtemp() + self.apdb_url = f"sqlite:///{self.tempdir}/apdb.sqlite3" + self.ppdb_url = f"sqlite:///{self.tempdir}/ppdb.sqlite3" + + def tearDown(self) -> None: + shutil.rmtree(self.tempdir, ignore_errors=True) + + def make_instance(self, **kwargs: Any) -> PpdbConfig: + """Make config class instance used in all tests.""" + kw = { + **TEST_CONFIG, + "db_url": self.ppdb_url, + "felis_path": TEST_SCHEMA_RESOURCE_PATH, + "replication_dir": self.tempdir, + } + bq_config = PpdbBigQuery.init_bigquery( + **kw, + ) # type: ignore[arg-type] + return bq_config + + def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: + """Make APDB instance for tests.""" + kw = { + "schema_file": TEST_SCHEMA_RESOURCE_PATH, + "ss_schema_file": "", + "db_url": self.apdb_url, + "enable_replica": True, + } + kw.update(kwargs) + return ApdbSql.init_database(**kw) # type: ignore[arg-type] + + +class _PostgresMixin: + """Mixin class to provide Postgres-specific setup/teardown and instance + creation. + """ + + postgresql: Any + + @classmethod + def setUpClass(cls) -> None: + # Create the postgres test server. + cls.postgresql = testing.postgresql.PostgresqlFactory(cache_initialized_db=True) + super().setUpClass() + + @classmethod + def tearDownClass(cls) -> None: + # Clean up any lingering SQLAlchemy engines/connections + # so they're closed before we shut down the server. 
+ gc.collect() + cls.postgresql.clear_cache() + super().tearDownClass() + + def setUp(self) -> None: + self.server = self.postgresql() + self.tempdir = tempfile.mkdtemp() + + def tearDown(self) -> None: + self.server.stop() + shutil.rmtree(self.tempdir, ignore_errors=True) + + def make_instance(self, **kwargs: Any) -> PpdbConfig: + """Make config class instance used in all tests.""" + kw = { + **TEST_CONFIG, + "db_url": self.server.url(), + "db_schema": "ppdb_test", + "felis_path": TEST_SCHEMA_RESOURCE_PATH, + "replication_dir": self.tempdir, + } + bq_config = PpdbBigQuery.init_bigquery(**kw) # type: ignore[arg-type] + return bq_config + + def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: + kw = { + "schema_file": TEST_SCHEMA_RESOURCE_PATH, + "ss_schema_file": "", + "db_url": self.server.url(), + "namespace": "apdb", + "enable_replica": True, + } + kw.update(kwargs) + return ApdbSql.init_database(**kw) # type: ignore[arg-type] diff --git a/python/lsst/dax/ppdb/tests/_ppdb.py b/python/lsst/dax/ppdb/tests/_ppdb.py index 2535d187..6a2e38d8 100644 --- a/python/lsst/dax/ppdb/tests/_ppdb.py +++ b/python/lsst/dax/ppdb/tests/_ppdb.py @@ -21,7 +21,7 @@ from __future__ import annotations -__all__ = ["ApdbMixin", "PpdbTest"] +__all__ = ["TEST_SCHEMA_RESOURCE_PATH", "ApdbMixin", "PpdbTest"] import unittest from abc import ABC, abstractmethod @@ -60,6 +60,9 @@ class TestCaseMixin: """Do-nothing definition of mixin base class for regular execution.""" +TEST_SCHEMA_RESOURCE_PATH = "resource://lsst.dax.ppdb.tests.config/schema.yaml" + + def _make_region(xyz: tuple[float, float, float] = (1.0, 1.0, -1.0)) -> Region: """Make a region to use in tests""" pointing_v = UnitVector3d(*xyz) diff --git a/python/lsst/dax/ppdb/tests/config/__init__.py b/python/lsst/dax/ppdb/tests/config/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/config/schema.yaml b/python/lsst/dax/ppdb/tests/config/schema.yaml similarity index 100% rename from 
tests/config/schema.yaml rename to python/lsst/dax/ppdb/tests/config/schema.yaml diff --git a/tests/test_ppdb_bigquery.py b/tests/test_ppdb_bigquery.py index 5bf253bd..b23381e5 100644 --- a/tests/test_ppdb_bigquery.py +++ b/tests/test_ppdb_bigquery.py @@ -19,139 +19,16 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import gc -import json -import os -import posixpath -import shutil -import tempfile import unittest -import uuid -from typing import Any -import pytest - -from lsst.dax.apdb import ( - Apdb, - ApdbConfig, - ApdbReplica, - apdbUpdateRecord, -) -from lsst.dax.apdb.sql import ApdbSql -from lsst.dax.ppdb import Ppdb, PpdbConfig -from lsst.dax.ppdb.bigquery import PpdbBigQuery -from lsst.dax.ppdb.bigquery.updates import UpdateRecords -from lsst.dax.ppdb.replicator import Replicator -from lsst.dax.ppdb.tests import ApdbMixin, PpdbTest +from lsst.dax.ppdb.tests import PpdbTest +from lsst.dax.ppdb.tests._bigquery import _PostgresMixin, _SqliteMixin try: import testing.postgresql except ImportError: testing = None -TEST_SCHEMA = os.path.join(os.path.abspath(os.path.dirname(__file__)), "config/schema.yaml") - -TEST_CONFIG = { - "db_drop": True, - "validate_config": False, - "delete_existing_dirs": True, - "bucket_name": "ppdb-test", - "object_prefix": "data/test", - "dataset_id": "test_dataset", - "project_id": "test_project", -} - - -class _SqliteMixin: - """Mixin class to provide Sqlite-specific setup/teardown and instance - creation. 
- """ - - def setUp(self) -> None: - self.tempdir = tempfile.mkdtemp() - self.apdb_url = f"sqlite:///{self.tempdir}/apdb.sqlite3" - self.ppdb_url = f"sqlite:///{self.tempdir}/ppdb.sqlite3" - - def tearDown(self) -> None: - shutil.rmtree(self.tempdir, ignore_errors=True) - - def make_instance(self, **kwargs: Any) -> PpdbConfig: - """Make config class instance used in all tests.""" - kw = { - **TEST_CONFIG, - "db_url": self.ppdb_url, - "felis_path": TEST_SCHEMA, - "replication_dir": self.tempdir, - } - bq_config = PpdbBigQuery.init_bigquery( - **kw, - ) # type: ignore[arg-type] - return bq_config - - def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: - """Make APDB instance for tests.""" - kw = { - "schema_file": TEST_SCHEMA, - "ss_schema_file": "", - "db_url": self.apdb_url, - "enable_replica": True, - } - kw.update(kwargs) - return ApdbSql.init_database(**kw) # type: ignore[arg-type] - - -class _PostgresMixin: - """Mixin class to provide Postgres-specific setup/teardown and instance - creation. - """ - - postgresql: Any - - @classmethod - def setUpClass(cls) -> None: - # Create the postgres test server. - cls.postgresql = testing.postgresql.PostgresqlFactory(cache_initialized_db=True) - super().setUpClass() - - @classmethod - def tearDownClass(cls) -> None: - # Clean up any lingering SQLAlchemy engines/connections - # so they're closed before we shut down the server. 
- gc.collect() - cls.postgresql.clear_cache() - super().tearDownClass() - - def setUp(self) -> None: - self.server = self.postgresql() - self.tempdir = tempfile.mkdtemp() - - def tearDown(self) -> None: - self.server = self.postgresql() - shutil.rmtree(self.tempdir, ignore_errors=True) - - def make_instance(self, **kwargs: Any) -> PpdbConfig: - """Make config class instance used in all tests.""" - kw = { - **TEST_CONFIG, - "db_url": self.server.url(), - "db_schema": "ppdb_test", - "felis_path": TEST_SCHEMA, - "replication_dir": self.tempdir, - } - bq_config = PpdbBigQuery.init_bigquery(**kw) # type: ignore[arg-type] - return bq_config - - def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: - kw = { - "schema_file": TEST_SCHEMA, - "ss_schema_file": "", - "db_url": self.server.url(), - "namespace": "apdb", - "enable_replica": True, - } - kw.update(kwargs) - return ApdbSql.init_database(**kw) # type: ignore[arg-type] - class SqliteTestCase(_SqliteMixin, PpdbTest, unittest.TestCase): """A test case for the PpdbBigQuery class using a SQLite backend.""" @@ -160,304 +37,3 @@ class SqliteTestCase(_SqliteMixin, PpdbTest, unittest.TestCase): @unittest.skipUnless(testing is not None, "testing.postgresql module not found") class PostgresTestCase(_PostgresMixin, PpdbTest, unittest.TestCase): """A test case for the PpdbBigQuery class using a Postgres backend.""" - - -def generate_test_bucket_name(test_prefix: str = "ppdb-test") -> str: - """Generate a unique bucket name for testing.""" - test_id = uuid.uuid4().hex[:16] - return f"{test_prefix}-{test_id}" - - -class UpdateRecordsTestCase(_PostgresMixin, ApdbMixin, unittest.TestCase): - """A test case for the handling of APDB record updates by PpdbBigQuery and - related classes including the ChunkUploader. - """ - - include_update_records = True - - def setUp(self): - super().setUp() - - # Make APDB instance and fill it with test data. 
- apdb_config = self.make_apdb_instance() - apdb = Apdb.from_config(apdb_config) - self._fill_apdb(apdb) # FIXME: Only include replica chunks with the updates - apdb_replica = ApdbReplica.from_config(apdb_config) - - # Make PPDB instance. - self.ppdb_config = self.make_instance() - self.ppdb = Ppdb.from_config(self.ppdb_config) - assert isinstance(self.ppdb, PpdbBigQuery) - - # Replicate APDB replica chunks to the PPDB. - replicator = Replicator( - apdb_replica, self.ppdb, update=False, min_wait_time=0, max_wait_time=0, check_interval=0 - ) - replicator.run(exit_on_empty=True) - - def test_json_serialization(self) -> None: - """Test that the APDB update records are correctly saved to a JSON file - in the replication output and can be read back as valid UpdateRecords - objects. - """ - update_records_path = self.ppdb.replication_path / "2021/03/01/1614600000" / "update_records.json" - self.assertTrue(update_records_path.exists(), "Update records file not found in replication output") - - update_records = UpdateRecords.from_json_file(update_records_path) - print("\n" + str(update_records)) - - self.assertEqual( - update_records.replica_chunk_id, - 1614600000, - "Unexpected replica chunk ID in deserialized update records", - ) - - self.assertEqual(update_records.record_count, 3, "Unexpected number of update records deserialized") - - self.assertEqual( - len(update_records.records), 3, "Unexpected number of update records in the deserialized object" - ) - - for record in update_records.records: - self.assertIsInstance( - record, - apdbUpdateRecord.ApdbUpdateRecord, - "Deserialized record is not an instance of ApdbUpdateRecord", - ) - - update_record = update_records.records[0] - self.assertIsInstance( - update_record, - apdbUpdateRecord.ApdbReassignDiaSourceToSSObjectRecord, - "Deserialized record is not an instance of ApdbReassignDiaSourceToSSObjectRecord", - ) - assert isinstance(update_record, apdbUpdateRecord.ApdbReassignDiaSourceToSSObjectRecord) - 
self.assertEqual( - update_record.diaSourceId, - 700, - "Unexpected diaSourceId in deserialized ApdbReassignDiaSourceToSSObjectRecord", - ) - self.assertEqual( - update_record.ssObjectId, - 1, - "Unexpected ssObjectId in deserialized ApdbReassignDiaSourceToSSObjectRecord", - ) - self.assertEqual( - update_record.update_time_ns, - 1614600037000000000, - "Unexpected update_time_ns in deserialized ApdbReassignDiaSourceToSSObjectRecord", - ) - self.assertEqual( - update_record.update_order, - 0, - "Unexpected update_order in deserialized ApdbReassignDiaSourceToSSObjectRecord", - ) - self.assertEqual( - update_record.midpointMjdTai, - 60000.0, - "Unexpected midpointMjdTai in deserialized ApdbReassignDiaSourceToSSObjectRecord", - ) - self.assertEqual( - update_record.ssObjectReassocTimeMjdTai, - 59274.50042824074, - "Unexpected ssObjectReassocTimeMjdTai in deserialized ApdbReassignDiaSourceToSSObjectRecord", - ) - self.assertNotEqual( - update_record.ra, - 0.0, - "Unexpected ra in deserialized ApdbReassignDiaSourceToSSObjectRecord, should not be 0.0", - ) - self.assertNotEqual( - update_record.dec, - 0.0, - "Unexpected dec in deserialized ApdbReassignDiaSourceToSSObjectRecord, should not be 0.0", - ) - - update_record = update_records.records[1] - self.assertIsInstance( - update_record, - apdbUpdateRecord.ApdbCloseDiaObjectValidityRecord, - "Deserialized record is not an instance of ApdbCloseDiaObjectValidityRecord", - ) - self.assertEqual( - update_record.diaObjectId, - 200, - "Unexpected diaObjectId in deserialized ApdbCloseDiaObjectValidityRecord", - ) - self.assertNotEqual( - update_record.ra, - 0.0, - "Unexpected ra in deserialized ApdbCloseDiaObjectValidityRecord, should not be 0.0", - ) - self.assertNotEqual( - update_record.dec, - 0.0, - "Unexpected dec in deserialized ApdbCloseDiaObjectValidityRecord, should not be 0.0", - ) - self.assertEqual( - update_record.update_time_ns, - 1614600037000000000, - "Unexpected update_time_ns in deserialized 
ApdbCloseDiaObjectValidityRecord", - ) - self.assertEqual( - update_record.update_order, - 1, - "Unexpected update_order in deserialized ApdbCloseDiaObjectValidityRecord", - ) - self.assertEqual( - update_record.validityEndMjdTai, - 59274.50042824074, - "Unexpected validityEndMjdTai in deserialized ApdbCloseDiaObjectValidityRecord", - ) - self.assertIsNone( - update_record.nDiaSources, - "Unexpected nDiaSources in deserialized ApdbCloseDiaObjectValidityRecord, expected None", - ) - - update_record = update_records.records[2] - self.assertIsInstance( - update_record, - apdbUpdateRecord.ApdbWithdrawDiaForcedSourceRecord, - "Deserialized record is not an instance of ApdbWithdrawDiaForcedSourceRecord", - ) - self.assertEqual( - update_record.diaObjectId, - 200, - "Unexpected diaObjectId in deserialized ApdbWithdrawDiaForcedSourceRecord", - ) - self.assertEqual( - update_record.visit, - 7, - "Unexpected visit in deserialized ApdbWithdrawDiaForcedSourceRecord", - ) - self.assertEqual( - update_record.detector, - 1, - "Unexpected detector in deserialized ApdbWithdrawDiaForcedSourceRecord", - ) - self.assertNotEqual( - update_record.ra, - 0.0, - "Unexpected ra in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", - ) - self.assertNotEqual( - update_record.dec, - 0.0, - "Unexpected dec in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", - ) - self.assertEqual( - update_record.midpointMjdTai, - 60000.0, - "Unexpected midpointMjdTai in deserialized ApdbWithdrawDiaForcedSourceRecord", - ) - self.assertEqual( - update_record.update_time_ns, - 1614600037000000000, - "Unexpected update_time_ns in deserialized ApdbWithdrawDiaForcedSourceRecord", - ) - self.assertEqual( - update_record.update_order, - 2, - "Unexpected update_order in deserialized ApdbWithdrawDiaForcedSourceRecord", - ) - self.assertEqual( - update_record.timeWithdrawnMjdTai, - 59274.50042824074, - "Unexpected timeWithdrawnMjdTai in deserialized ApdbWithdrawDiaForcedSourceRecord", 
- ) - self.assertNotEqual( - update_record.ra, - 0.0, - "Unexpected ra in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", - ) - self.assertNotEqual( - update_record.dec, - 0.0, - "Unexpected dec in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", - ) - - @pytest.mark.skipif( - pytest.importorskip("lsst.dax.ppdbx.gcp", reason="dax_ppdbx_gcp is not installed") is None, - reason="", - ) - def test_chunk_uploader(self) -> None: - """Test that the update records are correctly uploaded to Google Cloud - Storage after replication. - """ - from lsst.dax.ppdb.bigquery.chunk_uploader import ChunkUploader - from lsst.dax.ppdbx.gcp.gcs import StorageClient - - # Change the configuration to use a unique test bucket name to avoid - # conflicts - ppdb_config_copy = self.ppdb_config.model_copy() - ppdb_config_copy.bucket_name = generate_test_bucket_name("ppdb-test-gcs-upload") - - # Patch the ChunkUploader to print the message that would be published - # to the Pub/Sub topic instead of publishing, because there is no - # support for that service in the test environment. 
- class DummyChunkUploader(ChunkUploader): - def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_id: int) -> None: - message = { - "dataset": self.dataset_id, - "chunk_id": str(chunk_id), - "folder": f"gs://{posixpath.join(bucket_name, chunk_prefix)}", - } - print(f"Dummy publish to Pub/Sub topic: {message}") - - # Create the test GCS bucket - storage_client = StorageClient(ppdb_config_copy.bucket_name) - try: - storage_client.create_bucket() - except Exception as e: - self.fail(f"Failed to create test GCS bucket: {e}") - - # Configure and run the uploader - uploader = DummyChunkUploader( - ppdb_config_copy, - wait_interval=0, - exit_on_empty=True, - exit_on_error=True, - ) - print(f"Uploader will copy files to {uploader.bucket_name}/{uploader.prefix}/") - uploader.run() - - # Retrieve the update records file - update_records_files = storage_client.list_files("**/update_records.json") - self.assertEqual( - len(update_records_files), - 1, - f"Expected exactly one update_records.json file in GCS, found " - f"{len(update_records_files)}: {update_records_files}", - ) - update_records_str = storage_client.read_as_string(update_records_files[0]) - - # Print the contents of the update records file for debugging - update_records_json = json.loads(update_records_str) - print(f"Contents of update_records.json in GCS:\n{json.dumps(update_records_json, indent=2)}") - - # Load the update records into the data model and perform a few basic - # checks (test_json_serialization already tests this in detail, so we - # just check a few key fields here). 
- update_records = UpdateRecords.model_validate(update_records_json) - self.assertEqual( - update_records.replica_chunk_id, - 1614600000, - "Unexpected replica chunk ID in update records file from GCS", - ) - self.assertEqual( - update_records.record_count, - 3, - f"Expected record_count of 3 in update records file from GCS, found " - f"{update_records.record_count}", - ) - self.assertEqual( - len(update_records.records), - 3, - f"Expected 3 update records in the file from GCS, found {len(update_records.records)}", - ) - - # Delete the test GCS bucket - try: - storage_client.delete_bucket(force=True) - except Exception as e: - self.fail(f"Failed to delete test GCS bucket: {e}") diff --git a/tests/test_ppdb_sql.py b/tests/test_ppdb_sql.py index f8675079..b6a6a1ab 100644 --- a/tests/test_ppdb_sql.py +++ b/tests/test_ppdb_sql.py @@ -20,7 +20,6 @@ # along with this program. If not, see . import gc -import os import shutil import tempfile import unittest @@ -30,15 +29,13 @@ from lsst.dax.apdb.sql import ApdbSql from lsst.dax.ppdb import PpdbConfig from lsst.dax.ppdb.sql import PpdbSql -from lsst.dax.ppdb.tests import PpdbTest +from lsst.dax.ppdb.tests import TEST_SCHEMA_RESOURCE_PATH, PpdbTest try: import testing.postgresql except ImportError: testing = None -TEST_SCHEMA = os.path.join(os.path.abspath(os.path.dirname(__file__)), "config/schema.yaml") - class ApdbSQLiteTestCase(PpdbTest, unittest.TestCase): """A test case for PpdbSql class using SQLite backend.""" @@ -55,11 +52,11 @@ def tearDown(self) -> None: def make_instance(self, **kwargs: Any) -> PpdbConfig: """Make config class instance used in all tests.""" - return PpdbSql.init_database(db_url=self.ppdb_url, schema_file=TEST_SCHEMA, **kwargs) + return PpdbSql.init_database(db_url=self.ppdb_url, schema_file=TEST_SCHEMA_RESOURCE_PATH, **kwargs) def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: kw = { - "schema_file": TEST_SCHEMA, + "schema_file": TEST_SCHEMA_RESOURCE_PATH, "ss_schema_file": "", "db_url": 
self.apdb_url, "enable_replica": True, @@ -98,11 +95,13 @@ def tearDown(self) -> None: def make_instance(self, **kwargs: Any) -> PpdbConfig: """Make config class instance used in all tests.""" - return PpdbSql.init_database(db_url=self.server.url(), schema_file=TEST_SCHEMA, **kwargs) + return PpdbSql.init_database( + db_url=self.server.url(), schema_file=TEST_SCHEMA_RESOURCE_PATH, **kwargs + ) def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: kw = { - "schema_file": TEST_SCHEMA, + "schema_file": TEST_SCHEMA_RESOURCE_PATH, "ss_schema_file": "", "db_url": self.server.url(), "namespace": "apdb", diff --git a/tests/test_update_record_expander.py b/tests/test_update_record_expander.py index 8818565e..843a025a 100644 --- a/tests/test_update_record_expander.py +++ b/tests/test_update_record_expander.py @@ -32,10 +32,16 @@ ApdbWithdrawDiaForcedSourceRecord, ApdbWithdrawDiaSourceRecord, ) -from lsst.dax.ppdb.bigquery.updates import ExpandedUpdateRecord, UpdateRecordExpander, UpdateRecords -from lsst.dax.ppdb.tests._updates import _create_test_update_records +try: + from lsst.dax.ppdb.bigquery import updates + from lsst.dax.ppdb.bigquery.updates import ExpandedUpdateRecord, UpdateRecordExpander, UpdateRecords + from lsst.dax.ppdb.tests._updates import _create_test_update_records +except ImportError: + updates = None + +@unittest.skipIf(updates is None, "Google Cloud environment not available") class UpdateRecordExpanderTestCase(unittest.TestCase): """Test UpdateRecordExpander functionality.""" @@ -50,6 +56,8 @@ def setUp(self) -> None: def test_get_update_fields(self) -> None: """Test get_update_fields class method.""" + from lsst.dax.ppdb.bigquery.updates import UpdateRecordExpander + # Test known update types self.assertEqual( UpdateRecordExpander.get_update_fields("reassign_diasource_to_diaobject"), ["diaObjectId"] @@ -77,6 +85,8 @@ def test_get_update_fields(self) -> None: def test_get_record_id_field_names(self) -> None: """Test get_record_id_field class 
method.""" + from lsst.dax.ppdb.bigquery.updates import UpdateRecordExpander + self.assertEqual( UpdateRecordExpander.get_record_id_fields("reassign_diasource_to_diaobject"), ["diaSourceId"] ) @@ -102,6 +112,8 @@ def test_reassign_diasource_to_diaobject(self) -> None: """Test expand_single_record with ApdbReassignDiaSourceToDiaObjectRecord. """ + from lsst.dax.ppdb.bigquery.updates import ExpandedUpdateRecord, UpdateRecordExpander + record = ApdbReassignDiaSourceToDiaObjectRecord( update_time_ns=self.update_time_ns, update_order=0, diff --git a/tests/test_update_records.py b/tests/test_update_records.py new file mode 100644 index 00000000..e523544a --- /dev/null +++ b/tests/test_update_records.py @@ -0,0 +1,346 @@ +# This file is part of dax_ppdb. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import json +import posixpath +import unittest +import uuid + +import pytest + +try: + from lsst.dax.ppdb.bigquery import updates + from lsst.dax.ppdb.bigquery.updates import UpdateRecords +except ImportError: + updates = None + +from lsst.dax.apdb import ( + Apdb, + ApdbReplica, + apdbUpdateRecord, +) +from lsst.dax.ppdb import Ppdb +from lsst.dax.ppdb.bigquery import PpdbBigQuery +from lsst.dax.ppdb.replicator import Replicator +from lsst.dax.ppdb.tests import ApdbMixin +from lsst.dax.ppdb.tests._bigquery import _PostgresMixin + + +def _generate_test_bucket_name(test_prefix: str = "ppdb-test") -> str: + """Generate a unique bucket name for testing.""" + test_id = uuid.uuid4().hex[:16] + return f"{test_prefix}-{test_id}" + + +@unittest.skipIf(updates is None, "Google Cloud dependencies not available") +class UpdateRecordsTestCase(_PostgresMixin, ApdbMixin, unittest.TestCase): + """A test case for the handling of APDB record updates by PpdbBigQuery and + related classes including the ChunkUploader. + """ + + include_update_records = True + + def setUp(self): + super().setUp() + + # Make APDB instance and fill it with test data. + apdb_config = self.make_apdb_instance() + apdb = Apdb.from_config(apdb_config) + self._fill_apdb(apdb) # FIXME: Only include replica chunks with the updates + apdb_replica = ApdbReplica.from_config(apdb_config) + + # Make PPDB instance. + self.ppdb_config = self.make_instance() + self.ppdb = Ppdb.from_config(self.ppdb_config) + assert isinstance(self.ppdb, PpdbBigQuery) + + # Replicate APDB replica chunks to the PPDB. + replicator = Replicator( + apdb_replica, self.ppdb, update=False, min_wait_time=0, max_wait_time=0, check_interval=0 + ) + replicator.run(exit_on_empty=True) + + def test_json_serialization(self) -> None: + """Test that the APDB update records are correctly saved to a JSON file + in the replication output and can be read back as valid UpdateRecords + objects. 
+ """ + update_records_path = self.ppdb.replication_path / "2021/03/01/1614600000" / "update_records.json" + self.assertTrue(update_records_path.exists(), "Update records file not found in replication output") + + update_records = UpdateRecords.from_json_file(update_records_path) + print("\n" + str(update_records)) + + self.assertEqual( + update_records.replica_chunk_id, + 1614600000, + "Unexpected replica chunk ID in deserialized update records", + ) + + self.assertEqual(update_records.record_count, 3, "Unexpected number of update records deserialized") + + self.assertEqual( + len(update_records.records), 3, "Unexpected number of update records in the deserialized object" + ) + + for record in update_records.records: + self.assertIsInstance( + record, + apdbUpdateRecord.ApdbUpdateRecord, + "Deserialized record is not an instance of ApdbUpdateRecord", + ) + + update_record = update_records.records[0] + self.assertIsInstance( + update_record, + apdbUpdateRecord.ApdbReassignDiaSourceToSSObjectRecord, + "Deserialized record is not an instance of ApdbReassignDiaSourceToSSObjectRecord", + ) + assert isinstance(update_record, apdbUpdateRecord.ApdbReassignDiaSourceToSSObjectRecord) + self.assertEqual( + update_record.diaSourceId, + 700, + "Unexpected diaSourceId in deserialized ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertEqual( + update_record.ssObjectId, + 1, + "Unexpected ssObjectId in deserialized ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertEqual( + update_record.update_time_ns, + 1614600037000000000, + "Unexpected update_time_ns in deserialized ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertEqual( + update_record.update_order, + 0, + "Unexpected update_order in deserialized ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertEqual( + update_record.midpointMjdTai, + 60000.0, + "Unexpected midpointMjdTai in deserialized ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertEqual( + update_record.ssObjectReassocTimeMjdTai, + 
59274.50042824074, + "Unexpected ssObjectReassocTimeMjdTai in deserialized ApdbReassignDiaSourceToSSObjectRecord", + ) + self.assertNotEqual( + update_record.ra, + 0.0, + "Unexpected ra in deserialized ApdbReassignDiaSourceToSSObjectRecord, should not be 0.0", + ) + self.assertNotEqual( + update_record.dec, + 0.0, + "Unexpected dec in deserialized ApdbReassignDiaSourceToSSObjectRecord, should not be 0.0", + ) + + update_record = update_records.records[1] + self.assertIsInstance( + update_record, + apdbUpdateRecord.ApdbCloseDiaObjectValidityRecord, + "Deserialized record is not an instance of ApdbCloseDiaObjectValidityRecord", + ) + self.assertEqual( + update_record.diaObjectId, + 200, + "Unexpected diaObjectId in deserialized ApdbCloseDiaObjectValidityRecord", + ) + self.assertNotEqual( + update_record.ra, + 0.0, + "Unexpected ra in deserialized ApdbCloseDiaObjectValidityRecord, should not be 0.0", + ) + self.assertNotEqual( + update_record.dec, + 0.0, + "Unexpected dec in deserialized ApdbCloseDiaObjectValidityRecord, should not be 0.0", + ) + self.assertEqual( + update_record.update_time_ns, + 1614600037000000000, + "Unexpected update_time_ns in deserialized ApdbCloseDiaObjectValidityRecord", + ) + self.assertEqual( + update_record.update_order, + 1, + "Unexpected update_order in deserialized ApdbCloseDiaObjectValidityRecord", + ) + self.assertEqual( + update_record.validityEndMjdTai, + 59274.50042824074, + "Unexpected validityEndMjdTai in deserialized ApdbCloseDiaObjectValidityRecord", + ) + self.assertIsNone( + update_record.nDiaSources, + "Unexpected nDiaSources in deserialized ApdbCloseDiaObjectValidityRecord, expected None", + ) + + update_record = update_records.records[2] + self.assertIsInstance( + update_record, + apdbUpdateRecord.ApdbWithdrawDiaForcedSourceRecord, + "Deserialized record is not an instance of ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.diaObjectId, + 200, + "Unexpected diaObjectId in deserialized 
ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.visit, + 7, + "Unexpected visit in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.detector, + 1, + "Unexpected detector in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertNotEqual( + update_record.ra, + 0.0, + "Unexpected ra in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", + ) + self.assertNotEqual( + update_record.dec, + 0.0, + "Unexpected dec in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", + ) + self.assertEqual( + update_record.midpointMjdTai, + 60000.0, + "Unexpected midpointMjdTai in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.update_time_ns, + 1614600037000000000, + "Unexpected update_time_ns in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.update_order, + 2, + "Unexpected update_order in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertEqual( + update_record.timeWithdrawnMjdTai, + 59274.50042824074, + "Unexpected timeWithdrawnMjdTai in deserialized ApdbWithdrawDiaForcedSourceRecord", + ) + self.assertNotEqual( + update_record.ra, + 0.0, + "Unexpected ra in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", + ) + self.assertNotEqual( + update_record.dec, + 0.0, + "Unexpected dec in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", + ) + + @pytest.mark.skipif( + pytest.importorskip("lsst.dax.ppdbx.gcp", reason="dax_ppdbx_gcp is not installed") is None, + reason="", + ) + def test_chunk_uploader(self) -> None: + """Test that the update records are correctly uploaded to Google Cloud + Storage after replication. 
+ """ + from lsst.dax.ppdb.bigquery.chunk_uploader import ChunkUploader + from lsst.dax.ppdbx.gcp.gcs import StorageClient + + # Change the configuration to use a unique test bucket name to avoid + # conflicts + ppdb_config_copy = self.ppdb_config.model_copy() + ppdb_config_copy.bucket_name = _generate_test_bucket_name("ppdb-test-gcs-upload") + + # Patch the ChunkUploader to print the message that would be published + # to the Pub/Sub topic instead of publishing, because there is no + # support for that service in the test environment. + class DummyChunkUploader(ChunkUploader): + def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_id: int) -> None: + message = { + "dataset": self.dataset_id, + "chunk_id": str(chunk_id), + "folder": f"gs://{posixpath.join(bucket_name, chunk_prefix)}", + } + print(f"Dummy publish to Pub/Sub topic: {message}") + + # Create the test GCS bucket + storage_client = StorageClient(ppdb_config_copy.bucket_name) + try: + storage_client.create_bucket() + except Exception as e: + self.fail(f"Failed to create test GCS bucket: {e}") + + # Configure and run the uploader + uploader = DummyChunkUploader( + ppdb_config_copy, + wait_interval=0, + exit_on_empty=True, + exit_on_error=True, + ) + print(f"Uploader will copy files to {uploader.bucket_name}/{uploader.prefix}/") + uploader.run() + + # Retrieve the update records file + update_records_files = storage_client.list_files("**/update_records.json") + self.assertEqual( + len(update_records_files), + 1, + f"Expected exactly one update_records.json file in GCS, found " + f"{len(update_records_files)}: {update_records_files}", + ) + update_records_str = storage_client.read_as_string(update_records_files[0]) + + # Print the contents of the update records file for debugging + update_records_json = json.loads(update_records_str) + print(f"Contents of update_records.json in GCS:\n{json.dumps(update_records_json, indent=2)}") + + # Load the update records into the data model and 
perform a few basic + # checks (test_json_serialization already tests this in detail, so we + # just check a few key fields here). + update_records = UpdateRecords.model_validate(update_records_json) + self.assertEqual( + update_records.replica_chunk_id, + 1614600000, + "Unexpected replica chunk ID in update records file from GCS", + ) + self.assertEqual( + update_records.record_count, + 3, + f"Expected record_count of 3 in update records file from GCS, found " + f"{update_records.record_count}", + ) + self.assertEqual( + len(update_records.records), + 3, + f"Expected 3 update records in the file from GCS, found {len(update_records.records)}", + ) + + # Delete the test GCS bucket + try: + storage_client.delete_bucket(force=True) + except Exception as e: + self.fail(f"Failed to delete test GCS bucket: {e}") diff --git a/tests/test_updates_merger.py b/tests/test_updates_merger.py index 381b0adc..07705955 100644 --- a/tests/test_updates_merger.py +++ b/tests/test_updates_merger.py @@ -29,11 +29,15 @@ except ImportError: bigquery = None -from lsst.dax.ppdb.bigquery.updates import DiaObjectUpdatesMerger, UpdateRecordExpander, UpdatesTable -from lsst.dax.ppdb.tests._updates import _create_test_update_records +try: + from lsst.dax.ppdb.bigquery import updates + from lsst.dax.ppdb.bigquery.updates import DiaObjectUpdatesMerger, UpdateRecordExpander, UpdatesTable + from lsst.dax.ppdb.tests._updates import _create_test_update_records +except ImportError: + updates = None -@unittest.skipIf(bigquery is None, "google-cloud-bigquery not available") +@unittest.skipIf(bigquery is None or updates is None, "Google Cloud dependencies not available") class TestUpdatesMerger(unittest.TestCase): """Test UpdatesMerger functionality.""" @@ -115,7 +119,7 @@ def test_merge_no_updates(self): table_fqn = f"{self.target_dataset_fqn}.DiaObject" before = {r.diaObjectId: r for r in self.client.query(f"SELECT * FROM `{table_fqn}`").result()} merger = DiaObjectUpdatesMerger(self.client) - 
merger.merge(updates_table_fqn=dedup_fqn, target_table_fqn=table_fqn) + merger.merge(updates_table_fqn=dedup_fqn, target_dataset_fqn=self.target_dataset_fqn) after = {r.diaObjectId: r for r in self.client.query(f"SELECT * FROM `{table_fqn}`").result()} for obj_id in before: self.assertEqual(before[obj_id].validityEndMjdTai, after[obj_id].validityEndMjdTai) diff --git a/tests/test_updates_table.py b/tests/test_updates_table.py index 8d3d821f..aeb196b4 100644 --- a/tests/test_updates_table.py +++ b/tests/test_updates_table.py @@ -23,15 +23,19 @@ import uuid try: - from google.cloud import bigquery + from lsst.dax.ppdb.bigquery import updates + from lsst.dax.ppdb.bigquery.updates import UpdateRecordExpander, UpdatesTable + from lsst.dax.ppdb.tests._updates import _create_test_update_records except ImportError: - bigquery = None + updates = None -from lsst.dax.ppdb.bigquery.updates import UpdateRecordExpander, UpdatesTable -from lsst.dax.ppdb.tests._updates import _create_test_update_records +try: + from google.cloud import bigquery +except (ModuleNotFoundError, ImportError): + bigquery = None -@unittest.skipIf(bigquery is None, "google-cloud-bigquery not available") +@unittest.skipIf(updates is None or bigquery is None, "Google Cloud dependencies not available") class TestUpdatesTable(unittest.TestCase): """Test UpdatesTable functionality.""" From 74c4f7463e8134eb1e0950b5aff6a1bb68cc1a99 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 20 Feb 2026 17:43:58 -0600 Subject: [PATCH 20/49] Add update merging support for DiaSource and DiaForcedSource tables --- .../dax/ppdb/bigquery/updates/__init__.py | 7 +- .../sql/merge_diaforcedsource_updates.sql | 42 +++++++ .../updates/sql/merge_diasource_updates.sql | 62 +++++++++ .../ppdb/bigquery/updates/updates_merger.py | 15 ++- tests/test_updates_merger.py | 119 +++++++++++++++++- 5 files changed, 242 insertions(+), 3 deletions(-) create mode 100644 
python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaforcedsource_updates.sql create mode 100644 python/lsst/dax/ppdb/bigquery/updates/sql/merge_diasource_updates.sql diff --git a/python/lsst/dax/ppdb/bigquery/updates/__init__.py b/python/lsst/dax/ppdb/bigquery/updates/__init__.py index 342c6069..7673a1ca 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/__init__.py +++ b/python/lsst/dax/ppdb/bigquery/updates/__init__.py @@ -23,4 +23,9 @@ from .expanded_update_record import ExpandedUpdateRecord from .update_record_expander import UpdateRecordExpander from .updates_table import UpdatesTable -from .updates_merger import UpdatesMerger, DiaObjectUpdatesMerger +from .updates_merger import ( + UpdatesMerger, + DiaObjectUpdatesMerger, + DiaSourceUpdatesMerger, + DiaForcedSourceUpdatesMerger, +) diff --git a/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaforcedsource_updates.sql b/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaforcedsource_updates.sql new file mode 100644 index 00000000..8c60f86c --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaforcedsource_updates.sql @@ -0,0 +1,42 @@ +-- merge_diaforcedsource_updates.sql +-- +-- Query parameters: +-- @updates_table STRING -- table FQN, e.g. "project.dataset.prod_next" +-- @target_dataset STRING -- dataset FQN, e.g. "project.dataset" +-- +-- Do NOT include backticks in parameter values. 
+ +DECLARE sql STRING; + +SET sql = """ +MERGE `{target_dataset}.DiaForcedSource` T +USING ( + WITH patch AS ( + SELECT + record_id[OFFSET(0)] AS diaObjectId, + record_id[OFFSET(1)] AS visit, + record_id[OFFSET(2)] AS detector, + + ANY_VALUE( + CASE WHEN field_name = 'timeWithdrawnMjdTai' + THEN CAST(JSON_VALUE(value_json) AS FLOAT64) + END + ) AS timeWithdrawnMjdTai_value, + COUNTIF(field_name = 'timeWithdrawnMjdTai') > 0 AS timeWithdrawnMjdTai_present + + FROM `{updates_table}` + WHERE table_name = 'DiaForcedSource' + AND field_name IN ('timeWithdrawnMjdTai') + GROUP BY diaObjectId, visit, detector + ) + SELECT * FROM patch +) P +ON T.diaObjectId = P.diaObjectId + AND T.visit = P.visit + AND T.detector = P.detector +WHEN MATCHED THEN +UPDATE SET + timeWithdrawnMjdTai = IF(P.timeWithdrawnMjdTai_present, P.timeWithdrawnMjdTai_value, T.timeWithdrawnMjdTai) +"""; + +EXECUTE IMMEDIATE sql; diff --git a/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diasource_updates.sql b/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diasource_updates.sql new file mode 100644 index 00000000..5a39b877 --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diasource_updates.sql @@ -0,0 +1,62 @@ +-- merge_diasource_updates.sql +-- +-- Query parameters: +-- @updates_table STRING -- table FQN, e.g. "project.dataset.prod_next" +-- @target_dataset STRING -- dataset FQN, e.g. "project.dataset" +-- +-- Do NOT include backticks in parameter values. 
+ +DECLARE sql STRING; + +SET sql = """ +MERGE `{target_dataset}.DiaSource` T +USING ( + WITH patch AS ( + SELECT + record_id[OFFSET(0)] AS diaSourceId, + + ANY_VALUE( + CASE WHEN field_name = 'diaObjectId' + THEN CAST(JSON_VALUE(value_json) AS INT64) + END + ) AS diaObjectId_value, + COUNTIF(field_name = 'diaObjectId') > 0 AS diaObjectId_present, + + ANY_VALUE( + CASE WHEN field_name = 'ssObjectId' + THEN CAST(JSON_VALUE(value_json) AS INT64) + END + ) AS ssObjectId_value, + COUNTIF(field_name = 'ssObjectId') > 0 AS ssObjectId_present, + + ANY_VALUE( + CASE WHEN field_name = 'ssObjectReassocTimeMjdTai' + THEN CAST(JSON_VALUE(value_json) AS FLOAT64) + END + ) AS ssObjectReassocTimeMjdTai_value, + COUNTIF(field_name = 'ssObjectReassocTimeMjdTai') > 0 AS ssObjectReassocTimeMjdTai_present, + + ANY_VALUE( + CASE WHEN field_name = 'timeWithdrawnMjdTai' + THEN CAST(JSON_VALUE(value_json) AS FLOAT64) + END + ) AS timeWithdrawnMjdTai_value, + COUNTIF(field_name = 'timeWithdrawnMjdTai') > 0 AS timeWithdrawnMjdTai_present + + FROM `{updates_table}` + WHERE table_name = 'DiaSource' + AND field_name IN ('diaObjectId', 'ssObjectId', 'ssObjectReassocTimeMjdTai', 'timeWithdrawnMjdTai') + GROUP BY diaSourceId + ) + SELECT * FROM patch +) P +ON T.diaSourceId = P.diaSourceId +WHEN MATCHED THEN +UPDATE SET + diaObjectId = IF(P.diaObjectId_present, P.diaObjectId_value, T.diaObjectId), + ssObjectId = IF(P.ssObjectId_present, P.ssObjectId_value, T.ssObjectId), + ssObjectReassocTimeMjdTai = IF(P.ssObjectReassocTimeMjdTai_present, P.ssObjectReassocTimeMjdTai_value, T.ssObjectReassocTimeMjdTai), + timeWithdrawnMjdTai = IF(P.timeWithdrawnMjdTai_present, P.timeWithdrawnMjdTai_value, T.timeWithdrawnMjdTai) +"""; + +EXECUTE IMMEDIATE sql; diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py index 61bd2927..d1ea3f5c 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py +++ 
b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py @@ -89,5 +89,18 @@ class DiaObjectUpdatesMerger(UpdatesMerger): """Merger for DiaObject updates.""" TABLE_NAME = "DiaObject" - SQL_RESOURCE_NAME = "merge_diaobject_updates" + + +class DiaSourceUpdatesMerger(UpdatesMerger): + """Merger for DiaSource updates.""" + + TABLE_NAME = "DiaSource" + SQL_RESOURCE_NAME = "merge_diasource_updates" + + +class DiaForcedSourceUpdatesMerger(UpdatesMerger): + """Merger for DiaForcedSource updates.""" + + TABLE_NAME = "DiaForcedSource" + SQL_RESOURCE_NAME = "merge_diaforcedsource_updates" diff --git a/tests/test_updates_merger.py b/tests/test_updates_merger.py index 07705955..ed4dffa7 100644 --- a/tests/test_updates_merger.py +++ b/tests/test_updates_merger.py @@ -31,7 +31,13 @@ try: from lsst.dax.ppdb.bigquery import updates - from lsst.dax.ppdb.bigquery.updates import DiaObjectUpdatesMerger, UpdateRecordExpander, UpdatesTable + from lsst.dax.ppdb.bigquery.updates import ( + DiaForcedSourceUpdatesMerger, + DiaObjectUpdatesMerger, + DiaSourceUpdatesMerger, + UpdateRecordExpander, + UpdatesTable, + ) from lsst.dax.ppdb.tests._updates import _create_test_update_records except ImportError: updates = None @@ -110,6 +116,117 @@ def test_merge_diaobject(self): self.assertEqual(after[200003].validityEndMjdTai, before[200003].validityEndMjdTai) self.assertEqual(after[200003].nDiaSources, before[200003].nDiaSources) + def test_merge_diasource(self): + schema = [ + bigquery.SchemaField("diaSourceId", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("diaObjectId", "INTEGER", mode="NULLABLE"), + bigquery.SchemaField("ssObjectId", "INTEGER", mode="NULLABLE"), + bigquery.SchemaField("ssObjectReassocTimeMjdTai", "FLOAT", mode="NULLABLE"), + bigquery.SchemaField("timeWithdrawnMjdTai", "FLOAT", mode="NULLABLE"), + ] + table_fqn = f"{self.target_dataset_fqn}.DiaSource" + table = bigquery.Table(table_fqn, schema=schema) + self.client.create_table(table) + rows = [ + { + "diaSourceId": 
100001, + "diaObjectId": 200001, + "ssObjectId": None, + "ssObjectReassocTimeMjdTai": None, + "timeWithdrawnMjdTai": None, + }, + { + "diaSourceId": 100002, + "diaObjectId": 200002, + "ssObjectId": None, + "ssObjectReassocTimeMjdTai": None, + "timeWithdrawnMjdTai": None, + }, + { + "diaSourceId": 100003, + "diaObjectId": 200003, + "ssObjectId": None, + "ssObjectReassocTimeMjdTai": None, + "timeWithdrawnMjdTai": None, + }, + { + "diaSourceId": 100004, + "diaObjectId": 200004, + "ssObjectId": None, + "ssObjectReassocTimeMjdTai": None, + "timeWithdrawnMjdTai": None, + }, + ] + job = self.client.load_table_from_file( + self._json_rows_to_buf(rows), + table_fqn, + job_config=bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON), + ) + job.result() + + updates_table = UpdatesTable(self.client, self.updates_table_fqn) + updates_table.create() + update_records = _create_test_update_records() + expanded = UpdateRecordExpander.expand_updates(update_records) + updates_table.append(expanded) + dedup_fqn = f"{self.updates_table_fqn}_dedup" + updates_table.deduplicate_to(dedup_fqn) + + query = f"SELECT * FROM `{table_fqn}` ORDER BY diaSourceId" + before = {r.diaSourceId: r for r in self.client.query(query).result()} + merger = DiaSourceUpdatesMerger(self.client) + merger.merge(updates_table_fqn=dedup_fqn, target_dataset_fqn=self.target_dataset_fqn) + after = {r.diaSourceId: r for r in self.client.query(query).result()} + + self.assertEqual(after[100001].diaObjectId, 400001) + self.assertEqual(after[100002].ssObjectId, 2001) + self.assertEqual(after[100002].ssObjectReassocTimeMjdTai, 59580.0) + self.assertEqual(after[100003].timeWithdrawnMjdTai, 59580.0) + self.assertEqual(after[100004].diaObjectId, before[100004].diaObjectId) + self.assertEqual(after[100004].ssObjectId, before[100004].ssObjectId) + self.assertEqual(after[100004].timeWithdrawnMjdTai, before[100004].timeWithdrawnMjdTai) + + def test_merge_diaforcedsource(self): + schema = [ + 
bigquery.SchemaField("diaObjectId", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("visit", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("detector", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("timeWithdrawnMjdTai", "FLOAT", mode="NULLABLE"), + ] + table_fqn = f"{self.target_dataset_fqn}.DiaForcedSource" + table = bigquery.Table(table_fqn, schema=schema) + self.client.create_table(table) + rows = [ + {"diaObjectId": 200001, "visit": 12345, "detector": 42, "timeWithdrawnMjdTai": None}, + {"diaObjectId": 200001, "visit": 12346, "detector": 42, "timeWithdrawnMjdTai": None}, + ] + job = self.client.load_table_from_file( + self._json_rows_to_buf(rows), + table_fqn, + job_config=bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON), + ) + job.result() + + updates_table = UpdatesTable(self.client, self.updates_table_fqn) + updates_table.create() + update_records = _create_test_update_records() + expanded = UpdateRecordExpander.expand_updates(update_records) + updates_table.append(expanded) + dedup_fqn = f"{self.updates_table_fqn}_dedup" + updates_table.deduplicate_to(dedup_fqn) + + query = f"SELECT * FROM `{table_fqn}` ORDER BY diaObjectId, visit, detector" + before = {(r.diaObjectId, r.visit, r.detector): r for r in self.client.query(query).result()} + merger = DiaForcedSourceUpdatesMerger(self.client) + merger.merge(updates_table_fqn=dedup_fqn, target_dataset_fqn=self.target_dataset_fqn) + after = {(r.diaObjectId, r.visit, r.detector): r for r in self.client.query(query).result()} + + self.assertEqual(after[(200001, 12345, 42)].timeWithdrawnMjdTai, 59580.0) + self.assertEqual( + after[(200001, 12346, 42)].timeWithdrawnMjdTai, + before[(200001, 12346, 42)].timeWithdrawnMjdTai, + ) + def test_merge_no_updates(self): self._create_target_table() updates_table = UpdatesTable(self.client, self.updates_table_fqn) From 300a228b632ce8c1eda7f7eb996a090e17b0cfa6 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 20 Feb 2026 
18:02:04 -0600 Subject: [PATCH 21/49] Remove requirements that we don't want installed by default in testing --- requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index f40c2d15..a8e9fceb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,5 +8,3 @@ lsst-utils @ git+https://github.com/lsst/utils@main lsst-resources[s3] @ git+https://github.com/lsst/resources@main lsst-felis @ git+https://github.com/lsst/felis@main lsst-sdm-schemas @ git+https://github.com/lsst/sdm_schemas@main -lsst-dax-ppdbx-gcp @ git+https://github.com/lsst-dm/dax_ppdbx_gcp@tickets/DM-54070 -google-cloud-bigquery From 183bae56f782469c6ea3fba9b22faeb33b7d1650 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 20 Feb 2026 18:03:46 -0600 Subject: [PATCH 22/49] ruff --- python/lsst/dax/ppdb/tests/_updates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/lsst/dax/ppdb/tests/_updates.py b/python/lsst/dax/ppdb/tests/_updates.py index a5a62002..bc978ffe 100644 --- a/python/lsst/dax/ppdb/tests/_updates.py +++ b/python/lsst/dax/ppdb/tests/_updates.py @@ -136,7 +136,8 @@ def _create_test_update_records() -> UpdateRecords: ) ) - # Duplicate of the nDiaSources update but with earlier timestamp (should be discarded) + # Duplicate of the nDiaSources update but with earlier timestamp (should be + # discarded) records.append( ApdbUpdateNDiaSourcesRecord( update_time_ns=test_update_time_ns - 1000000000, # 1 second earlier From 61821dcff46375a869716426b955469f4e0fb857 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Mon, 23 Feb 2026 14:52:49 -0600 Subject: [PATCH 23/49] Add build tools to Dockerfile --- docker/Dockerfile.replication | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docker/Dockerfile.replication b/docker/Dockerfile.replication index 12e8b3a5..a8e218f1 100644 --- a/docker/Dockerfile.replication +++ b/docker/Dockerfile.replication @@ -3,11 +3,14 @@ FROM 
python:3.12-slim-bookworm ENV DEBIAN_FRONTEND=noninteractive # Update and install OS dependencies -RUN apt-get -y update && \ - apt-get -y upgrade && \ - apt-get -y install --no-install-recommends git && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential \ + python3-dev \ + pkg-config \ + git \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* # Install required python build dependencies RUN pip install --upgrade --no-cache-dir pip setuptools wheel uv From 5d1a3afddfe340727836381a359eb6e84e6fe5f2 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 24 Feb 2026 15:00:35 -0600 Subject: [PATCH 24/49] Move engine creation out of `make_database` method --- python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 3 ++- python/lsst/dax/ppdb/sql/_ppdb_sql.py | 3 ++- python/lsst/dax/ppdb/sql/_ppdb_sql_base.py | 3 +-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index 82d2205c..2b6beab2 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -476,7 +476,8 @@ def init_bigquery( sql_config = PpdbSqlBaseConfig( db_url=db_url, schema_name=db_schema, felis_path=felis_path, felis_schema=felis_schema ) - cls.make_database(sql_config, sa_metadata, schema_version, db_drop) + engine = cls.make_engine(sql_config) + cls.make_database(engine, sql_config, sa_metadata, schema_version, db_drop) # Build config parameters. 
bq_config = PpdbBigQueryConfig( diff --git a/python/lsst/dax/ppdb/sql/_ppdb_sql.py b/python/lsst/dax/ppdb/sql/_ppdb_sql.py index 623cf651..320f579c 100644 --- a/python/lsst/dax/ppdb/sql/_ppdb_sql.py +++ b/python/lsst/dax/ppdb/sql/_ppdb_sql.py @@ -552,5 +552,6 @@ def init_database( isolation_level=isolation_level, connection_timeout=connection_timeout, ) - cls.make_database(config, sa_metadata, schema_version, drop) + engine = cls.make_engine(config) + cls.make_database(engine, config, sa_metadata, schema_version, drop) return config diff --git a/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py b/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py index 836cab53..81f42dd7 100644 --- a/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py +++ b/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py @@ -171,6 +171,7 @@ def make_engine(cls, config: PpdbSqlBaseConfig) -> sqlalchemy.engine.Engine: @classmethod def make_database( cls, + engine: sqlalchemy.engine.Engine, config: PpdbSqlBaseConfig, sa_metadata: sqlalchemy.schema.MetaData, schema_version: VersionTuple, @@ -189,8 +190,6 @@ def make_database( drop : `bool` If `True` then drop existing tables before creating new ones. 
""" - engine = cls.make_engine(config) - if config.schema_name is not None: dialect = engine.dialect quoted_schema = dialect.preparer(dialect).quote_schema(config.schema_name) From 8d48f473471acfe73767529466d61ec75cef7fb8 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 24 Feb 2026 15:14:07 -0600 Subject: [PATCH 25/49] Move building of connect args into separate method --- python/lsst/dax/ppdb/sql/_ppdb_sql_base.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py b/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py index 81f42dd7..fe1652bf 100644 --- a/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py +++ b/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py @@ -137,14 +137,7 @@ def __init__(self, config: PpdbSqlBaseConfig) -> None: self._check_code_version() @classmethod - def make_engine(cls, config: PpdbSqlBaseConfig) -> sqlalchemy.engine.Engine: - """Make SQLALchemy engine based on configured parameters. - - Parameters - ---------- - config : `PpdbSqlBaseConfig` - Configuration object with SQL parameters. - """ + def _build_connect_args(cls, config: PpdbSqlBaseConfig) -> MutableMapping[str, Any]: kw: MutableMapping[str, Any] = {} conn_args: dict[str, Any] = {} if not config.use_connection_pool: @@ -159,7 +152,18 @@ def make_engine(cls, config: PpdbSqlBaseConfig) -> sqlalchemy.engine.Engine: conn_args.update(timeout=config.connection_timeout) elif config.db_url.startswith(("postgresql", "mysql")): conn_args.update(connect_timeout=config.connection_timeout) - kw = {"connect_args": conn_args} + return {"connect_args": conn_args} + + @classmethod + def make_engine(cls, config: PpdbSqlBaseConfig) -> sqlalchemy.engine.Engine: + """Make SQLALchemy engine based on configured parameters. + + Parameters + ---------- + config : `PpdbSqlBaseConfig` + Configuration object with SQL parameters. 
+ """ + kw = cls._build_connect_args(config) engine = sqlalchemy.create_engine(config.db_url, **kw) if engine.dialect.name == "sqlite": From cb9931bb71e4f90f4461205eb58ea1e85631a536 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 24 Feb 2026 15:21:38 -0600 Subject: [PATCH 26/49] Move listener config to separate method --- python/lsst/dax/ppdb/sql/_ppdb_sql_base.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py b/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py index fe1652bf..ed953310 100644 --- a/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py +++ b/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py @@ -154,6 +154,12 @@ def _build_connect_args(cls, config: PpdbSqlBaseConfig) -> MutableMapping[str, A conn_args.update(connect_timeout=config.connection_timeout) return {"connect_args": conn_args} + @classmethod + def _config_listeners(cls, engine: sqlalchemy.engine.Engine) -> sqlalchemy.engine.Engine: + if engine.dialect.name == "sqlite": + # Need to enable foreign keys on every new connection. + sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect) + @classmethod def make_engine(cls, config: PpdbSqlBaseConfig) -> sqlalchemy.engine.Engine: """Make SQLALchemy engine based on configured parameters. @@ -165,10 +171,7 @@ def make_engine(cls, config: PpdbSqlBaseConfig) -> sqlalchemy.engine.Engine: """ kw = cls._build_connect_args(config) engine = sqlalchemy.create_engine(config.db_url, **kw) - - if engine.dialect.name == "sqlite": - # Need to enable foreign keys on every new connection. 
- sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect) + cls._config_listeners(engine) return engine From 41442f82edec5d7bf15469288da69759cdc3ca58 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Tue, 24 Feb 2026 17:14:43 -0600 Subject: [PATCH 27/49] Add support for getting db password from Google Secret Manager This adds an option for getting the PPDB Postgres password from the Google Secret Manager if the `PPDB_USE_SECRET_MANAGER` environment variable is set to `true`. --- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index 2b6beab2..02d33b81 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -21,6 +21,7 @@ import datetime import logging +import os import shutil from collections.abc import Collection, Iterable, Sequence from pathlib import Path @@ -137,6 +138,7 @@ def __init__(self, config: PpdbBigQueryConfig): self.parq_batch_size = config.parq_batch_size self.parq_compression = config.parq_compression self.delete_existing_dirs = config.delete_existing_dirs + self.project_id = config.project_id @property def metadata(self) -> ApdbMetadata: @@ -409,6 +411,43 @@ def filter_table_names(cls, original_table_names: Iterable[str]) -> Iterable[str # Only the metadata table is needed for the BigQuery-based PPDB.
return ["metadata"] + @classmethod + def _get_secretmanager_password(cls, project_id: str, password_name: str = "ppdb-db-password") -> str: + from google.cloud import secretmanager + + client = secretmanager.SecretManagerServiceClient() + name = f"projects/{project_id}/secrets/{password_name}/versions/latest" + response = client.access_secret_version(request={"name": name}) + return response.payload.data.decode("UTF-8") + + @classmethod + def _use_secret_manager(cls) -> bool: + return os.getenv("PPDB_USE_SECRET_MANAGER", "false").lower() == "true" + + @classmethod + def make_engine(cls, config: PpdbSqlBaseConfig) -> sqlalchemy.engine.Engine: + """Make SQLALchemy engine based on configured parameters. + + Parameters + ---------- + config : `PpdbSqlBaseConfig` + Configuration object with SQL parameters. + """ + kw = cls._build_connect_args(config) + db_url = sqlalchemy.make_url(config.db_url) + + if cls._use_secret_manager(): + _LOG.info("Using Secret Manager to retrieve database password") + if db_url.password is not None: + raise ValueError("Database URL should not include a password when using Secret Manager") + password = cls._get_secretmanager_password(config.project_id) + db_url = db_url.set(password=password) + + engine = sqlalchemy.create_engine(db_url, **kw) + cls._config_listeners(engine) + + return engine + @classmethod def init_bigquery( cls, From dfaa6560be818eb67c7907bc704d3b747ad55a8b Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 25 Feb 2026 15:11:26 -0600 Subject: [PATCH 28/49] Rearrange SQL init code --- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 40 ++++++++++++++----- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index 02d33b81..a79c79de 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -39,6 +39,7 @@ monitor, schema_model, ) +from lsst.dax.apdb.sql import 
ApdbMetadataSql from lsst.dax.apdb.timer import Timer from .._arrow import write_parquet @@ -128,10 +129,10 @@ class PpdbBigQuery(Ppdb, PpdbSqlBase): """ def __init__(self, config: PpdbBigQueryConfig): - # Initialize the SQL interface for the PPDB. - PpdbSqlBase.__init__(self, config.sql) + # Initialize the SQL interface for the PPDB + self._init_sql(config) - # Read parameters from config. + # Read parameters from config if config.replication_dir is None: raise ValueError("Directory for chunk export is not set in configuration.") self.replication_path = config.replication_path @@ -151,6 +152,22 @@ def metadata(self) -> ApdbMetadata: """ return self._metadata + def _init_sql(self, config: PpdbBigQueryConfig) -> None: + sql_config = config.sql + self._sa_metadata, self._schema_version = self.read_schema( + sql_config.felis_path, sql_config.schema_name, sql_config.felis_schema, sql_config.db_url + ) + + self._engine = self._make_engine(config) # Includes Secret Manager support + sa_metadata = sqlalchemy.MetaData(schema=sql_config.schema_name) + + meta_table = sqlalchemy.schema.Table("metadata", sa_metadata, autoload_with=self._engine) + self._metadata = ApdbMetadataSql(self._engine, meta_table) + + # Check schema and code version compatibility. + self._check_schema_version(self._schema_version) + self._check_code_version() + def _generate_manifest( self, replica_chunk: ReplicaChunk, @@ -425,17 +442,19 @@ def _use_secret_manager(cls) -> bool: return os.getenv("PPDB_USE_SECRET_MANAGER", "false").lower() == "true" @classmethod - def make_engine(cls, config: PpdbSqlBaseConfig) -> sqlalchemy.engine.Engine: + def _make_engine(cls, config: PpdbBigQueryConfig) -> sqlalchemy.engine.Engine: """Make SQLALchemy engine based on configured parameters. Parameters ---------- - config : `PpdbSqlBaseConfig` + config : `PpdbBigQueryConfig` Configuration object with SQL parameters.
""" - kw = cls._build_connect_args(config) - db_url = sqlalchemy.make_url(config.db_url) + sql_config = config.sql + db_url = sqlalchemy.make_url(sql_config.db_url) + # If using Secret Manager, retrieve the password and update the + # database URL. if cls._use_secret_manager(): _LOG.info("Using Secret Manager to retrieve database password") if db_url.password is not None: @@ -443,7 +462,9 @@ def make_engine(cls, config: PpdbSqlBaseConfig) -> sqlalchemy.engine.Engine: password = cls._get_secretmanager_password(config.project_id) db_url = db_url.set(password=password) + kw = cls._build_connect_args(sql_config) engine = sqlalchemy.create_engine(db_url, **kw) + cls._config_listeners(engine) return engine @@ -515,8 +536,6 @@ def init_bigquery( sql_config = PpdbSqlBaseConfig( db_url=db_url, schema_name=db_schema, felis_path=felis_path, felis_schema=felis_schema ) - engine = cls.make_engine(sql_config) - cls.make_database(engine, sql_config, sa_metadata, schema_version, db_drop) # Build config parameters. bq_config = PpdbBigQueryConfig( @@ -535,6 +554,9 @@ def init_bigquery( if stage_chunk_topic is not None: bq_config.stage_chunk_topic = stage_chunk_topic + engine = cls._make_engine(bq_config) + cls.make_database(engine, bq_config.sql, sa_metadata, schema_version, db_drop) + # Validate the config if requested. if validate_config: _LOG.info("validating BigQuery configuration") From bc9aeeb5da6868b3aaded81d45420f1fb549f2b9 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 25 Feb 2026 16:26:44 -0600 Subject: [PATCH 29/49] Rename the `config` module to `ppdb_config` This follows DM naming conventions, since the module defines the class `PpdbConfig`. 
--- python/lsst/dax/ppdb/__init__.py | 2 +- python/lsst/dax/ppdb/_factory.py | 2 +- python/lsst/dax/ppdb/bigquery/chunk_uploader.py | 2 +- python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 2 +- python/lsst/dax/ppdb/ppdb.py | 2 +- python/lsst/dax/ppdb/{config.py => ppdb_config.py} | 0 python/lsst/dax/ppdb/tests/_ppdb.py | 2 +- 7 files changed, 6 insertions(+), 6 deletions(-) rename python/lsst/dax/ppdb/{config.py => ppdb_config.py} (100%) diff --git a/python/lsst/dax/ppdb/__init__.py b/python/lsst/dax/ppdb/__init__.py index d8aeb139..2f4dab94 100644 --- a/python/lsst/dax/ppdb/__init__.py +++ b/python/lsst/dax/ppdb/__init__.py @@ -19,7 +19,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from .config import * +from .ppdb_config import * from .ppdb import * from .replicator import * from .version import * # Generated by sconsUtils diff --git a/python/lsst/dax/ppdb/_factory.py b/python/lsst/dax/ppdb/_factory.py index aee2ee52..c3774778 100644 --- a/python/lsst/dax/ppdb/_factory.py +++ b/python/lsst/dax/ppdb/_factory.py @@ -26,8 +26,8 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from .config import PpdbConfig from .ppdb import Ppdb + from .ppdb_config import PpdbConfig def config_type_for_name(type_name: str) -> type[PpdbConfig]: diff --git a/python/lsst/dax/ppdb/bigquery/chunk_uploader.py b/python/lsst/dax/ppdb/bigquery/chunk_uploader.py index d23a123c..6bef4383 100644 --- a/python/lsst/dax/ppdb/bigquery/chunk_uploader.py +++ b/python/lsst/dax/ppdb/bigquery/chunk_uploader.py @@ -42,7 +42,7 @@ ) from e -from ..config import PpdbConfig +from ..ppdb_config import PpdbConfig from .manifest import Manifest from .ppdb_bigquery import PpdbBigQuery, PpdbBigQueryConfig from .ppdb_replica_chunk_extended import ChunkStatus, PpdbReplicaChunkExtended diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index a79c79de..f1cfc8fd 100644 --- 
a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -43,8 +43,8 @@ from lsst.dax.apdb.timer import Timer from .._arrow import write_parquet -from ..config import PpdbConfig from ..ppdb import Ppdb, PpdbReplicaChunk +from ..ppdb_config import PpdbConfig from ..sql import PpdbSqlBase, PpdbSqlBaseConfig from .manifest import Manifest, TableStats from .ppdb_replica_chunk_extended import ChunkStatus, PpdbReplicaChunkExtended diff --git a/python/lsst/dax/ppdb/ppdb.py b/python/lsst/dax/ppdb/ppdb.py index 31b6a315..e175bb7f 100644 --- a/python/lsst/dax/ppdb/ppdb.py +++ b/python/lsst/dax/ppdb/ppdb.py @@ -33,7 +33,7 @@ from lsst.resources import ResourcePathExpression from ._factory import ppdb_from_config -from .config import PpdbConfig +from .ppdb_config import PpdbConfig @dataclass(frozen=True) diff --git a/python/lsst/dax/ppdb/config.py b/python/lsst/dax/ppdb/ppdb_config.py similarity index 100% rename from python/lsst/dax/ppdb/config.py rename to python/lsst/dax/ppdb/ppdb_config.py diff --git a/python/lsst/dax/ppdb/tests/_ppdb.py b/python/lsst/dax/ppdb/tests/_ppdb.py index 6a2e38d8..ffb50e84 100644 --- a/python/lsst/dax/ppdb/tests/_ppdb.py +++ b/python/lsst/dax/ppdb/tests/_ppdb.py @@ -44,8 +44,8 @@ from lsst.dax.apdb.tests.data_factory import makeForcedSourceCatalog, makeObjectCatalog, makeSourceCatalog from lsst.sphgeom import Angle, Circle, Region, UnitVector3d -from ..config import PpdbConfig from ..ppdb import Ppdb, PpdbReplicaChunk +from ..ppdb_config import PpdbConfig from ..replicator import Replicator if TYPE_CHECKING: From 331cf4a16fc2b2f5d9784862ce33fb8c66460242 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 25 Feb 2026 16:24:24 -0600 Subject: [PATCH 30/49] Move the method for getting promotable chunks to PpdbBigQuery This functionality is moved into this repository, so that the cloud functions may access it. 
--- pyproject.toml | 2 +- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 35 +++++++++++++++++++ python/lsst/dax/ppdb/config/__init__.py | 0 python/lsst/dax/ppdb/config/sql/__init__.py | 0 .../config/sql/select_promotable_chunks.sql | 24 +++++++++++++ 5 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 python/lsst/dax/ppdb/config/__init__.py create mode 100644 python/lsst/dax/ppdb/config/sql/__init__.py create mode 100644 python/lsst/dax/ppdb/config/sql/select_promotable_chunks.sql diff --git a/pyproject.toml b/pyproject.toml index 930ac8f0..ac527e91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ where = ["python"] zip-safe = true [tool.setuptools.package-data] -"lsst.dax.ppdb" = ["py.typed", "bigquery/updates/sql/*.sql", "tests/config/*.yaml"] +"lsst.dax.ppdb" = ["py.typed", "bigquery/updates/sql/*.sql", "tests/config/*.yaml", "config/sql/*.sql"] [tool.setuptools.dynamic] version = { attr = "lsst_versions.get_lsst_version" } diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index f1cfc8fd..1fed0f8c 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -41,6 +41,7 @@ ) from lsst.dax.apdb.sql import ApdbMetadataSql from lsst.dax.apdb.timer import Timer +from lsst.resources import ResourcePath from .._arrow import write_parquet from ..ppdb import Ppdb, PpdbReplicaChunk @@ -674,3 +675,37 @@ replica_chunk.id, chunk_dir / "update_records.json", ) + + def get_promotable_chunks(self) -> list[int]: + """ + Return the first uninterrupted sequence of staged chunks such that all + prior chunks are promoted. + + Returns + ------- + chunk_ids : `list`[`int`] + A list containing the `apdb_replica_chunk` values of the + promotable chunks.
+ + Notes + ----- + This query finds the contiguous sequence of ``staged`` chunks beginning + with the earliest chunk that is not yet ``promoted``, and ending just + before the first chunk that is not ``staged``. If no such ending + exists, all `staged` chunks from that point onward are returned. If no + chunks are `staged` after the first non-`promoted` chunk, an empty list + is returned. + """ + table = self.get_table("PpdbReplicaChunk") + quoted_table_name = ( + self._engine.dialect.identifier_preparer.quote(table.schema) + + "." + + self._engine.dialect.identifier_preparer.quote(table.name) + ) + sql_resource_path = "resource://lsst.dax.ppdb.config.sql/select_promotable_chunks.sql" + sql_text = ResourcePath(sql_resource_path).read().decode("utf-8") + sql_text = sql_text.format(table_name=quoted_table_name) + with self._engine.connect() as conn: + result = conn.execute(sqlalchemy.text(sql_text)) + chunk_ids = [row[0] for row in result] + return chunk_ids diff --git a/python/lsst/dax/ppdb/config/__init__.py b/python/lsst/dax/ppdb/config/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/lsst/dax/ppdb/config/sql/__init__.py b/python/lsst/dax/ppdb/config/sql/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/lsst/dax/ppdb/config/sql/select_promotable_chunks.sql b/python/lsst/dax/ppdb/config/sql/select_promotable_chunks.sql new file mode 100644 index 00000000..e776a1f0 --- /dev/null +++ b/python/lsst/dax/ppdb/config/sql/select_promotable_chunks.sql @@ -0,0 +1,24 @@ +WITH start AS ( +SELECT MIN(apdb_replica_chunk) AS s +FROM {table_name} +WHERE status <> 'promoted' + AND status <> 'skipped' +), +stop AS ( +SELECT MIN(p.apdb_replica_chunk) AS e +FROM {table_name} p +JOIN start ON TRUE +WHERE start.s IS NOT NULL + AND p.apdb_replica_chunk >= start.s + AND p.status <> 'staged' + AND status <> 'skipped' +) +SELECT p.apdb_replica_chunk +FROM {table_name} p +JOIN start ON TRUE +LEFT JOIN stop ON TRUE +WHERE start.s IS 
NOT NULL +AND p.status = 'staged' +AND p.apdb_replica_chunk >= start.s +AND (stop.e IS NULL OR p.apdb_replica_chunk < stop.e) +ORDER BY p.apdb_replica_chunk; From 3df612ea6c2a1ec90139f70fc9a80d3bd6166091 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 25 Feb 2026 16:34:02 -0600 Subject: [PATCH 31/49] Move SQL files into `config/sql` dir --- pyproject.toml | 2 +- python/lsst/dax/ppdb/bigquery/updates/sql/__init__.py | 0 python/lsst/dax/ppdb/bigquery/updates/updates_merger.py | 4 ++-- .../updates => config}/sql/merge_diaforcedsource_updates.sql | 0 .../updates => config}/sql/merge_diaobject_updates.sql | 0 .../updates => config}/sql/merge_diasource_updates.sql | 0 6 files changed, 3 insertions(+), 3 deletions(-) delete mode 100644 python/lsst/dax/ppdb/bigquery/updates/sql/__init__.py rename python/lsst/dax/ppdb/{bigquery/updates => config}/sql/merge_diaforcedsource_updates.sql (100%) rename python/lsst/dax/ppdb/{bigquery/updates => config}/sql/merge_diaobject_updates.sql (100%) rename python/lsst/dax/ppdb/{bigquery/updates => config}/sql/merge_diasource_updates.sql (100%) diff --git a/pyproject.toml b/pyproject.toml index ac527e91..937bb005 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ where = ["python"] zip-safe = true [tool.setuptools.package-data] -"lsst.dax.ppdb" = ["py.typed", "bigquery/updates/sql/*.sql", "tests/config/*.yaml", "config/sql/*.sql"] +"lsst.dax.ppdb" = ["py.typed", "tests/config/*.yaml", "config/sql/*.sql"] [tool.setuptools.dynamic] version = { attr = "lsst_versions.get_lsst_version" } diff --git a/python/lsst/dax/ppdb/bigquery/updates/sql/__init__.py b/python/lsst/dax/ppdb/bigquery/updates/sql/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py index d1ea3f5c..d1c6fdfb 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py +++ 
b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py @@ -27,7 +27,7 @@ from lsst.resources import ResourcePath -_SQL_RESOURCE_PACKAGE = "lsst.dax.ppdb.bigquery.updates.sql" +_SQL_RESOURCE_PACKAGE = "lsst.dax.ppdb.config.sql" class UpdatesMerger(ABC): @@ -42,7 +42,7 @@ class UpdatesMerger(ABC): SQL_RESOURCE_NAME: str """Base name of the SQL file (without .sql extension) containing the MERGE statement for this merger. The SQL file must be located in the - `lsst.dax.ppdb.bigquery.updates.sql` package.""" + `lsst.dax.ppdb.config.sql` package.""" def __init__(self, client: bigquery.Client) -> None: """ diff --git a/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaforcedsource_updates.sql b/python/lsst/dax/ppdb/config/sql/merge_diaforcedsource_updates.sql similarity index 100% rename from python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaforcedsource_updates.sql rename to python/lsst/dax/ppdb/config/sql/merge_diaforcedsource_updates.sql diff --git a/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaobject_updates.sql b/python/lsst/dax/ppdb/config/sql/merge_diaobject_updates.sql similarity index 100% rename from python/lsst/dax/ppdb/bigquery/updates/sql/merge_diaobject_updates.sql rename to python/lsst/dax/ppdb/config/sql/merge_diaobject_updates.sql diff --git a/python/lsst/dax/ppdb/bigquery/updates/sql/merge_diasource_updates.sql b/python/lsst/dax/ppdb/config/sql/merge_diasource_updates.sql similarity index 100% rename from python/lsst/dax/ppdb/bigquery/updates/sql/merge_diasource_updates.sql rename to python/lsst/dax/ppdb/config/sql/merge_diasource_updates.sql From 295e1dca0dc9c6a2a92446d5c777118b83156e68 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 25 Feb 2026 17:27:16 -0600 Subject: [PATCH 32/49] Add `sql_resource` module for accessing SQL files as resources --- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 10 ++-- python/lsst/dax/ppdb/bigquery/sql_resource.py | 57 +++++++++++++++++++ .../ppdb/bigquery/updates/updates_merger.py | 19 ++----- 3 
files changed, 68 insertions(+), 18 deletions(-) create mode 100644 python/lsst/dax/ppdb/bigquery/sql_resource.py diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index 1fed0f8c..cf4032c3 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -41,7 +41,6 @@ ) from lsst.dax.apdb.sql import ApdbMetadataSql from lsst.dax.apdb.timer import Timer -from lsst.resources import ResourcePath from .._arrow import write_parquet from ..ppdb import Ppdb, PpdbReplicaChunk @@ -49,6 +48,7 @@ from ..sql import PpdbSqlBase, PpdbSqlBaseConfig from .manifest import Manifest, TableStats from .ppdb_replica_chunk_extended import ChunkStatus, PpdbReplicaChunkExtended +from .sql_resource import SqlResource __all__ = ["ConfigValidationError", "PpdbBigQuery", "PpdbBigQueryConfig"] @@ -702,10 +702,10 @@ def get_promotable_chunks(self) -> list[int]: + "." + self._engine.dialect.identifier_preparer.quote(table.name) ) - sql_resource_path = "resource://lsst.dax.ppdb.config.sql/select_promotable_chunks.sql" - sql_text = ResourcePath(sql_resource_path).read().decode("utf-8") - sql_text = sql_text.format(table_name=quoted_table_name) + + sql = SqlResource("select_promotable_chunks", {"table_name": quoted_table_name}).sql + with self._engine.connect() as conn: - result = conn.execute(sqlalchemy.text(sql_text)) + result = conn.execute(sqlalchemy.text(sql)) chunk_ids = [row[0] for row in result] return chunk_ids diff --git a/python/lsst/dax/ppdb/bigquery/sql_resource.py b/python/lsst/dax/ppdb/bigquery/sql_resource.py new file mode 100644 index 00000000..2548bd3c --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/sql_resource.py @@ -0,0 +1,57 @@ +# This file is part of dax_ppdb +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). 
+# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +from lsst.resources import ResourcePath + + +class SqlResource: + """Class for loading SQL query text from a resource file and optionally + formatting it with provided arguments. + + Parameters + ---------- + sql_resource_name : `str` + Base name of the SQL file (without .sql extension) containing the + query. + The SQL file must be located in the `lsst.dax.ppdb.config.sql` package. + format_args : `dict` [ `str`, `str` ], optional + Optional dictionary of arguments for formatting the SQL text. 
+ """ + + SQL_RESOURCE_PACKAGE = "lsst.dax.ppdb.config.sql" + + def __init__(self, sql_resource_name: str, format_args: dict[str, str] | None = None) -> None: + sql_resource_path = f"resource://{self.SQL_RESOURCE_PACKAGE}/{sql_resource_name}.sql" + sql = ResourcePath(sql_resource_path).read().decode("utf-8") + if format_args is not None: + try: + sql = sql.format(**format_args) + except Exception as e: + raise RuntimeError( + f"Failed to format SQL resource at {sql_resource_path} with arguments {format_args}" + ) from e + self._sql = sql + + @property + def sql(self) -> str: + """SQL query string (`str`).""" + return self._sql diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py index d1c6fdfb..dfa85e02 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py @@ -25,9 +25,7 @@ from google.cloud import bigquery -from lsst.resources import ResourcePath - -_SQL_RESOURCE_PACKAGE = "lsst.dax.ppdb.config.sql" +from ..sql_resource import SqlResource class UpdatesMerger(ABC): @@ -70,16 +68,11 @@ def merge(self, *, updates_table_fqn: str, target_dataset_fqn: str) -> bigquery. google.cloud.bigquery.job.QueryJob The completed BigQuery job. 
""" - try: - sql_resource_path = f"resource://{_SQL_RESOURCE_PACKAGE}/{self.SQL_RESOURCE_NAME}.sql" - print(f"Reading SQL from resource: {sql_resource_path}") - sql_text = ResourcePath(sql_resource_path).read().decode("utf-8") - except Exception as e: - raise RuntimeError(f"Failed to read SQL resource at {sql_resource_path}") from e - - sql_text = sql_text.format(updates_table=updates_table_fqn, target_dataset=target_dataset_fqn) - - job = self._client.query(sql_text) + sql = SqlResource( + self.SQL_RESOURCE_NAME, + format_args={"updates_table": updates_table_fqn, "target_dataset": target_dataset_fqn}, + ).sql + job = self._client.query(sql) job.result() return job From c9d375d801c84b4f8d78d72f0b83a52420d42423 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 26 Feb 2026 15:10:35 -0600 Subject: [PATCH 33/49] Add GCS URI to PpdbReplicaChunkExtended model and database --- .../lsst/dax/ppdb/bigquery/chunk_uploader.py | 30 +++++++++++-------- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 19 ++++++++---- .../bigquery/ppdb_replica_chunk_extended.py | 20 +++++++++++++ 3 files changed, 52 insertions(+), 17 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/chunk_uploader.py b/python/lsst/dax/ppdb/bigquery/chunk_uploader.py index 6bef4383..8228ad34 100644 --- a/python/lsst/dax/ppdb/bigquery/chunk_uploader.py +++ b/python/lsst/dax/ppdb/bigquery/chunk_uploader.py @@ -270,7 +270,7 @@ def _process_chunk(self, replica_chunk: PpdbReplicaChunkExtended) -> None: ) try: - # 1) Upload the files to GCS for non-empty chunks. + # 1) Upload the files to GCS for non-empty chunks if upload_file_list: gcs_names = {path: posixpath.join(gcs_prefix, path.name) for path in upload_file_list} try: @@ -284,7 +284,7 @@ def _process_chunk(self, replica_chunk: PpdbReplicaChunkExtended) -> None: except* UploadError as eg: raise ChunkUploadError(chunk_id, f"{len(eg.exceptions)} upload(s) failed") from eg - # 2) Upload manifest, even for empty chunks. 
+ # 2) Upload manifest, even for empty chunks try: self.storage.upload_from_string( posixpath.join(gcs_prefix, replica_chunk.manifest_name), @@ -293,23 +293,29 @@ def _process_chunk(self, replica_chunk: PpdbReplicaChunkExtended) -> None: except UploadError as e: raise ChunkUploadError(chunk_id, "Manifest upload failed") from e - # 3) Update status in the database, but not for empty chunks. - # They have already been marked as skipped during export. + # Next two steps are inapplicable to empty chunks. if not is_empty: + # 3) Update status and GCS URI in the database + gcs_prefix = posixpath.join(self.bucket_name, gcs_prefix) + updated_replica_chunk = replica_chunk.with_new_status(ChunkStatus.UPLOADED).with_new_gcs_uri( + f"gs://{gcs_prefix}" + ) try: - self._bq.store_chunk(replica_chunk.with_new_status(ChunkStatus.UPLOADED), True) + self._bq.store_chunk(updated_replica_chunk, True) + _LOG.info( + "Updated replica chunk %d in database with status 'uploaded' and GCS URI: %s", + chunk_id, + gcs_prefix, + ) except Exception as e: - raise ChunkUploadError( - chunk_id, "failed to update replica chunk status in database" - ) from e + raise ChunkUploadError(chunk_id, "Failed to update replica chunk in database") from e - # 4) Publish Pub/Sub staging message to trigger BigQuery load, but - # not for empty chunks. (Empty chunks do not need to be staged.) 
- if not is_empty: + # 4) Publish Pub/Sub event to trigger staging of the chunk in + # BigQuery try: self._post_to_stage_chunk_topic(self.bucket_name, gcs_prefix, chunk_id) except Exception as e: - raise ChunkUploadError(chunk_id, "failed to publish staging message") from e + raise ChunkUploadError(chunk_id, "Failed to publish staging message") from e except ChunkUploadError as err: try: diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index cf4032c3..6ddc9066 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -336,6 +336,7 @@ def get_replica_chunks_ext( table.columns["replica_time"], table.columns["status"], # Extended column table.columns["directory"], # Extended column + table.columns["gcs_uri"], # Extended column ).order_by(table.columns["last_update_time"]) if start_chunk_id is not None: query = query.where(table.columns["apdb_replica_chunk"] >= start_chunk_id) @@ -355,6 +356,7 @@ def get_replica_chunks_ext( replica_time=replica_time, status=row[4], directory=Path(row[5]), + gcs_uri=row[6], ) ) return ids @@ -382,6 +384,7 @@ def store_chunk(self, replica_chunk: PpdbReplicaChunkExtended, update: bool) -> "replica_time": replica_chunk.replica_time_dt_utc, "status": replica_chunk.status, "directory": str(replica_chunk.directory), + "gcs_uri": replica_chunk.gcs_uri, } if update: self.upsert(connection, table, row, "apdb_replica_chunk") @@ -419,6 +422,12 @@ def create_replica_chunk_table(cls, table_name: str | None = None) -> schema_mod datatype=felis.datamodel.DataType.string, nullable=True, # We might want to allow NULL if an error occurs when exporting. 
), + schema_model.Column( + name="gcs_uri", + id=f"#{table_name}.gcs_uri", + datatype=felis.datamodel.DataType.string, + nullable=True, + ), ] ) return replica_chunk_table @@ -684,17 +693,17 @@ def get_promotable_chunks(self) -> list[int]: Returns ------- chunk_ids : `list`[`int`] - A list of tuples containing the `apdb_replica_chunk` values of the - promotable chunks. + A list of tuples containing the ``apdb_replica_chunk`` values of + the promotable chunks. Notes ----- This query finds the contiguous sequence of ``staged`` chunks beginning with the earliest chunk that is not yet ``promoted``, and ending just before the first chunk that is not ``staged``. If no such ending - exists, all `staged` chunks from that point onward are returned. If no - chunks are `staged` after the first non-`promoted` chunk, an empty list - is returned. + exists, all ``staged`` chunks from that point onward are returned. If + no chunks are ``staged`` after the first non-``promoted`` chunk, an + empty list is returned. """ table = self.get_table("PpdbReplicaChunk") quoted_table_name = ( diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_replica_chunk_extended.py b/python/lsst/dax/ppdb/bigquery/ppdb_replica_chunk_extended.py index bd8d6422..2ad8f0f7 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_replica_chunk_extended.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_replica_chunk_extended.py @@ -59,6 +59,10 @@ class PpdbReplicaChunkExtended(PpdbReplicaChunk): directory: Path """Directory where the exported replica chunk data is stored locally.""" + gcs_uri: str | None = None + """GCS URI where the replica chunk data is stored, or `None` if not + uploaded yet.""" + @property def manifest_name(self) -> str: """Filename of the manifest file for this chunk.""" @@ -127,3 +131,19 @@ def with_new_status(self, new_status: ChunkStatus) -> PpdbReplicaChunkExtended: The new chunk with the updated status. 
""" return dataclasses.replace(self, status=new_status) + + def with_new_gcs_uri(self, new_gcs_uri: str) -> PpdbReplicaChunkExtended: + """Create a new `PpdbReplicaChunkExtended` with the same properties as + this one, but with a different GCS URI. + + Parameters + ---------- + new_gcs_uri : `str` + The new GCS URI to set. + + Returns + ------- + new_chunk : `PpdbReplicaChunkExtended` + The new chunk with the updated GCS URI. + """ + return dataclasses.replace(self, gcs_uri=new_gcs_uri) From da7d8239ce1c22fdec6adffcc7301f1c5a467f05 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 26 Feb 2026 15:51:13 -0600 Subject: [PATCH 34/49] Add `UpdatesManager` for applying updates from JSON files in GCS --- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 52 ++++ .../dax/ppdb/bigquery/updates/__init__.py | 7 +- .../ppdb/bigquery/updates/updates_manager.py | 107 +++++++ .../ppdb/bigquery/updates/updates_merger.py | 16 +- .../ppdb/bigquery/updates/updates_table.py | 8 +- tests/test_updates_manager.py | 260 ++++++++++++++++++ 6 files changed, 442 insertions(+), 8 deletions(-) create mode 100644 python/lsst/dax/ppdb/bigquery/updates/updates_manager.py create mode 100644 tests/test_updates_manager.py diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index 6ddc9066..fa15c20a 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -142,6 +142,8 @@ def __init__(self, config: PpdbBigQueryConfig): self.delete_existing_dirs = config.delete_existing_dirs self.project_id = config.project_id + self._config = config + @property def metadata(self) -> ApdbMetadata: """Implement `Ppdb` interface to return APDB metadata object. @@ -361,6 +363,56 @@ def get_replica_chunks_ext( ) return ids + def get_replica_chunks_ext_by_ids(self, chunk_ids: Sequence[int]) -> Sequence[PpdbReplicaChunkExtended]: + """Find replica chunks for a list of chunk IDs. 
+ + Parameters + ---------- + chunk_ids : `~collections.abc.Sequence` [ `int` ] + Replica chunk IDs to retrieve. + + Returns + ------- + chunks : `~collections.abc.Sequence` [ `PpdbReplicaChunkExtended` ] + List of matching chunks ordered by ``last_update_time``. + """ + if not chunk_ids: + return [] + + table = self.get_table("PpdbReplicaChunk") + query = ( + sqlalchemy.sql.select( + table.columns["apdb_replica_chunk"], + table.columns["last_update_time"], + table.columns["unique_id"], + table.columns["replica_time"], + table.columns["status"], + table.columns["directory"], + table.columns["gcs_uri"], + ) + .where(table.columns["apdb_replica_chunk"].in_(chunk_ids)) + .order_by(table.columns["apdb_replica_chunk"]) + ) + + chunks: list[PpdbReplicaChunkExtended] = [] + with self._engine.connect() as conn: + result = conn.execution_options(stream_results=True, max_row_buffer=10000).execute(query) + for row in result: + last_update_time = self.to_astropy_tai(row[1]) + replica_time = self.to_astropy_tai(row[3]) + chunks.append( + PpdbReplicaChunkExtended( + id=row[0], + last_update_time=last_update_time, + unique_id=row[2], + replica_time=replica_time, + status=row[4], + directory=Path(row[5]), + gcs_uri=row[6], + ) + ) + return chunks + def store_chunk(self, replica_chunk: PpdbReplicaChunkExtended, update: bool) -> None: """Insert or replace single record in PpdbReplicaChunk table, including the status and directory of the replica chunk. diff --git a/python/lsst/dax/ppdb/bigquery/updates/__init__.py b/python/lsst/dax/ppdb/bigquery/updates/__init__.py index 7673a1ca..ef9abea9 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/__init__.py +++ b/python/lsst/dax/ppdb/bigquery/updates/__init__.py @@ -19,13 +19,14 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-from .update_records import UpdateRecords from .expanded_update_record import ExpandedUpdateRecord -from .update_record_expander import UpdateRecordExpander -from .updates_table import UpdatesTable from .updates_merger import ( UpdatesMerger, DiaObjectUpdatesMerger, DiaSourceUpdatesMerger, DiaForcedSourceUpdatesMerger, ) +from .update_records import UpdateRecords +from .update_record_expander import UpdateRecordExpander +from .updates_table import UpdatesTable +from .updates_manager import UpdatesManager diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_manager.py b/python/lsst/dax/ppdb/bigquery/updates/updates_manager.py new file mode 100644 index 00000000..dc605f10 --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_manager.py @@ -0,0 +1,107 @@ +# This file is part of dax_ppdb. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import posixpath +import urllib +from collections.abc import Sequence + +from google.cloud import bigquery, storage + +from ..ppdb_bigquery import PpdbBigQuery +from .update_record_expander import UpdateRecordExpander +from .update_records import DEFAULT_FILENAME, UpdateRecords +from .updates_merger import ( + DiaForcedSourceUpdatesMerger, + DiaObjectUpdatesMerger, + DiaSourceUpdatesMerger, + UpdatesMerger, +) +from .updates_table import UpdatesTable + +DEFAULT_MERGERS = (DiaObjectUpdatesMerger, DiaSourceUpdatesMerger, DiaForcedSourceUpdatesMerger) + + +class UpdatesManager: + """Class responsible for managing the process of applying updates to the + PPDB database, including merging updates and inserting them into the + database. + """ + + def __init__( + self, + ppdb: PpdbBigQuery, + mergers: Sequence[type[UpdatesMerger]] = DEFAULT_MERGERS, + updates_table_name="updates", + deduplicated_updates_table_name="updates_deduplicated", + ) -> None: + self._ppdb = ppdb + self._mergers = mergers + self._deduplicated_updates_table_name = deduplicated_updates_table_name + + self._bq_client = bigquery.Client() + + self._updates_table = UpdatesTable( + self._bq_client, + f"{self._ppdb._config.project_id}.{self._ppdb._config.dataset_id}.{updates_table_name}", + ) + self._updates_table.create() + + self._gcs_client = storage.Client() + self._bucket = self._gcs_client.bucket(self._ppdb._config.bucket_name) + + def apply_updates(self, replica_chunk_ids: Sequence[int], table_name_postfix: str | None = None) -> None: + replica_chunks = self._ppdb.get_replica_chunks_ext_by_ids(replica_chunk_ids) + for replica_chunk in replica_chunks: + if replica_chunk.gcs_uri is None: + raise ValueError(f"Replica chunk {replica_chunk.id} does not have a GCS URI") + + # Parse the GCS URI to get the bucket name and object name + parsed_uri = urllib.parse.urlparse(replica_chunk.gcs_uri) + bucket_name = parsed_uri.netloc + object_name = posixpath.join(parsed_uri.path.lstrip("/"), DEFAULT_FILENAME) + 
+ # Get the blob from the bucket + bucket = self._gcs_client.bucket(bucket_name) + blob = bucket.blob(object_name) + content = blob.download_as_text() + + # Expand the update records into the appropriate format for + # inserting into the updates table + update_records = UpdateRecords.from_json_string(content) + expanded_update_records = UpdateRecordExpander.expand_updates(update_records) + self._updates_table.insert(expanded_update_records) + + # Deduplicate the update records to a new table + deduplicated_updates_table_fqn = ( + f"{self._ppdb.project_id}.{self._ppdb._config.dataset_id}.{self._deduplicated_updates_table_name}" + ) + self._updates_table.deduplicate_to(deduplicated_updates_table_fqn) + + # Merge the deduplicated updates into the target tables + for merger in self._mergers: + merger_instance = merger(self._bq_client) + if table_name_postfix: + # Apply a postfix like "_next" to the target table + merger_instance.target_table_name += f"_{table_name_postfix}" + target_dataset_fqn = f"{self._ppdb._config.project_id}.{self._ppdb._config.dataset_id}" + merger_instance.merge( + updates_table_fqn=deduplicated_updates_table_fqn, target_dataset_fqn=target_dataset_fqn + ) diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py index dfa85e02..d03ab69a 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py @@ -42,14 +42,28 @@ class UpdatesMerger(ABC): statement for this merger. The SQL file must be located in the `lsst.dax.ppdb.config.sql` package.""" - def __init__(self, client: bigquery.Client) -> None: + def __init__(self, client: bigquery.Client, target_table_name: str = None) -> None: """ Parameters ---------- client BigQuery client. + target_table_name + Optional name of the target table. If not provided, the class-level + TABLE_NAME will be used. 
""" self._client: bigquery.Client = client + self._target_table_name = target_table_name or self.TABLE_NAME + + @property + def target_table_name(self) -> str: + """Get the name of the target table this merger applies to.""" + return self._target_table_name + + @target_table_name.setter + def target_table_name(self, value: str) -> None: + """Set the name of the target table this merger applies to.""" + self._target_table_name = value def merge(self, *, updates_table_fqn: str, target_dataset_fqn: str) -> bigquery.QueryJob: """ diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_table.py b/python/lsst/dax/ppdb/bigquery/updates/updates_table.py index 5553d342..b8054bbf 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_table.py +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_table.py @@ -117,14 +117,14 @@ def create(self) -> bigquery.Table: table = bigquery.Table(self._table_fqn, schema=schema) return self._client.create_table(table) - def append(self, records: Iterable[ExpandedUpdateRecord]) -> bigquery.LoadJob: + def insert(self, records: Iterable[ExpandedUpdateRecord]) -> bigquery.LoadJob: """ - Append `ExpandedUpdateRecord` rows into the updates table. + Insert `ExpandedUpdateRecord` rows into the updates table. Parameters ---------- records - Iterable of update records to append. + Iterable of update records to insert. Returns ------- @@ -155,7 +155,7 @@ def append(self, records: Iterable[ExpandedUpdateRecord]) -> bigquery.LoadJob: for r in records ] - print("Appending rows to BigQuery:", rows) # Debug print to verify the data being loaded + print("Inserting rows into BigQuery:", rows) # Debug print to verify the data being loaded job = self._client.load_table_from_json( rows, diff --git a/tests/test_updates_manager.py b/tests/test_updates_manager.py new file mode 100644 index 00000000..b536612b --- /dev/null +++ b/tests/test_updates_manager.py @@ -0,0 +1,260 @@ +# This file is part of dax_ppdb. 
+# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import io +import json +import posixpath +import unittest +import uuid +from collections.abc import Collection, Sequence + +import astropy +import felis +from google.cloud import bigquery, storage + +from lsst.dax.apdb import ( + ApdbTableData, + ReplicaChunk, +) +from lsst.dax.ppdb import Ppdb +from lsst.dax.ppdb.bigquery import PpdbBigQuery, updates +from lsst.dax.ppdb.bigquery.chunk_uploader import ChunkUploader +from lsst.dax.ppdb.tests._bigquery import _PostgresMixin +from lsst.dax.ppdb.tests._updates import _create_test_update_records + + +def _generate_test_bucket_name(test_prefix: str = "ppdb-test") -> str: + """Generate a unique bucket name for testing.""" + test_id = uuid.uuid4().hex[:16] + return f"{test_prefix}-{test_id}" + + +class UpdatesManagerTestCase(_PostgresMixin, unittest.TestCase): + """A test case for the handling of APDB record updates by PpdbBigQuery and + related classes including the ChunkUploader. 
+ """ + + def setUp(self): + super().setUp() + + # Create the PPDB config + self.ppdb_config = self.make_instance() + + # Set up BigQuery client and test dataset + self.bq_client = bigquery.Client() + self.ppdb_config.project_id = self.bq_client.project + self.ppdb_config.dataset_id = f"test_updates_manager_{uuid.uuid4().hex[:8]}" + self.target_dataset_fqn = f"{self.ppdb_config.project_id}.{self.ppdb_config.dataset_id}" + self._create_test_dataset(self.bq_client, self.ppdb_config.dataset_id) + + # Generate a unique bucket name for the test and create it + self.ppdb_config.bucket_name = _generate_test_bucket_name("ppdb-updates-manager-test") + storage_client = storage.Client() + try: + bucket = storage_client.bucket(self.ppdb_config.bucket_name) + bucket.create(location="US") + except Exception as e: + self.fail(f"Failed to create test GCS bucket: {e}") + + # Create the PPDB instance + self.ppdb = Ppdb.from_config(self.ppdb_config) + assert isinstance(self.ppdb, PpdbBigQuery) + + def tearDown(self): + # Cleanup the test dataset + try: + self.bq_client.delete_dataset( + self.ppdb_config.dataset_id, delete_contents=True, not_found_ok=True + ) + except Exception as e: + print(f"Failed to delete test dataset: {e}") + + # Cleanup the test GCS bucket + storage_client = storage.Client() + try: + bucket = storage_client.bucket(self.ppdb_config.bucket_name) + blobs = list(bucket.list_blobs()) + for blob in blobs: + blob.delete() + bucket.delete() + except Exception as e: + print(f"Failed to delete test GCS bucket: {e}") + + super().tearDown() + + def _json_rows_to_buf(self, rows): + buf = io.StringIO() + for row in rows: + buf.write(json.dumps(row) + "\n") + buf.seek(0) + return buf + + def _create_test_dataset(self, client: bigquery.Client, dataset_id: str) -> None: + dataset = bigquery.Dataset(f"{client.project}.{dataset_id}") + client.create_dataset(dataset, exists_ok=False) + + # Create DiaObject table + schema = [ + bigquery.SchemaField("diaObjectId", "INTEGER", 
mode="REQUIRED"), + bigquery.SchemaField("validityEndMjdTai", "FLOAT", mode="NULLABLE"), + bigquery.SchemaField("nDiaSources", "INTEGER", mode="NULLABLE"), + ] + table_fqn = f"{self.target_dataset_fqn}.DiaObject" + table = bigquery.Table(table_fqn, schema=schema) + client.create_table(table) + rows = [ + {"diaObjectId": 200001, "validityEndMjdTai": None, "nDiaSources": 3}, + {"diaObjectId": 200002, "validityEndMjdTai": None, "nDiaSources": 7}, + {"diaObjectId": 200003, "validityEndMjdTai": 59000.0, "nDiaSources": 2}, + ] + buf = self._json_rows_to_buf(rows) + job = client.load_table_from_file( + buf, + table_fqn, + job_config=bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON), + ) + job.result() + + # Create test DiaSource table + schema = [ + bigquery.SchemaField("diaSourceId", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("diaObjectId", "INTEGER", mode="NULLABLE"), + bigquery.SchemaField("ssObjectId", "INTEGER", mode="NULLABLE"), + bigquery.SchemaField("ssObjectReassocTimeMjdTai", "FLOAT", mode="NULLABLE"), + bigquery.SchemaField("timeWithdrawnMjdTai", "FLOAT", mode="NULLABLE"), + ] + table_fqn = f"{self.target_dataset_fqn}.DiaSource" + table = bigquery.Table(table_fqn, schema=schema) + self.bq_client.create_table(table) + rows = [ + { + "diaSourceId": 100001, + "diaObjectId": 200001, + "ssObjectId": None, + "ssObjectReassocTimeMjdTai": None, + "timeWithdrawnMjdTai": None, + }, + { + "diaSourceId": 100002, + "diaObjectId": 200002, + "ssObjectId": None, + "ssObjectReassocTimeMjdTai": None, + "timeWithdrawnMjdTai": None, + }, + { + "diaSourceId": 100003, + "diaObjectId": 200003, + "ssObjectId": None, + "ssObjectReassocTimeMjdTai": None, + "timeWithdrawnMjdTai": None, + }, + { + "diaSourceId": 100004, + "diaObjectId": 200004, + "ssObjectId": None, + "ssObjectReassocTimeMjdTai": None, + "timeWithdrawnMjdTai": None, + }, + ] + job = client.load_table_from_file( + self._json_rows_to_buf(rows), + table_fqn, + 
job_config=bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON), + ) + job.result() + + # Create test DiaForcedSource table + schema = [ + bigquery.SchemaField("diaObjectId", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("visit", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("detector", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("timeWithdrawnMjdTai", "FLOAT", mode="NULLABLE"), + ] + table_fqn = f"{self.target_dataset_fqn}.DiaForcedSource" + table = bigquery.Table(table_fqn, schema=schema) + self.bq_client.create_table(table) + rows = [ + {"diaObjectId": 200001, "visit": 12345, "detector": 42, "timeWithdrawnMjdTai": None}, + {"diaObjectId": 200001, "visit": 12346, "detector": 42, "timeWithdrawnMjdTai": None}, + ] + job = self.bq_client.load_table_from_file( + self._json_rows_to_buf(rows), + table_fqn, + job_config=bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON), + ) + job.result() + + def test_apply_updates(self): + """Test that the update records are correctly uploaded to Google Cloud + Storage after replication. + """ + # Patch the ChunkUploader to print the message that would be published + # to the Pub/Sub topic instead of publishing, because there is no + # support for that service in the test environment. 
+ dataset_id = self.ppdb_config.dataset_id + + class DummyChunkUploader(ChunkUploader): + def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_id: int) -> None: + message = { + "dataset": dataset_id, + "chunk_id": str(chunk_id), + "folder": f"gs://{posixpath.join(bucket_name, chunk_prefix)}", + } + print(f"Dummy publish to Pub/Sub topic: {message}") + + class DummyApdbTableData(ApdbTableData): + def column_names(self) -> Sequence[str]: + return [] + + def column_defs(self) -> Sequence[tuple[str, felis.datamodel.DataType]]: + return [] + + def rows(self) -> Collection[tuple]: + return [] + + # Create and store the test update records + update_records = _create_test_update_records() + self.ppdb.store( + ReplicaChunk( + id=update_records.replica_chunk_id, + last_update_time=astropy.time.Time("2021-01-01T00:01:00", format="isot", scale="tai"), + unique_id=uuid.uuid4(), + ), + objects=DummyApdbTableData(), + sources=DummyApdbTableData(), + forced_sources=DummyApdbTableData(), + update_records=update_records.records, + update=True, + ) + + # Configure and run the uploader + uploader = DummyChunkUploader( + self.ppdb_config, + wait_interval=0, + exit_on_empty=True, + exit_on_error=True, + ) + print(f"Uploader will copy files to {uploader.bucket_name}/{uploader.prefix}") + uploader.run() + + # Apply the updates to the target tables + updates_manager = updates.UpdatesManager(self.ppdb) + updates_manager.apply_updates([update_records.replica_chunk_id]) From 3264f36f9c8ce6e148ed7b57eb90255149b48c10 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 27 Feb 2026 17:36:26 -0600 Subject: [PATCH 35/49] Add .scratch to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index a83e3ca4..1487de3e 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,6 @@ pytest_session.txt # VS Code .vscode + +# Scratch directory +.scratch From 73869d53ee2fa92c21599cc0016dd49912716076 Mon Sep 17 00:00:00 2001 From: 
Jeremy McCormick Date: Fri, 27 Feb 2026 17:36:54 -0600 Subject: [PATCH 36/49] Cleanup some test classes (WIP) and other minor changes --- pyproject.toml | 2 +- python/lsst/dax/ppdb/bigquery/__init__.py | 1 + python/lsst/dax/ppdb/bigquery/sql_resource.py | 5 +- python/lsst/dax/ppdb/config/__init__.py | 0 .../schemas/test_apdb_schema.yaml} | 0 python/lsst/dax/ppdb/config/sql/__init__.py | 0 python/lsst/dax/ppdb/tests/_bigquery.py | 51 +++++++++++++-- python/lsst/dax/ppdb/tests/_ppdb.py | 2 +- tests/test_ppdb_bigquery.py | 6 +- tests/test_update_records.py | 61 +++++++---------- tests/test_updates_manager.py | 65 +++++++++---------- tests/test_updates_merger.py | 6 +- tests/test_updates_table.py | 20 +++--- 13 files changed, 121 insertions(+), 98 deletions(-) delete mode 100644 python/lsst/dax/ppdb/config/__init__.py rename python/lsst/dax/ppdb/{tests/config/schema.yaml => config/schemas/test_apdb_schema.yaml} (100%) delete mode 100644 python/lsst/dax/ppdb/config/sql/__init__.py diff --git a/pyproject.toml b/pyproject.toml index 937bb005..24e1e41a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ where = ["python"] zip-safe = true [tool.setuptools.package-data] -"lsst.dax.ppdb" = ["py.typed", "tests/config/*.yaml", "config/sql/*.sql"] +"lsst.dax.ppdb" = ["py.typed", "config/schemas/*.yaml", "config/sql/*.sql"] [tool.setuptools.dynamic] version = { attr = "lsst_versions.get_lsst_version" } diff --git a/python/lsst/dax/ppdb/bigquery/__init__.py b/python/lsst/dax/ppdb/bigquery/__init__.py index 19d8c17d..e7b3071b 100644 --- a/python/lsst/dax/ppdb/bigquery/__init__.py +++ b/python/lsst/dax/ppdb/bigquery/__init__.py @@ -20,5 +20,6 @@ # along with this program. If not, see . 
from .manifest import Manifest +from .chunk_uploader import ChunkUploader from .ppdb_bigquery import PpdbBigQuery, PpdbBigQueryConfig from .ppdb_replica_chunk_extended import ChunkStatus, PpdbReplicaChunkExtended diff --git a/python/lsst/dax/ppdb/bigquery/sql_resource.py b/python/lsst/dax/ppdb/bigquery/sql_resource.py index 2548bd3c..99d4f89d 100644 --- a/python/lsst/dax/ppdb/bigquery/sql_resource.py +++ b/python/lsst/dax/ppdb/bigquery/sql_resource.py @@ -37,10 +37,9 @@ class SqlResource: Optional dictionary of arguments for formatting the SQL text. """ - SQL_RESOURCE_PACKAGE = "lsst.dax.ppdb.config.sql" - def __init__(self, sql_resource_name: str, format_args: dict[str, str] | None = None) -> None: - sql_resource_path = f"resource://{self.SQL_RESOURCE_PACKAGE}/{sql_resource_name}.sql" + # FIXME: Move the config dir into a resources dir (similar to obs_lsst) + sql_resource_path = f"resource://lsst.dax.ppdb/config/sql/{sql_resource_name}.sql" sql = ResourcePath(sql_resource_path).read().decode("utf-8") if format_args is not None: try: diff --git a/python/lsst/dax/ppdb/config/__init__.py b/python/lsst/dax/ppdb/config/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/python/lsst/dax/ppdb/tests/config/schema.yaml b/python/lsst/dax/ppdb/config/schemas/test_apdb_schema.yaml similarity index 100% rename from python/lsst/dax/ppdb/tests/config/schema.yaml rename to python/lsst/dax/ppdb/config/schemas/test_apdb_schema.yaml diff --git a/python/lsst/dax/ppdb/config/sql/__init__.py b/python/lsst/dax/ppdb/config/sql/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/python/lsst/dax/ppdb/tests/_bigquery.py b/python/lsst/dax/ppdb/tests/_bigquery.py index 1861396e..551c1e2e 100644 --- a/python/lsst/dax/ppdb/tests/_bigquery.py +++ b/python/lsst/dax/ppdb/tests/_bigquery.py @@ -20,23 +20,27 @@ # along with this program. If not, see . 
import gc +import posixpath import shutil import tempfile +import uuid from typing import Any +from google.cloud import storage + from lsst.dax.apdb import ( ApdbConfig, ) from lsst.dax.apdb.sql import ApdbSql from lsst.dax.ppdb import PpdbConfig -from lsst.dax.ppdb.bigquery import PpdbBigQuery +from lsst.dax.ppdb.bigquery import ChunkUploader, PpdbBigQuery +from lsst.dax.ppdb.tests._ppdb import TEST_SCHEMA_RESOURCE_PATH try: import testing.postgresql except ImportError: testing = None -from lsst.dax.ppdb.tests import TEST_SCHEMA_RESOURCE_PATH TEST_CONFIG = { "db_drop": True, @@ -49,7 +53,42 @@ } -class _SqliteMixin: +def generate_test_bucket_name(test_prefix: str = "ppdb-test") -> str: + """Generate a unique bucket name for testing.""" + test_id = uuid.uuid4().hex[:16] + return f"{test_prefix}-{test_id}" + + +def delete_test_bucket(bucket_or_bucket_name: str | storage.Bucket) -> None: + storage_client = storage.Client() + try: + if isinstance(bucket_or_bucket_name, str): + bucket = storage_client.bucket(bucket_or_bucket_name) + else: + bucket = bucket_or_bucket_name + blobs = list(bucket.list_blobs()) + for blob in blobs: + blob.delete() + bucket.delete() + except Exception as e: + print(f"Failed to delete test GCS bucket: {e}") + + +class ChunkUploaderWithoutPubSub(ChunkUploader): + """A dummy implementation of the ChunkUploader that does not actually + post messages to Pub/Sub. + """ + + def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_id: int) -> None: + message = { + "dataset": None, + "chunk_id": str(chunk_id), + "folder": f"gs://{posixpath.join(bucket_name, chunk_prefix)}", + } + print(f"Dummy publish to Pub/Sub topic: {message}") + + +class SqliteMixin: """Mixin class to provide Sqlite-specific setup/teardown and instance creation. 
""" @@ -87,7 +126,7 @@ def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: return ApdbSql.init_database(**kw) # type: ignore[arg-type] -class _PostgresMixin: +class PostgresMixin: """Mixin class to provide Postgres-specific setup/teardown and instance creation. """ @@ -116,10 +155,10 @@ def tearDown(self) -> None: self.server = self.postgresql() shutil.rmtree(self.tempdir, ignore_errors=True) - def make_instance(self, **kwargs: Any) -> PpdbConfig: + def make_instance(self, config_dict: dict[str, Any] = TEST_CONFIG, **kwargs: Any) -> PpdbConfig: """Make config class instance used in all tests.""" kw = { - **TEST_CONFIG, + **config_dict, "db_url": self.server.url(), "db_schema": "ppdb_test", "felis_path": TEST_SCHEMA_RESOURCE_PATH, diff --git a/python/lsst/dax/ppdb/tests/_ppdb.py b/python/lsst/dax/ppdb/tests/_ppdb.py index ffb50e84..245639ea 100644 --- a/python/lsst/dax/ppdb/tests/_ppdb.py +++ b/python/lsst/dax/ppdb/tests/_ppdb.py @@ -60,7 +60,7 @@ class TestCaseMixin: """Do-nothing definition of mixin base class for regular execution.""" -TEST_SCHEMA_RESOURCE_PATH = "resource://lsst.dax.ppdb.tests.config/schema.yaml" +TEST_SCHEMA_RESOURCE_PATH = "resource://lsst.dax.ppdb/config/schemas/test_apdb_schema.yaml" def _make_region(xyz: tuple[float, float, float] = (1.0, 1.0, -1.0)) -> Region: diff --git a/tests/test_ppdb_bigquery.py b/tests/test_ppdb_bigquery.py index b23381e5..43a870d9 100644 --- a/tests/test_ppdb_bigquery.py +++ b/tests/test_ppdb_bigquery.py @@ -22,7 +22,7 @@ import unittest from lsst.dax.ppdb.tests import PpdbTest -from lsst.dax.ppdb.tests._bigquery import _PostgresMixin, _SqliteMixin +from lsst.dax.ppdb.tests._bigquery import PostgresMixin, SqliteMixin try: import testing.postgresql @@ -30,10 +30,10 @@ testing = None -class SqliteTestCase(_SqliteMixin, PpdbTest, unittest.TestCase): +class SqliteTestCase(SqliteMixin, PpdbTest, unittest.TestCase): """A test case for the PpdbBigQuery class using a SQLite backend.""" @unittest.skipUnless(testing is 
not None, "testing.postgresql module not found") -class PostgresTestCase(_PostgresMixin, PpdbTest, unittest.TestCase): +class PostgresTestCase(PostgresMixin, PpdbTest, unittest.TestCase): """A test case for the PpdbBigQuery class using a Postgres backend.""" diff --git a/tests/test_update_records.py b/tests/test_update_records.py index e523544a..c6a1dd8e 100644 --- a/tests/test_update_records.py +++ b/tests/test_update_records.py @@ -20,11 +20,10 @@ # along with this program. If not, see . import json -import posixpath import unittest -import uuid import pytest +from google.cloud import storage try: from lsst.dax.ppdb.bigquery import updates @@ -41,17 +40,16 @@ from lsst.dax.ppdb.bigquery import PpdbBigQuery from lsst.dax.ppdb.replicator import Replicator from lsst.dax.ppdb.tests import ApdbMixin -from lsst.dax.ppdb.tests._bigquery import _PostgresMixin - - -def _generate_test_bucket_name(test_prefix: str = "ppdb-test") -> str: - """Generate a unique bucket name for testing.""" - test_id = uuid.uuid4().hex[:16] - return f"{test_prefix}-{test_id}" +from lsst.dax.ppdb.tests._bigquery import ( + ChunkUploaderWithoutPubSub, + PostgresMixin, + delete_test_bucket, + generate_test_bucket_name, +) @unittest.skipIf(updates is None, "Google Cloud dependencies not available") -class UpdateRecordsTestCase(_PostgresMixin, ApdbMixin, unittest.TestCase): +class UpdateRecordsTestCase(PostgresMixin, ApdbMixin, unittest.TestCase): """A test case for the handling of APDB record updates by PpdbBigQuery and related classes including the ChunkUploader. """ @@ -259,6 +257,8 @@ def test_json_serialization(self) -> None: "Unexpected dec in deserialized ApdbWithdrawDiaForcedSourceRecord, should not be 0.0", ) + # FIXME: This should be in a separate test case and probably a separate + # module as well. 
@pytest.mark.skipif( pytest.importorskip("lsst.dax.ppdbx.gcp", reason="dax_ppdbx_gcp is not installed") is None, reason="", @@ -267,36 +267,21 @@ def test_chunk_uploader(self) -> None: """Test that the update records are correctly uploaded to Google Cloud Storage after replication. """ - from lsst.dax.ppdb.bigquery.chunk_uploader import ChunkUploader - from lsst.dax.ppdbx.gcp.gcs import StorageClient - # Change the configuration to use a unique test bucket name to avoid # conflicts - ppdb_config_copy = self.ppdb_config.model_copy() - ppdb_config_copy.bucket_name = _generate_test_bucket_name("ppdb-test-gcs-upload") - - # Patch the ChunkUploader to print the message that would be published - # to the Pub/Sub topic instead of publishing, because there is no - # support for that service in the test environment. - class DummyChunkUploader(ChunkUploader): - def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_id: int) -> None: - message = { - "dataset": self.dataset_id, - "chunk_id": str(chunk_id), - "folder": f"gs://{posixpath.join(bucket_name, chunk_prefix)}", - } - print(f"Dummy publish to Pub/Sub topic: {message}") + self.ppdb_config.bucket_name = generate_test_bucket_name("ppdb-test-gcs-upload") # Create the test GCS bucket - storage_client = StorageClient(ppdb_config_copy.bucket_name) + storage_client = storage.Client() try: - storage_client.create_bucket() + bucket = storage_client.bucket(self.ppdb_config.bucket_name) + bucket.create(location="US") except Exception as e: self.fail(f"Failed to create test GCS bucket: {e}") # Configure and run the uploader - uploader = DummyChunkUploader( - ppdb_config_copy, + uploader = ChunkUploaderWithoutPubSub( + self.ppdb_config, wait_interval=0, exit_on_empty=True, exit_on_error=True, @@ -304,15 +289,18 @@ def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_ print(f"Uploader will copy files to {uploader.bucket_name}/{uploader.prefix}/") uploader.run() - # Retrieve the 
update records file - update_records_files = storage_client.list_files("**/update_records.json") + # Retrieve the update records file[] + blobs = list(bucket.list_blobs(match_glob="**/update_records.json")) + update_records_files = [b.name for b in blobs] self.assertEqual( len(update_records_files), 1, f"Expected exactly one update_records.json file in GCS, found " f"{len(update_records_files)}: {update_records_files}", ) - update_records_str = storage_client.read_as_string(update_records_files[0]) + + # Download the contents of the update records file as a string + update_records_str = blobs[0].download_as_text() # Print the contents of the update records file for debugging update_records_json = json.loads(update_records_str) @@ -339,8 +327,9 @@ def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_ f"Expected 3 update records in the file from GCS, found {len(update_records.records)}", ) + # FIXME: This should be in a tearDown() method. # Delete the test GCS bucket try: - storage_client.delete_bucket(force=True) + delete_test_bucket(bucket) except Exception as e: - self.fail(f"Failed to delete test GCS bucket: {e}") + raise RuntimeError(f"Failed to delete test GCS bucket: {e}") diff --git a/tests/test_updates_manager.py b/tests/test_updates_manager.py index b536612b..74d36d8b 100644 --- a/tests/test_updates_manager.py +++ b/tests/test_updates_manager.py @@ -21,7 +21,6 @@ import io import json -import posixpath import unittest import uuid from collections.abc import Collection, Sequence @@ -36,18 +35,15 @@ ) from lsst.dax.ppdb import Ppdb from lsst.dax.ppdb.bigquery import PpdbBigQuery, updates -from lsst.dax.ppdb.bigquery.chunk_uploader import ChunkUploader -from lsst.dax.ppdb.tests._bigquery import _PostgresMixin +from lsst.dax.ppdb.tests._bigquery import ( + ChunkUploaderWithoutPubSub, + PostgresMixin, + generate_test_bucket_name, +) from lsst.dax.ppdb.tests._updates import _create_test_update_records -def 
_generate_test_bucket_name(test_prefix: str = "ppdb-test") -> str: - """Generate a unique bucket name for testing.""" - test_id = uuid.uuid4().hex[:16] - return f"{test_prefix}-{test_id}" - - -class UpdatesManagerTestCase(_PostgresMixin, unittest.TestCase): +class UpdatesManagerTestCase(PostgresMixin, unittest.TestCase): """A test case for the handling of APDB record updates by PpdbBigQuery and related classes including the ChunkUploader. """ @@ -55,18 +51,30 @@ class UpdatesManagerTestCase(_PostgresMixin, unittest.TestCase): def setUp(self): super().setUp() - # Create the PPDB config - self.ppdb_config = self.make_instance() - # Set up BigQuery client and test dataset self.bq_client = bigquery.Client() - self.ppdb_config.project_id = self.bq_client.project - self.ppdb_config.dataset_id = f"test_updates_manager_{uuid.uuid4().hex[:8]}" - self.target_dataset_fqn = f"{self.ppdb_config.project_id}.{self.ppdb_config.dataset_id}" - self._create_test_dataset(self.bq_client, self.ppdb_config.dataset_id) - # Generate a unique bucket name for the test and create it - self.ppdb_config.bucket_name = _generate_test_bucket_name("ppdb-updates-manager-test") + bucket_name = generate_test_bucket_name("ppdb-updates-manager-test") + dataset_id = f"test_updates_manager_{uuid.uuid4().hex[:8]}" + project_id = self.bq_client.project + config = { + "db_drop": True, + "validate_config": False, + "delete_existing_dirs": True, + "bucket_name": bucket_name, + "object_prefix": "data/test", + "dataset_id": dataset_id, + "project_id": project_id, + } + + # Setup the Postgres database and create the config instance + self.ppdb_config = self.make_instance(config) + + # Create the test dataset and tables in BigQuery + self.target_dataset_fqn = f"{project_id}.{dataset_id}" + self._create_test_dataset(self.bq_client, dataset_id) + + # Create the test GCS bucket storage_client = storage.Client() try: bucket = storage_client.bucket(self.ppdb_config.bucket_name) @@ -79,7 +87,7 @@ def setUp(self): assert 
isinstance(self.ppdb, PpdbBigQuery) def tearDown(self): - # Cleanup the test dataset + # Delete the test dataset try: self.bq_client.delete_dataset( self.ppdb_config.dataset_id, delete_contents=True, not_found_ok=True @@ -87,7 +95,7 @@ def tearDown(self): except Exception as e: print(f"Failed to delete test dataset: {e}") - # Cleanup the test GCS bucket + # Delete the test GCS bucket storage_client = storage.Client() try: bucket = storage_client.bucket(self.ppdb_config.bucket_name) @@ -206,19 +214,6 @@ def test_apply_updates(self): """Test that the update records are correctly uploaded to Google Cloud Storage after replication. """ - # Patch the ChunkUploader to print the message that would be published - # to the Pub/Sub topic instead of publishing, because there is no - # support for that service in the test environment. - dataset_id = self.ppdb_config.dataset_id - - class DummyChunkUploader(ChunkUploader): - def _post_to_stage_chunk_topic(self, bucket_name: str, chunk_prefix: str, chunk_id: int) -> None: - message = { - "dataset": dataset_id, - "chunk_id": str(chunk_id), - "folder": f"gs://{posixpath.join(bucket_name, chunk_prefix)}", - } - print(f"Dummy publish to Pub/Sub topic: {message}") class DummyApdbTableData(ApdbTableData): def column_names(self) -> Sequence[str]: @@ -246,7 +241,7 @@ def rows(self) -> Collection[tuple]: ) # Configure and run the uploader - uploader = DummyChunkUploader( + uploader = ChunkUploaderWithoutPubSub( self.ppdb_config, wait_interval=0, exit_on_empty=True, diff --git a/tests/test_updates_merger.py b/tests/test_updates_merger.py index ed4dffa7..5fdca1bb 100644 --- a/tests/test_updates_merger.py +++ b/tests/test_updates_merger.py @@ -98,7 +98,7 @@ def test_merge_diaobject(self): updates_table.create() update_records = _create_test_update_records() expanded = UpdateRecordExpander.expand_updates(update_records) - updates_table.append(expanded) + updates_table.insert(expanded) dedup_fqn = f"{self.updates_table_fqn}_dedup" 
updates_table.deduplicate_to(dedup_fqn) table_fqn = f"{self.target_dataset_fqn}.DiaObject" @@ -168,7 +168,7 @@ def test_merge_diasource(self): updates_table.create() update_records = _create_test_update_records() expanded = UpdateRecordExpander.expand_updates(update_records) - updates_table.append(expanded) + updates_table.insert(expanded) dedup_fqn = f"{self.updates_table_fqn}_dedup" updates_table.deduplicate_to(dedup_fqn) @@ -211,7 +211,7 @@ def test_merge_diaforcedsource(self): updates_table.create() update_records = _create_test_update_records() expanded = UpdateRecordExpander.expand_updates(update_records) - updates_table.append(expanded) + updates_table.insert(expanded) dedup_fqn = f"{self.updates_table_fqn}_dedup" updates_table.deduplicate_to(dedup_fqn) diff --git a/tests/test_updates_table.py b/tests/test_updates_table.py index aeb196b4..bddbbd7b 100644 --- a/tests/test_updates_table.py +++ b/tests/test_updates_table.py @@ -107,8 +107,8 @@ def test_create_table_already_exists(self) -> None: # Check that it's a conflict-type error self.assertIn("already exists", str(cm.exception).lower()) - def test_append_records(self) -> None: - """Test appending ExpandedUpdateRecord objects to the table.""" + def test_insert_records(self) -> None: + """Test insertion of expanded records into the table.""" # Create the table first self.updates_table.create() @@ -116,8 +116,8 @@ def test_append_records(self) -> None: update_records = _create_test_update_records() expanded_records = UpdateRecordExpander.expand_updates(update_records) - # Append the records - job = self.updates_table.append(expanded_records) + # Insert the records + job = self.updates_table.insert(expanded_records) # Verify the job completed successfully self.assertIsNone(job.errors) @@ -148,13 +148,13 @@ def test_append_records(self) -> None: # self.assertEqual(row.field_name, "timeWithdrawnMjdTai") # self.assertEqual(row.replica_chunk_id, self.replica_chunk_id) - def test_append_empty_records(self) -> None: 
- """Test appending empty list of records.""" + def test_insert_empty_records(self) -> None: + """Test insertion of empty record list.""" # Create the table first self.updates_table.create() - # Append empty list - job = self.updates_table.append([]) + # Insert empty list + job = self.updates_table.insert([]) # Verify the job completed successfully self.assertIsNone(job.errors) @@ -174,8 +174,8 @@ def test_deduplicate_records(self) -> None: update_records = _create_test_update_records() expanded_records = UpdateRecordExpander.expand_updates(update_records) - # Append all records (including duplicates) - self.updates_table.append(expanded_records) + # Insert all records (including duplicates) + self.updates_table.insert(expanded_records) # Count original records query = f"SELECT COUNT(*) as count FROM `{self.table_fqn}`" From b34a1383e1bfa18de38a018a7c54c4bce8e8cc32 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 27 Feb 2026 17:37:18 -0600 Subject: [PATCH 37/49] Add BigQuery classes from dax_ppdbx_gcp --- python/lsst/dax/ppdb/bigquery/query_runner.py | 161 +++++++++++ .../ppdb/bigquery/replica_chunk_promoter.py | 258 ++++++++++++++++++ 2 files changed, 419 insertions(+) create mode 100644 python/lsst/dax/ppdb/bigquery/query_runner.py create mode 100644 python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py diff --git a/python/lsst/dax/ppdb/bigquery/query_runner.py b/python/lsst/dax/ppdb/bigquery/query_runner.py new file mode 100644 index 00000000..e4e57166 --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/query_runner.py @@ -0,0 +1,161 @@ +# This file is part of dax_ppdbx_gcp +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +import os +from typing import TypeAlias + +__all__ = [ + "QueryRunner", +] + +import logging + +from google.cloud import bigquery + +AnyBigQueryJob: TypeAlias = ( + bigquery.job.QueryJob + | bigquery.job.LoadJob + | bigquery.job.CopyJob + | bigquery.job.ExtractJob + | bigquery.job.UnknownJob +) + + +class QueryRunner: + """Class to run BigQuery queries with logging. + + Parameters + ---------- + project_id : `str` + Google Cloud project ID. + dataset_id : `str` + BigQuery dataset ID. + """ + + def __init__(self, project_id: str, dataset_id: str): + self._project_id = project_id + self._dataset_id = dataset_id + self._bq_client = bigquery.Client(project=project_id) + self._dataset = self._bq_client.get_dataset(f"{project_id}.{dataset_id}") + self._location = self._dataset.location + + @classmethod + def from_env(cls) -> QueryRunner: + """Create a QueryRunner instance using environment variables. + + Returns + ------- + query_runner: `QueryRunner` + An instance of QueryRunner initialized with project and dataset IDs + from environment variables. 
+ """ + project_id = os.environ.get("PROJECT_ID") + if project_id is None: + raise OSError("Environment variable 'PROJECT_ID' is not set") + + dataset_id = os.environ.get("DATASET_ID") + if dataset_id is None: + raise OSError("Environment variable 'DATASET_ID' is not set") + + return cls(project_id, dataset_id) + + @property + def project_id(self) -> str: + """Google Cloud project ID (`str`, read-only).""" + return self._project_id + + @property + def dataset(self) -> bigquery.Dataset: + """Dataset reference (`bigquery.Dataset`, read-only).""" + return self._dataset + + @property + def dataset_id(self) -> str: + """Dataset ID (`str`, read-only).""" + return self._dataset_id + + @property + def location(self) -> str: + """Dataset location, typically the region where it is hosted (`str`, + read-only). + """ + return self._location + + @classmethod + def log_job(cls, job: AnyBigQueryJob, label: str, level: int = logging.DEBUG) -> None: + """Log details of a BigQuery job. + + Parameters + ---------- + job : `bigquery.job.QueryJob` + The BigQuery job to log. + label : `str` + A label for the job, typically indicating the type of operation + (e.g., "insert", "delete", "copy"). + level : `int`, optional + The logging level to use for the log message. Defaults to + `logging.DEBUG`. + """ + logging.log( + level, + "BQ %s: job_id=%s location=%s state=%s bytes_processed=%s bytes_billed=%s slot_millis=%s " + "dml_rows=%s reference_tables=%s", + label, + job.job_id, + job.location, + job.state, + getattr(job, "total_bytes_processed", None), + getattr(job, "total_bytes_billed", None), + getattr(job, "slot_millis", None), + getattr(job, "num_dml_affected_rows", None), + getattr(job, "referenced_tables", None), + ) + + def run_job( + self, label: str, sql: str, job_config: bigquery.QueryJobConfig | None = None + ) -> bigquery.job.QueryJob: + """Run a BigQuery job with the given SQL and configuration. 
+ + Parameters + ---------- + label : `str` + A label for the job, typically indicating the type of operation + (e.g., "insert", "delete", "copy"). + sql : `str` + The SQL query to execute. + job_config : `bigquery.QueryJobConfig`, optional + Configuration for the job, such as query parameters or write + dispositions. If not provided, a default configuration will be + used. + + Returns + ------- + job: `bigquery.job.QueryJob` + The BigQuery job object representing the executed query. This can + be used to check the status of the job, retrieve results, or log + additional details. + """ + job = self._bq_client.query(sql, job_config=job_config, location=self.dataset.location) + job.result() # Wait for the job to complete + self.log_job(job, label) + return job diff --git a/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py b/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py new file mode 100644 index 00000000..9a3fde92 --- /dev/null +++ b/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py @@ -0,0 +1,258 @@ +# This file is part of dax_ppdbx_gcp +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +from __future__ import annotations + +__all__ = [ + "NoPromotableChunksError", + "ReplicaChunkPromoter", +] + +import logging +from collections.abc import Callable + +from google.api_core.exceptions import NotFound +from google.cloud import bigquery + +from .query_runner import QueryRunner + + +class NoPromotableChunksError(Exception): + """Exception raised when there are no promotable chunks available.""" + + pass + + +class ReplicaChunkPromoter: + """Class to promote replica chunks in BigQuery. + + Parameters + ---------- + promotable_chunks: `list`[`int`] + Sequence of tuples containing the APDB replica chunk IDs to promote. + runner : `QueryRunner`, optional + An instance of `QueryRunner` to execute queries. If not provided, a new + instance will be created using environment variables. + table_names : `list`[`str`], optional + List of table names to promote with standard default. + """ + + def __init__( + self, + promotable_chunks: list[int], + runner: QueryRunner | None = None, + table_names: list[str] | None = None, + ): + self._promotable_chunks = promotable_chunks + self._runner = runner or QueryRunner.from_env() + # DM-52326: Hard-coded table names; these should be passed in from + # config. 
+ self._table_names = table_names or ["DiaObject", "DiaSource", "DiaForcedSource"] + self._bq_client = bigquery.Client(project=self._runner.project_id) + self._phases = { + "build_tmp": self._copy_to_promoted_tmp, + "promote_prod": self._promote_tmp_to_prod, + "delete_staged_chunks": self._delete_staged_chunks, + "cleanup": self._cleanup_promoted_tmp, + } + + @property + def project_id(self) -> str: + """Google Cloud project ID (`str`, read-only).""" + return self._runner.project_id + + @property + def dataset_id(self) -> str: + """Dataset ID (`str`, read-only).""" + return self._runner.dataset_id + + @property + def table_names(self) -> list[str]: + """List of table names to promote (`list`[`str`], read-only).""" + return self._table_names + + @property + def promotable_chunks(self) -> list[int]: + """List of promotable chunks (`list[`int`], + read-only). + """ + return self._promotable_chunks + + @promotable_chunks.setter + def promotable_chunks(self, chunks: list[int]) -> None: + if not chunks: + raise NoPromotableChunksError("No promotable chunks provided") + self._promotable_chunks = chunks + + @property + def runner(self) -> QueryRunner: + """Runner for executing BigQuery jobs (`QueryRunner`, read-only).""" + return self._runner + + @property + def bq_client(self) -> bigquery.Client: + """Client for interacting with BigQuery (`bigquery.Client`, + read-only). + """ + return self._bq_client + + @property + def phases(self) -> dict[str, Callable]: + """Phases of the promotion process as a dictionary mapping phase names + to their corresponding class methods (`dict`[`str`, `Callable`], + read-only). + """ + return self._phases + + @property + def table_prod_refs(self) -> list[str]: + """Fully-qualified production table references (`list`[`str`], + read-only). 
+ """ + return [f"{self.project_id}.{self.dataset_id}.{table_name}" for table_name in self.table_names] + + @property + def table_staging_refs(self) -> list[str]: + """Fully-qualified staging table references (`list`[`str`], + read-only). + """ + return [ + f"{self.project_id}.{self.dataset_id}._{table_name}_staging" for table_name in self.table_names + ] + + @property + def table_promoted_tmp_refs(self) -> list[str]: + """Fully-qualified promoted temporary table references (`list`[`str`], + read-only). + """ + return [ + f"{self.project_id}.{self.dataset_id}._{table_name}_promoted_tmp" + for table_name in self.table_names + ] + + def _execute_phase(self, phase: str) -> None: + """Execute a specific promotion phase. + + Parameters + ---------- + phase : `str` + The name of the promotion phase to execute. This should be one of + the keys in the `phases` property. + """ + if phase not in self.phases: + raise ValueError(f"Unknown promotion phase: {phase}") + logging.debug("Executing promotion phase: %s", phase) + self._phases[phase]() + + def _copy_to_promoted_tmp(self) -> None: + """ + Build ``_{table_name}_promoted_tmp`` efficiently by cloning prod and + inserting only staged rows for the given replica chunk IDs. 
+ """ + job_cfg = bigquery.QueryJobConfig( + query_parameters=[bigquery.ArrayQueryParameter("ids", "INT64", self.promotable_chunks)] + ) + + for prod_ref, tmp_ref, stage_ref in zip( + self.table_prod_refs, self.table_promoted_tmp_refs, self.table_staging_refs, strict=False + ): + # Drop any existing tmp table (should not exist but just to be + # safe) + self.runner.run_job("drop_tmp", f"DROP TABLE IF EXISTS `{tmp_ref}`") + + # Clone prod table structure and data (zero-copy) + self.runner.run_job("clone_prod", f"CREATE TABLE `{tmp_ref}` CLONE `{prod_ref}`") + + # Build ordered target list from the cloned tmp schema + tmp_schema = self.bq_client.get_table(tmp_ref).schema + target_names = [f.name for f in tmp_schema if f.name != "apdb_replica_chunk"] + target_list_sql = ", ".join(f"`{n}`" for n in target_names) + + # Build source list, handling geo_point conversion + source_list_sql = ", ".join( + "ST_GEOGPOINT(s.`ra`, s.`dec`)" if n == "geo_point" else f"s.`{n}`" for n in target_names + ) + + # Insert staged rows into tmp, excluding apdb_replica_chunk column + sql = f""" + INSERT INTO `{tmp_ref}` ({target_list_sql}) + SELECT {source_list_sql} + FROM `{stage_ref}` AS s + WHERE s.apdb_replica_chunk IN UNNEST(@ids) + """ + logging.debug("SQL for inserting staged rows into %s: %s", tmp_ref, sql) + self.runner.run_job("insert_staged_to_tmp", sql, job_config=job_cfg) + + def _promote_tmp_to_prod(self) -> None: + """ + Swap each prod table with its corresponding *_promoted_tmp by replacing + prod contents in a single atomic copy job. This preserves schema, + partitioning, and clustering with zero-copy when in the same dataset. 
+ """ + for prod_ref, tmp_ref in zip(self.table_prod_refs, self.table_promoted_tmp_refs, strict=False): + # Ensure tmp exists + try: + self.bq_client.get_table(tmp_ref) + except NotFound as e: + raise RuntimeError(f"Missing tmp table for promotion: {tmp_ref}") from e + + # Atomic zero-copy replacement of prod with tmp + copy_cfg = bigquery.CopyJobConfig(write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE) + job = self.bq_client.copy_table( + tmp_ref, prod_ref, job_config=copy_cfg, location=self._runner.location + ) + job.result() + QueryRunner.log_job(job, "promote_tmp_to_prod") + + def _cleanup_promoted_tmp(self) -> None: + """Drop the promotion temporary tables.""" + for tmp_ref in self.table_promoted_tmp_refs: + self.bq_client.delete_table(tmp_ref, not_found_ok=True) + logging.debug("Dropped %s (if it existed)", tmp_ref) + + def _delete_staged_chunks(self) -> None: + """Delete only rows for the promoted replica chunk IDs from each + staging table. + """ + job_config = bigquery.QueryJobConfig( + query_parameters=[bigquery.ArrayQueryParameter("ids", "INT64", self.promotable_chunks)] + ) + + for staging_ref in self.table_staging_refs: + try: + sql = f"DELETE FROM `{staging_ref}` WHERE apdb_replica_chunk IN UNNEST(@ids)" + self.runner.run_job("delete_staged_chunks", sql, job_config=job_config) + logging.debug( + "Deleted %d chunk(s) from staging table %s", len(self.promotable_chunks), staging_ref + ) + except NotFound: + logging.warning("Staging table %s does not exist, skipping delete", staging_ref) + + def promote_chunks(self) -> None: + """Promote APDB replica chunks into production.""" + try: + for phase in ("build_tmp", "promote_prod", "delete_staged_chunks"): + self._execute_phase(phase) + finally: + try: + self._execute_phase("cleanup") + except Exception: + logging.exception("Cleanup of temporary tables failed") From 380a0a61e9909c678306119685c5ddea64920b7f Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 18 Mar 2026 17:50:09 -0500 Subject: 
[PATCH 38/49] WIP: Integrate application of updates into promotion process --- pyproject.toml | 6 +- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 23 +++++-- python/lsst/dax/ppdb/bigquery/query_runner.py | 21 ------ .../ppdb/bigquery/replica_chunk_promoter.py | 64 ++++++++++++++----- .../ppdb/bigquery/updates/updates_manager.py | 19 ++++-- .../ppdb/bigquery/updates/updates_merger.py | 6 +- .../sql/merge_diaforcedsource_updates.sql | 18 +----- .../config/sql/merge_diaobject_updates.sql | 18 +----- .../config/sql/merge_diasource_updates.sql | 16 +---- requirements.txt | 7 +- 10 files changed, 95 insertions(+), 103 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 24e1e41a..de2ff161 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,10 +23,12 @@ classifiers = [ keywords = ["lsst"] dependencies = [ "astropy", + "google-cloud-bigquery", "pyarrow", "pydantic >=2,<3", "pyyaml >= 5.1", "sqlalchemy", + "lsst-dax-ppdbx-gcp", "lsst-felis", "lsst-sdm-schemas", "lsst-utils", @@ -43,10 +45,6 @@ test = [ "pytest >= 3.2", "pytest-openfiles >= 0.5.0" ] -gcp = [ - "google-cloud-bigquery", - "lsst-dax-ppdbx-gcp" -] [tool.setuptools.packages.find] where = ["python"] diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index fa15c20a..ab34bedf 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -48,6 +48,7 @@ from ..sql import PpdbSqlBase, PpdbSqlBaseConfig from .manifest import Manifest, TableStats from .ppdb_replica_chunk_extended import ChunkStatus, PpdbReplicaChunkExtended +from .query_runner import QueryRunner from .sql_resource import SqlResource __all__ = ["ConfigValidationError", "PpdbBigQuery", "PpdbBigQueryConfig"] @@ -144,16 +145,26 @@ def __init__(self, config: PpdbBigQueryConfig): self._config = config + self._query_runner: QueryRunner | None = None + @property def metadata(self) -> ApdbMetadata: - """Implement `Ppdb` interface to 
return APDB metadata object. + """APDB metadata object from `Ppdb` interface (`ApdbMetadata`).""" + return self._metadata - Returns - ------- - metadata : `ApdbMetadata` - APDB metadata object. + @property + def config(self) -> PpdbBigQueryConfig: + """PPDB config associated with this instance.""" + return self._config + + @property + def query_runner(self) -> QueryRunner: + """Query runner for executing SQL in BigQuery + (`~lsst.dax.ppdb.bigquery.QueryRunner`). """ - return self._metadata + if not self._query_runner: + self._query_runner = QueryRunner(self.config.project_id, self.config.dataset_id) + return self._query_runner def _init_sql(self, config: PpdbBigQueryConfig) -> None: sql_config = config.sql diff --git a/python/lsst/dax/ppdb/bigquery/query_runner.py b/python/lsst/dax/ppdb/bigquery/query_runner.py index e4e57166..40eaf023 100644 --- a/python/lsst/dax/ppdb/bigquery/query_runner.py +++ b/python/lsst/dax/ppdb/bigquery/query_runner.py @@ -21,7 +21,6 @@ from __future__ import annotations -import os from typing import TypeAlias __all__ = [ @@ -59,26 +58,6 @@ def __init__(self, project_id: str, dataset_id: str): self._dataset = self._bq_client.get_dataset(f"{project_id}.{dataset_id}") self._location = self._dataset.location - @classmethod - def from_env(cls) -> QueryRunner: - """Create a QueryRunner instance using environment variables. - - Returns - ------- - query_runner: `QueryRunner` - An instance of QueryRunner initialized with project and dataset IDs - from environment variables. 
- """ - project_id = os.environ.get("PROJECT_ID") - if project_id is None: - raise OSError("Environment variable 'PROJECT_ID' is not set") - - dataset_id = os.environ.get("DATASET_ID") - if dataset_id is None: - raise OSError("Environment variable 'DATASET_ID' is not set") - - return cls(project_id, dataset_id) - @property def project_id(self) -> str: """Google Cloud project ID (`str`, read-only).""" diff --git a/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py b/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py index 9a3fde92..2e39b270 100644 --- a/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py +++ b/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py @@ -32,7 +32,9 @@ from google.api_core.exceptions import NotFound from google.cloud import bigquery +from .ppdb_bigquery import PpdbBigQuery from .query_runner import QueryRunner +from .updates import UpdatesManager class NoPromotableChunksError(Exception): @@ -46,34 +48,39 @@ class ReplicaChunkPromoter: Parameters ---------- - promotable_chunks: `list`[`int`] - Sequence of tuples containing the APDB replica chunk IDs to promote. - runner : `QueryRunner`, optional - An instance of `QueryRunner` to execute queries. If not provided, a new - instance will be created using environment variables. + ppdb : `PpdbBigQuery` + Interface to the PPDB in BigQuery. table_names : `list`[`str`], optional - List of table names to promote with standard default. + List of table names to promote or if None a default list will be used. """ def __init__( self, - promotable_chunks: list[int], - runner: QueryRunner | None = None, + ppdb: PpdbBigQuery, table_names: list[str] | None = None, ): - self._promotable_chunks = promotable_chunks - self._runner = runner or QueryRunner.from_env() + self._ppdb = ppdb + self._runner = ppdb.query_runner # DM-52326: Hard-coded table names; these should be passed in from # config. 
self._table_names = table_names or ["DiaObject", "DiaSource", "DiaForcedSource"] self._bq_client = bigquery.Client(project=self._runner.project_id) self._phases = { + "get_promotable_chunks": self._get_promotable_chunks, "build_tmp": self._copy_to_promoted_tmp, + "apply_record_updates": self._apply_record_updates, "promote_prod": self._promote_tmp_to_prod, "delete_staged_chunks": self._delete_staged_chunks, - "cleanup": self._cleanup_promoted_tmp, + "mark_promoted": self._mark_chunks_promoted, } + self._promotable_chunks: list[int] = [] + + @property + def ppdb(self) -> PpdbBigQuery: + """PPDB interface to BigQuery.""" + return self._ppdb + @property def project_id(self) -> str: """Google Cloud project ID (`str`, read-only).""" @@ -96,6 +103,11 @@ def promotable_chunks(self) -> list[int]: """ return self._promotable_chunks + @property + def promotable_chunk_count(self) -> int: + """Count of promotable chunks that were found in the database.""" + return len(self.promotable_chunks) + @promotable_chunks.setter def promotable_chunks(self, chunks: list[int]) -> None: if not chunks: @@ -162,6 +174,11 @@ def _execute_phase(self, phase: str) -> None: logging.debug("Executing promotion phase: %s", phase) self._phases[phase]() + def _get_promotable_chunks(self) -> None: + """Get list of promotable chunks from the database.""" + self.promotable_chunks = self.ppdb.get_promotable_chunks() + logging.info("Promotable chunk count: %s", len(self.promotable_chunks)) + def _copy_to_promoted_tmp(self) -> None: """ Build ``_{table_name}_promoted_tmp`` efficiently by cloning prod and @@ -217,12 +234,12 @@ def _promote_tmp_to_prod(self) -> None: # Atomic zero-copy replacement of prod with tmp copy_cfg = bigquery.CopyJobConfig(write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE) job = self.bq_client.copy_table( - tmp_ref, prod_ref, job_config=copy_cfg, location=self._runner.location + tmp_ref, prod_ref, job_config=copy_cfg, location=self.runner.location ) job.result() 
QueryRunner.log_job(job, "promote_tmp_to_prod") - def _cleanup_promoted_tmp(self) -> None: + def _cleanup(self) -> None: """Drop the promotion temporary tables.""" for tmp_ref in self.table_promoted_tmp_refs: self.bq_client.delete_table(tmp_ref, not_found_ok=True) @@ -246,13 +263,26 @@ def _delete_staged_chunks(self) -> None: except NotFound: logging.warning("Staging table %s does not exist, skipping delete", staging_ref) + def _apply_record_updates(self) -> None: + """Apply record updates to the promoted temporary tables.""" + updates_manager = UpdatesManager(self.ppdb, table_name_postfix="_promoted_tmp") + updates_manager.apply_updates(self._promotable_chunks) + + def _mark_chunks_promoted(self) -> None: + """Mark the replica chunks as promoted in the database.""" + self.ppdb.mark_chunks_promoted(self._promotable_chunks) + def promote_chunks(self) -> None: - """Promote APDB replica chunks into production.""" + """Promote APDB replica chunks into production by executing a series of + phases. + """ try: - for phase in ("build_tmp", "promote_prod", "delete_staged_chunks"): + for phase in self._phases.keys(): self._execute_phase(phase) finally: try: - self._execute_phase("cleanup") + # Cleanup is always executed separately, not as an ordered + # phase. 
+ self._cleanup() except Exception: - logging.exception("Cleanup of temporary tables failed") + logging.exception("Cleanup of chunk promotion failed") diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_manager.py b/python/lsst/dax/ppdb/bigquery/updates/updates_manager.py index dc605f10..fd9374e1 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_manager.py +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_manager.py @@ -36,7 +36,11 @@ ) from .updates_table import UpdatesTable -DEFAULT_MERGERS = (DiaObjectUpdatesMerger, DiaSourceUpdatesMerger, DiaForcedSourceUpdatesMerger) +DEFAULT_MERGERS = ( + DiaObjectUpdatesMerger, + DiaSourceUpdatesMerger, + DiaForcedSourceUpdatesMerger, +) class UpdatesManager: @@ -51,6 +55,7 @@ def __init__( mergers: Sequence[type[UpdatesMerger]] = DEFAULT_MERGERS, updates_table_name="updates", deduplicated_updates_table_name="updates_deduplicated", + table_name_postfix: str | None = None, ) -> None: self._ppdb = ppdb self._mergers = mergers @@ -62,12 +67,16 @@ def __init__( self._bq_client, f"{self._ppdb._config.project_id}.{self._ppdb._config.dataset_id}.{updates_table_name}", ) + + # TODO: Catch error if already exists self._updates_table.create() self._gcs_client = storage.Client() self._bucket = self._gcs_client.bucket(self._ppdb._config.bucket_name) - def apply_updates(self, replica_chunk_ids: Sequence[int], table_name_postfix: str | None = None) -> None: + self._table_name_postfix = table_name_postfix + + def apply_updates(self, replica_chunk_ids: Sequence[int]) -> None: replica_chunks = self._ppdb.get_replica_chunks_ext_by_ids(replica_chunk_ids) for replica_chunk in replica_chunks: if replica_chunk.gcs_uri is None: @@ -98,9 +107,9 @@ def apply_updates(self, replica_chunk_ids: Sequence[int], table_name_postfix: st # Merge the deduplicated updates into the target tables for merger in self._mergers: merger_instance = merger(self._bq_client) - if table_name_postfix: - # Apply a postfix like "_next" to the target table - 
merger_instance.target_table_name += f"_{table_name_postfix}" + if self._table_name_postfix: + # Apply a postfix to the canonical target table name + merger_instance.target_table_name += f"{self.table_name_postfix}" target_dataset_fqn = f"{self._ppdb._config.project_id}.{self._ppdb._config.dataset_id}" merger_instance.merge( updates_table_fqn=deduplicated_updates_table_fqn, target_dataset_fqn=target_dataset_fqn diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py index d03ab69a..f55aecb6 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py @@ -84,7 +84,11 @@ def merge(self, *, updates_table_fqn: str, target_dataset_fqn: str) -> bigquery. """ sql = SqlResource( self.SQL_RESOURCE_NAME, - format_args={"updates_table": updates_table_fqn, "target_dataset": target_dataset_fqn}, + format_args={ + "updates_table": updates_table_fqn, + "target_dataset": target_dataset_fqn, + "target_table": self.target_table_name, + }, ).sql job = self._client.query(sql) job.result() diff --git a/python/lsst/dax/ppdb/config/sql/merge_diaforcedsource_updates.sql b/python/lsst/dax/ppdb/config/sql/merge_diaforcedsource_updates.sql index 8c60f86c..8eef46b7 100644 --- a/python/lsst/dax/ppdb/config/sql/merge_diaforcedsource_updates.sql +++ b/python/lsst/dax/ppdb/config/sql/merge_diaforcedsource_updates.sql @@ -1,15 +1,4 @@ --- merge_diaforcedsource_updates.sql --- --- Query parameters: --- @updates_table STRING -- table FQN, e.g. "project.dataset.prod_next" --- @target_dataset STRING -- dataset FQN, e.g. "project.dataset" --- --- Do NOT include backticks in parameter values. 
- -DECLARE sql STRING; - -SET sql = """ -MERGE `{target_dataset}.DiaForcedSource` T +MERGE `{target_dataset}.{target_table}` T USING ( WITH patch AS ( SELECT @@ -36,7 +25,4 @@ ON T.diaObjectId = P.diaObjectId AND T.detector = P.detector WHEN MATCHED THEN UPDATE SET - timeWithdrawnMjdTai = IF(P.timeWithdrawnMjdTai_present, P.timeWithdrawnMjdTai_value, T.timeWithdrawnMjdTai) -"""; - -EXECUTE IMMEDIATE sql; + timeWithdrawnMjdTai = IF(P.timeWithdrawnMjdTai_present, P.timeWithdrawnMjdTai_value, T.timeWithdrawnMjdTai); diff --git a/python/lsst/dax/ppdb/config/sql/merge_diaobject_updates.sql b/python/lsst/dax/ppdb/config/sql/merge_diaobject_updates.sql index 143f86a5..9c6c1827 100644 --- a/python/lsst/dax/ppdb/config/sql/merge_diaobject_updates.sql +++ b/python/lsst/dax/ppdb/config/sql/merge_diaobject_updates.sql @@ -1,15 +1,4 @@ --- merge_diaobject_updates.sql --- --- Query parameters: --- @updates_table STRING -- table FQN, e.g. "project.dataset.prod_next" --- @target_dataset STRING -- dataset FQN, e.g. "project.dataset" --- --- Do NOT include backticks in parameter values. 
- -DECLARE sql STRING; - -SET sql = """ -MERGE `{target_dataset}.DiaObject` T +MERGE `{target_dataset}.{target_table}` T USING ( WITH patch AS ( SELECT @@ -40,7 +29,4 @@ ON T.diaObjectId = P.diaObjectId WHEN MATCHED THEN UPDATE SET validityEndMjdTai = IF(P.validityEndMjdTai_present, P.validityEndMjdTai_value, T.validityEndMjdTai), - nDiaSources = IF(P.nDiaSources_present, P.nDiaSources_value, T.nDiaSources) -"""; - -EXECUTE IMMEDIATE sql; \ No newline at end of file + nDiaSources = IF(P.nDiaSources_present, P.nDiaSources_value, T.nDiaSources); diff --git a/python/lsst/dax/ppdb/config/sql/merge_diasource_updates.sql b/python/lsst/dax/ppdb/config/sql/merge_diasource_updates.sql index 5a39b877..5a2d5307 100644 --- a/python/lsst/dax/ppdb/config/sql/merge_diasource_updates.sql +++ b/python/lsst/dax/ppdb/config/sql/merge_diasource_updates.sql @@ -1,15 +1,4 @@ --- merge_diasource_updates.sql --- --- Query parameters: --- @updates_table STRING -- table FQN, e.g. "project.dataset.prod_next" --- @target_dataset STRING -- dataset FQN, e.g. "project.dataset" --- --- Do NOT include backticks in parameter values. 
- -DECLARE sql STRING; - -SET sql = """ -MERGE `{target_dataset}.DiaSource` T +MERGE `{target_dataset}.{target_table}` T USING ( WITH patch AS ( SELECT @@ -57,6 +46,3 @@ UPDATE SET ssObjectId = IF(P.ssObjectId_present, P.ssObjectId_value, T.ssObjectId), ssObjectReassocTimeMjdTai = IF(P.ssObjectReassocTimeMjdTai_present, P.ssObjectReassocTimeMjdTai_value, T.ssObjectReassocTimeMjdTai), timeWithdrawnMjdTai = IF(P.timeWithdrawnMjdTai_present, P.timeWithdrawnMjdTai_value, T.timeWithdrawnMjdTai) -"""; - -EXECUTE IMMEDIATE sql; diff --git a/requirements.txt b/requirements.txt index a8e9fceb..c0cc7069 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,13 @@ astropy +google-cloud-bigquery pyarrow pydantic >=2,<3 pyyaml >= 5.1 sqlalchemy + lsst-dax-apdb @ git+https://github.com/lsst/dax_apdb@main -lsst-utils @ git+https://github.com/lsst/utils@main -lsst-resources[s3] @ git+https://github.com/lsst/resources@main +lsst-dax-ppdbx-gcp @ git+https://github.com/lsst-dm/dax_ppdbx_gcp@main lsst-felis @ git+https://github.com/lsst/felis@main lsst-sdm-schemas @ git+https://github.com/lsst/sdm_schemas@main +lsst-utils @ git+https://github.com/lsst/utils@main +lsst-resources[s3] @ git+https://github.com/lsst/resources@main From c9f65105a82a6f52759dd40b5fd78302dbe2adb1 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 19 Mar 2026 15:02:03 -0500 Subject: [PATCH 39/49] Add check in tests to skip if there are no valid Google credentials Some test modules require that there are valid Google credentials available. This adds a check so that if these are not present, the tests will be skipped, e.g., in GitHub CI where they should not run but failures should be avoided. 
--- python/lsst/dax/ppdb/tests/_bigquery.py | 30 +++++++++++++++++++++++++ tests/test_update_record_expander.py | 10 +++------ tests/test_update_records.py | 12 ++++------ tests/test_updates_manager.py | 2 ++ tests/test_updates_merger.py | 27 ++++++++++------------ tests/test_updates_table.py | 16 +++++-------- 6 files changed, 56 insertions(+), 41 deletions(-) diff --git a/python/lsst/dax/ppdb/tests/_bigquery.py b/python/lsst/dax/ppdb/tests/_bigquery.py index 551c1e2e..7b65d4af 100644 --- a/python/lsst/dax/ppdb/tests/_bigquery.py +++ b/python/lsst/dax/ppdb/tests/_bigquery.py @@ -26,6 +26,9 @@ import uuid from typing import Any +import google.auth +from google.auth.exceptions import DefaultCredentialsError +from google.auth.transport.requests import Request from google.cloud import storage from lsst.dax.apdb import ( @@ -177,3 +180,30 @@ def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: } kw.update(kwargs) return ApdbSql.init_database(**kw) # type: ignore[arg-type] + + +def have_valid_google_credentials() -> bool: + """Check that valid Google credentials are available for testing. + + Returns + ------- + credentials_valid: `bool` + True if valid Google credentials are available, False if not. + + Raises + ------ + google.auth.exceptions.RefreshError + Raised if the credentials cannot be refreshed. + Exception + Raised for other transport or configuration failures. + """ + try: + credentials, _ = google.auth.default() + except DefaultCredentialsError: + return False + + # This will validate the default credentials that were found in the + # environment. 
+ credentials.refresh(Request()) + + return True diff --git a/tests/test_update_record_expander.py b/tests/test_update_record_expander.py index 843a025a..596acfed 100644 --- a/tests/test_update_record_expander.py +++ b/tests/test_update_record_expander.py @@ -32,13 +32,9 @@ ApdbWithdrawDiaForcedSourceRecord, ApdbWithdrawDiaSourceRecord, ) - -try: - from lsst.dax.ppdb.bigquery import updates - from lsst.dax.ppdb.bigquery.updates import ExpandedUpdateRecord, UpdateRecordExpander, UpdateRecords - from lsst.dax.ppdb.tests._updates import _create_test_update_records -except ImportError: - updates = None +from lsst.dax.ppdb.bigquery import updates +from lsst.dax.ppdb.bigquery.updates import ExpandedUpdateRecord, UpdateRecordExpander, UpdateRecords +from lsst.dax.ppdb.tests._updates import _create_test_update_records @unittest.skipIf(updates is None, "Google Cloud environment not available") diff --git a/tests/test_update_records.py b/tests/test_update_records.py index c6a1dd8e..3b56857c 100644 --- a/tests/test_update_records.py +++ b/tests/test_update_records.py @@ -25,12 +25,6 @@ import pytest from google.cloud import storage -try: - from lsst.dax.ppdb.bigquery import updates - from lsst.dax.ppdb.bigquery.updates import UpdateRecords -except ImportError: - updates = None - from lsst.dax.apdb import ( Apdb, ApdbReplica, @@ -38,6 +32,7 @@ ) from lsst.dax.ppdb import Ppdb from lsst.dax.ppdb.bigquery import PpdbBigQuery +from lsst.dax.ppdb.bigquery.updates import UpdateRecords from lsst.dax.ppdb.replicator import Replicator from lsst.dax.ppdb.tests import ApdbMixin from lsst.dax.ppdb.tests._bigquery import ( @@ -45,10 +40,11 @@ PostgresMixin, delete_test_bucket, generate_test_bucket_name, + have_valid_google_credentials, ) -@unittest.skipIf(updates is None, "Google Cloud dependencies not available") +@unittest.skipIf(not have_valid_google_credentials(), "Missing valid Google credentials") class UpdateRecordsTestCase(PostgresMixin, ApdbMixin, unittest.TestCase): """A test 
case for the handling of APDB record updates by PpdbBigQuery and related classes including the ChunkUploader. @@ -332,4 +328,4 @@ def test_chunk_uploader(self) -> None: try: delete_test_bucket(bucket) except Exception as e: - raise RuntimeError(f"Failed to delete test GCS bucket: {e}") + raise RuntimeError(f"Failed to delete test GCS bucket: {e}") from e diff --git a/tests/test_updates_manager.py b/tests/test_updates_manager.py index 74d36d8b..77c02729 100644 --- a/tests/test_updates_manager.py +++ b/tests/test_updates_manager.py @@ -39,10 +39,12 @@ ChunkUploaderWithoutPubSub, PostgresMixin, generate_test_bucket_name, + have_valid_google_credentials, ) from lsst.dax.ppdb.tests._updates import _create_test_update_records +@unittest.skipIf(not have_valid_google_credentials(), "Missing valid Google credentials") class UpdatesManagerTestCase(PostgresMixin, unittest.TestCase): """A test case for the handling of APDB record updates by PpdbBigQuery and related classes including the ChunkUploader. 
diff --git a/tests/test_updates_merger.py b/tests/test_updates_merger.py index 5fdca1bb..ead829c5 100644 --- a/tests/test_updates_merger.py +++ b/tests/test_updates_merger.py @@ -29,21 +29,18 @@ except ImportError: bigquery = None -try: - from lsst.dax.ppdb.bigquery import updates - from lsst.dax.ppdb.bigquery.updates import ( - DiaForcedSourceUpdatesMerger, - DiaObjectUpdatesMerger, - DiaSourceUpdatesMerger, - UpdateRecordExpander, - UpdatesTable, - ) - from lsst.dax.ppdb.tests._updates import _create_test_update_records -except ImportError: - updates = None - - -@unittest.skipIf(bigquery is None or updates is None, "Google Cloud dependencies not available") +from lsst.dax.ppdb.bigquery.updates import ( + DiaForcedSourceUpdatesMerger, + DiaObjectUpdatesMerger, + DiaSourceUpdatesMerger, + UpdateRecordExpander, + UpdatesTable, +) +from lsst.dax.ppdb.tests._bigquery import have_valid_google_credentials +from lsst.dax.ppdb.tests._updates import _create_test_update_records + + +@unittest.skipIf(not have_valid_google_credentials(), "Missing valid Google credentials") class TestUpdatesMerger(unittest.TestCase): """Test UpdatesMerger functionality.""" diff --git a/tests/test_updates_table.py b/tests/test_updates_table.py index bddbbd7b..07fc1202 100644 --- a/tests/test_updates_table.py +++ b/tests/test_updates_table.py @@ -22,20 +22,14 @@ import unittest import uuid -try: - from lsst.dax.ppdb.bigquery import updates - from lsst.dax.ppdb.bigquery.updates import UpdateRecordExpander, UpdatesTable - from lsst.dax.ppdb.tests._updates import _create_test_update_records -except ImportError: - updates = None +from google.cloud import bigquery -try: - from google.cloud import bigquery -except (ModuleNotFoundError, ImportError): - bigquery = None +from lsst.dax.ppdb.bigquery.updates import UpdateRecordExpander, UpdatesTable +from lsst.dax.ppdb.tests._bigquery import have_valid_google_credentials +from lsst.dax.ppdb.tests._updates import _create_test_update_records 
-@unittest.skipIf(updates is None or bigquery is None, "Google Cloud dependencies not available") +@unittest.skipIf(not have_valid_google_credentials(), "Missing valid Google credentials") class TestUpdatesTable(unittest.TestCase): """Test UpdatesTable functionality.""" From 4072a8d8ef795af041ac6afd632a1fa264e0ada8 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 19 Mar 2026 15:56:24 -0500 Subject: [PATCH 40/49] FIXUP --- python/lsst/dax/ppdb/bigquery/updates/updates_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py index f55aecb6..778a6be0 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py @@ -88,7 +88,7 @@ def merge(self, *, updates_table_fqn: str, target_dataset_fqn: str) -> bigquery. "updates_table": updates_table_fqn, "target_dataset": target_dataset_fqn, "target_table": self.target_table_name, - }, + }, ).sql job = self._client.query(sql) job.result() From 7484bca50ba4d83fdbffa391b99f277c52566718 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 19 Mar 2026 16:09:55 -0500 Subject: [PATCH 41/49] Fix type alias issue reported by ruff --- python/lsst/dax/ppdb/bigquery/query_runner.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/query_runner.py b/python/lsst/dax/ppdb/bigquery/query_runner.py index 40eaf023..e9901658 100644 --- a/python/lsst/dax/ppdb/bigquery/query_runner.py +++ b/python/lsst/dax/ppdb/bigquery/query_runner.py @@ -21,8 +21,6 @@ from __future__ import annotations -from typing import TypeAlias - __all__ = [ "QueryRunner", ] @@ -31,14 +29,6 @@ from google.cloud import bigquery -AnyBigQueryJob: TypeAlias = ( - bigquery.job.QueryJob - | bigquery.job.LoadJob - | bigquery.job.CopyJob - | bigquery.job.ExtractJob - | bigquery.job.UnknownJob -) - class 
QueryRunner: """Class to run BigQuery queries with logging. @@ -81,7 +71,16 @@ def location(self) -> str: return self._location @classmethod - def log_job(cls, job: AnyBigQueryJob, label: str, level: int = logging.DEBUG) -> None: + def log_job( + cls, + job: bigquery.job.QueryJob + | bigquery.job.LoadJob + | bigquery.job.CopyJob + | bigquery.job.ExtractJob + | bigquery.job.UnknownJob, + label: str, + level: int = logging.DEBUG, + ) -> None: """Log details of a BigQuery job. Parameters From d52dfb4d206bac446dc328c74a46d41e86b6c248 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 19 Mar 2026 16:30:05 -0500 Subject: [PATCH 42/49] Add missing docstring --- python/lsst/dax/ppdb/tests/_bigquery.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/lsst/dax/ppdb/tests/_bigquery.py b/python/lsst/dax/ppdb/tests/_bigquery.py index 7b65d4af..2726b70d 100644 --- a/python/lsst/dax/ppdb/tests/_bigquery.py +++ b/python/lsst/dax/ppdb/tests/_bigquery.py @@ -63,6 +63,13 @@ def generate_test_bucket_name(test_prefix: str = "ppdb-test") -> str: def delete_test_bucket(bucket_or_bucket_name: str | storage.Bucket) -> None: + """Delete a cloud storage bucket that was created for testing. + + Parameters + ---------- + bucket_or_bucket_name: `str` or `storage.Bucket` + The name of the bucket or the actual bucket to delete. 
+ """ storage_client = storage.Client() try: if isinstance(bucket_or_bucket_name, str): From f7cd7917b445f4931f443815c632f3793cc7cd41 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 19 Mar 2026 17:31:55 -0500 Subject: [PATCH 43/49] Add `mark_chunks_promoted` method --- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index ab34bedf..a3c6c0d2 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -781,3 +781,31 @@ def get_promotable_chunks(self) -> list[int]: result = conn.execute(sqlalchemy.text(sql)) chunk_ids = [row[0] for row in result] return chunk_ids + + def mark_chunks_promoted(self, promotable_chunks: list[int]) -> int: + """Set status='promoted' for the given chunk IDs. Returns number + updated. + + Parameters + ---------- + promotable_chunks : `list`[`int`] + List of integers containing the ``apdb_replica_chunk`` values of + the promotable chunks. + + Returns + ------- + count: `int` + The number of rows updated in the database, which should be equal + to the number of promotable chunks provided, if they were all found + and updated successfully. 
+ """ + table = self.get_table("PpdbReplicaChunk") + stmt = ( + sqlalchemy.update(table) + .where(table.c.apdb_replica_chunk.in_(promotable_chunks), table.c.status != "promoted") + .values(status="promoted") + ) + + with self._engine.begin() as conn: + result: sqlalchemy.engine.CursorResult = conn.execute(stmt) + return result.rowcount or 0 From 0519130e7564f673db353d624dd907109cb309cb Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 19 Mar 2026 17:45:45 -0500 Subject: [PATCH 44/49] Fix mypy errors --- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 4 ++- .../ppdb/bigquery/replica_chunk_promoter.py | 16 +++++----- .../updates/update_record_expander.py | 2 -- .../ppdb/bigquery/updates/update_records.py | 31 +++++++++++-------- .../ppdb/bigquery/updates/updates_manager.py | 6 ++-- .../ppdb/bigquery/updates/updates_merger.py | 2 +- python/lsst/dax/ppdb/sql/_ppdb_sql_base.py | 2 +- python/lsst/dax/ppdb/tests/_bigquery.py | 10 ++---- python/lsst/dax/ppdb/tests/_ppdb.py | 2 +- python/lsst/dax/ppdb/tests/_updates.py | 5 ++- 10 files changed, 39 insertions(+), 41 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index a3c6c0d2..22095b4d 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -736,7 +736,7 @@ def _handle_updates( update_records = UpdateRecords( replica_chunk_id=replica_chunk.id, - records=apdb_update_records, + records=list(apdb_update_records), record_count=len(apdb_update_records), ) update_records.write_json_file(chunk_dir / "update_records.json") @@ -769,6 +769,8 @@ def get_promotable_chunks(self) -> list[int]: empty list is returned. """ table = self.get_table("PpdbReplicaChunk") + if not table.schema: + raise ValueError("Table schema is not set, cannot construct query") quoted_table_name = ( self._engine.dialect.identifier_preparer.quote(table.schema) + "." 
diff --git a/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py b/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py index 2e39b270..4c67a2e4 100644 --- a/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py +++ b/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py @@ -98,22 +98,20 @@ def table_names(self) -> list[str]: @property def promotable_chunks(self) -> list[int]: - """List of promotable chunks (`list[`int`], - read-only). - """ + """List of promotable chunks (`list` [ `int` ], read-only).""" return self._promotable_chunks - @property - def promotable_chunk_count(self) -> int: - """Count of promotable chunks that were found in the database.""" - return len(self.promotable_chunks) - @promotable_chunks.setter def promotable_chunks(self, chunks: list[int]) -> None: if not chunks: raise NoPromotableChunksError("No promotable chunks provided") self._promotable_chunks = chunks + @property + def promotable_chunk_count(self) -> int: + """Count of promotable chunks that were found in the database.""" + return len(self.promotable_chunks) + @property def runner(self) -> QueryRunner: """Runner for executing BigQuery jobs (`QueryRunner`, read-only).""" @@ -176,7 +174,7 @@ def _execute_phase(self, phase: str) -> None: def _get_promotable_chunks(self) -> None: """Get list of promotable chunks from the database.""" - self.promotable_chunks = self.ppdb.get_promotable_chunks() + self._promotable_chunks = self.ppdb.get_promotable_chunks() logging.info("Promotable chunk count: %s", len(self.promotable_chunks)) def _copy_to_promoted_tmp(self) -> None: diff --git a/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py b/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py index f5c95fdf..b0fe2fb8 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py +++ b/python/lsst/dax/ppdb/bigquery/updates/update_record_expander.py @@ -168,7 +168,6 @@ def expand_single_record( # Get the record ID record_id = 
cls._get_record_id(update_record) - record_id_hash = cls._compute_record_id_hash(record_id) expanded_records = [] for field_name in field_names: @@ -182,7 +181,6 @@ def expand_single_record( expanded_record = ExpandedUpdateRecord( table_name=table_name, record_id=record_id, - record_id_hash=record_id_hash, field_name=field_name, value_json=value, replica_chunk_id=replica_chunk_id, diff --git a/python/lsst/dax/ppdb/bigquery/updates/update_records.py b/python/lsst/dax/ppdb/bigquery/updates/update_records.py index ae986fb3..a3107c85 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/update_records.py +++ b/python/lsst/dax/ppdb/bigquery/updates/update_records.py @@ -23,7 +23,7 @@ import json from pathlib import Path -from typing import Any +from typing import Any, cast from pydantic import BaseModel, field_serializer, field_validator @@ -90,19 +90,24 @@ def deserialize_records( The list of APDB update records. """ if records and isinstance(records[0], ApdbUpdateRecord): - return records + return cast(list[ApdbUpdateRecord], records) deserialized_records: list[ApdbUpdateRecord] = [] - for record_dict in records: - record_copy = record_dict.copy() - update_time_ns = record_copy.pop("update_time_ns") - update_order = record_copy.pop("update_order") - json_str = json.dumps(record_copy) - update_record = ApdbUpdateRecord.from_json( - update_time_ns, - update_order, - json_str, - ) - deserialized_records.append(update_record) + for record in records: + if isinstance(record, dict): + record_copy = record.copy() + update_time_ns = record_copy.pop("update_time_ns") + update_order = record_copy.pop("update_order") + json_str = json.dumps(record_copy) + update_record = ApdbUpdateRecord.from_json( + update_time_ns, + update_order, + json_str, + ) + deserialized_records.append(update_record) + elif isinstance(record, ApdbUpdateRecord): + deserialized_records.append(record) + else: + raise TypeError("Each record must be a dict or ApdbUpdateRecord") return deserialized_records def 
write_json_file(self, path: Path) -> None: diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_manager.py b/python/lsst/dax/ppdb/bigquery/updates/updates_manager.py index fd9374e1..a0eaaff5 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_manager.py +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_manager.py @@ -53,8 +53,8 @@ def __init__( self, ppdb: PpdbBigQuery, mergers: Sequence[type[UpdatesMerger]] = DEFAULT_MERGERS, - updates_table_name="updates", - deduplicated_updates_table_name="updates_deduplicated", + updates_table_name: str = "updates", + deduplicated_updates_table_name: str = "updates_deduplicated", table_name_postfix: str | None = None, ) -> None: self._ppdb = ppdb @@ -109,7 +109,7 @@ def apply_updates(self, replica_chunk_ids: Sequence[int]) -> None: merger_instance = merger(self._bq_client) if self._table_name_postfix: # Apply a postfix to the canonical target table name - merger_instance.target_table_name += f"{self.table_name_postfix}" + merger_instance.target_table_name += f"{self._table_name_postfix}" target_dataset_fqn = f"{self._ppdb._config.project_id}.{self._ppdb._config.dataset_id}" merger_instance.merge( updates_table_fqn=deduplicated_updates_table_fqn, target_dataset_fqn=target_dataset_fqn diff --git a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py index 778a6be0..e74b68d6 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py +++ b/python/lsst/dax/ppdb/bigquery/updates/updates_merger.py @@ -42,7 +42,7 @@ class UpdatesMerger(ABC): statement for this merger. 
The SQL file must be located in the `lsst.dax.ppdb.config.sql` package.""" - def __init__(self, client: bigquery.Client, target_table_name: str = None) -> None: + def __init__(self, client: bigquery.Client, target_table_name: str | None = None) -> None: """ Parameters ---------- diff --git a/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py b/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py index ed953310..5db51d5d 100644 --- a/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py +++ b/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py @@ -155,7 +155,7 @@ def _build_connect_args(cls, config: PpdbSqlBaseConfig) -> MutableMapping[str, A return {"connect_args": conn_args} @classmethod - def _config_listeners(cls, engine: sqlalchemy.engine.Engine) -> sqlalchemy.engine.Engine: + def _config_listeners(cls, engine: sqlalchemy.engine.Engine) -> None: if engine.dialect.name == "sqlite": # Need to enable foreign keys on every new connection. sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect) diff --git a/python/lsst/dax/ppdb/tests/_bigquery.py b/python/lsst/dax/ppdb/tests/_bigquery.py index 2726b70d..e8be8ad6 100644 --- a/python/lsst/dax/ppdb/tests/_bigquery.py +++ b/python/lsst/dax/ppdb/tests/_bigquery.py @@ -119,9 +119,7 @@ def make_instance(self, **kwargs: Any) -> PpdbConfig: "felis_path": TEST_SCHEMA_RESOURCE_PATH, "replication_dir": self.tempdir, } - bq_config = PpdbBigQuery.init_bigquery( - **kw, - ) # type: ignore[arg-type] + bq_config = PpdbBigQuery.init_bigquery(**kw) # type: ignore[arg-type] return bq_config def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: @@ -147,7 +145,6 @@ class PostgresMixin: def setUpClass(cls) -> None: # Create the postgres test server. cls.postgresql = testing.postgresql.PostgresqlFactory(cache_initialized_db=True) - super().setUpClass() @classmethod def tearDownClass(cls) -> None: @@ -155,7 +152,6 @@ def tearDownClass(cls) -> None: # so they're closed before we shut down the server. 
gc.collect() cls.postgresql.clear_cache() - super().tearDownClass() def setUp(self) -> None: self.server = self.postgresql() @@ -174,7 +170,7 @@ def make_instance(self, config_dict: dict[str, Any] = TEST_CONFIG, **kwargs: Any "felis_path": TEST_SCHEMA_RESOURCE_PATH, "replication_dir": self.tempdir, } - bq_config = PpdbBigQuery.init_bigquery(**kw) # type: ignore[arg-type] + bq_config = PpdbBigQuery.init_bigquery(**kw) return bq_config def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: @@ -186,7 +182,7 @@ def make_apdb_instance(self, **kwargs: Any) -> ApdbConfig: "enable_replica": True, } kw.update(kwargs) - return ApdbSql.init_database(**kw) # type: ignore[arg-type] + return ApdbSql.init_database(**kw) def have_valid_google_credentials() -> bool: diff --git a/python/lsst/dax/ppdb/tests/_ppdb.py b/python/lsst/dax/ppdb/tests/_ppdb.py index 245639ea..e2c90c47 100644 --- a/python/lsst/dax/ppdb/tests/_ppdb.py +++ b/python/lsst/dax/ppdb/tests/_ppdb.py @@ -71,7 +71,7 @@ def _make_region(xyz: tuple[float, float, float] = (1.0, 1.0, -1.0)) -> Region: return region -class ApdbMixin: +class ApdbMixin(unittest.TestCase): """Mixin class containing APDB setuup and record generation for PPDB testing. """ diff --git a/python/lsst/dax/ppdb/tests/_updates.py b/python/lsst/dax/ppdb/tests/_updates.py index bc978ffe..45d3fb1b 100644 --- a/python/lsst/dax/ppdb/tests/_updates.py +++ b/python/lsst/dax/ppdb/tests/_updates.py @@ -19,13 +19,13 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-import datetime from lsst.dax.apdb import ( ApdbCloseDiaObjectValidityRecord, ApdbReassignDiaSourceToDiaObjectRecord, ApdbReassignDiaSourceToSSObjectRecord, ApdbUpdateNDiaSourcesRecord, + ApdbUpdateRecord, ApdbWithdrawDiaForcedSourceRecord, ApdbWithdrawDiaSourceRecord, ) @@ -35,7 +35,7 @@ def _create_test_update_records() -> UpdateRecords: """Create test UpdateRecords with sample ApdbUpdateRecord instances.""" - records = [] + records: list[ApdbUpdateRecord] = [] # Hardcoded test values test_update_time_ns = 1640995200000000000 # 2022-01-01 00:00:00 UTC in nanoseconds @@ -153,5 +153,4 @@ def _create_test_update_records() -> UpdateRecords: replica_chunk_id=test_replica_chunk_id, record_count=len(records), records=records, - file_created_at=datetime.datetime.now(datetime.UTC), ) From bb01e3ac7382d9c404a463f3008bcad9e3130d0d Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 20 Mar 2026 11:33:16 -0500 Subject: [PATCH 45/49] WIP: Introduce class for handling SQL passwords --- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 106 +++++++----------- python/lsst/dax/ppdb/sql/__init__.py | 2 +- python/lsst/dax/ppdb/sql/_ppdb_sql_base.py | 61 ++++++++-- 3 files changed, 91 insertions(+), 78 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index 22095b4d..0f697eaf 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -28,6 +28,7 @@ import felis import sqlalchemy +from google.cloud import secretmanager from lsst.dax.apdb import ( ApdbMetadata, @@ -39,17 +40,17 @@ monitor, schema_model, ) -from lsst.dax.apdb.sql import ApdbMetadataSql from lsst.dax.apdb.timer import Timer from .._arrow import write_parquet from ..ppdb import Ppdb, PpdbReplicaChunk from ..ppdb_config import PpdbConfig -from ..sql import PpdbSqlBase, PpdbSqlBaseConfig +from ..sql import PasswordProvider, PpdbSqlBase, PpdbSqlBaseConfig from .manifest import Manifest, 
TableStats from .ppdb_replica_chunk_extended import ChunkStatus, PpdbReplicaChunkExtended from .query_runner import QueryRunner from .sql_resource import SqlResource +from .updates.update_records import UpdateRecords __all__ = ["ConfigValidationError", "PpdbBigQuery", "PpdbBigQueryConfig"] @@ -117,6 +118,30 @@ def fq_dataset_id(self) -> str: return f"{self.project_id}:{self.dataset_id}" +class _SecretManagerPasswordProvider(PasswordProvider): + """Retrieves a database password from Google Cloud Secret Manager. + + Parameters + ---------- + project_id : `str` + GCP project that owns the secret. + secret_name : `str`, optional + Name of the secret. Defaults to ``"ppdb-db-password"``. + """ + + def __init__(self, project_id: str, secret_name: str = "ppdb-db-password") -> None: + self._project_id = project_id + self._secret_name = secret_name + + def get_password(self) -> str: + """Return the password fetched from Secret Manager.""" + client = secretmanager.SecretManagerServiceClient() + name = f"projects/{self._project_id}/secrets/{self._secret_name}/versions/latest" + _LOG.info("Retrieving database password from Secret Manager: %s", name) + response = client.access_secret_version(request={"name": name}) + return response.payload.data.decode("UTF-8") + + class ConfigValidationError(Exception): """Indicates an error validating the configuration.""" @@ -131,8 +156,15 @@ class PpdbBigQuery(Ppdb, PpdbSqlBase): """ def __init__(self, config: PpdbBigQueryConfig): - # Initialize the SQL interface for the PPDB - self._init_sql(config) + # Build an optional password provider for GCP Secret Manager. 
+ password_provider: PasswordProvider | None = None + if os.getenv("PPDB_USE_SECRET_MANAGER", "false").lower() == "true": + _LOG.info("Using Secret Manager to retrieve database password") + password_provider = _SecretManagerPasswordProvider(config.project_id) + + # Delegate SQL initialisation (schema load, engine, metadata, version + # checks) to the base class, passing the optional password provider. + PpdbSqlBase.__init__(self, config.sql, password_provider=password_provider) # Read parameters from config if config.replication_dir is None: @@ -166,22 +198,6 @@ def query_runner(self) -> QueryRunner: self._query_runner = QueryRunner(self.config.project_id, self.config.dataset_id) return self._query_runner - def _init_sql(self, config: PpdbBigQueryConfig) -> None: - sql_config = config.sql - self._sa_metadata, self._schema_version = self.read_schema( - sql_config.felis_path, sql_config.schema_name, sql_config.felis_schema, sql_config.db_url - ) - - self._engine = self._make_engine(config) # Includes Secrets Manager support - sa_metadata = sqlalchemy.MetaData(schema=sql_config.schema_name) - - meta_table = sqlalchemy.schema.Table("metadata", sa_metadata, autoload_with=self._engine) - self._metadata = ApdbMetadataSql(self._engine, meta_table) - - # Check schema amd code version compatibility. - self._check_schema_version(self._schema_version) - self._check_code_version() - def _generate_manifest( self, replica_chunk: ReplicaChunk, @@ -501,47 +517,6 @@ def filter_table_names(cls, original_table_names: Iterable[str]) -> Iterable[str # Only the metadata table is needed for the BigQuery-based PPDB. 
return ["metadata"] - @classmethod - def _get_secretmanager_password(cls, project_id: str, password_name: str = "ppdb-db-password") -> str: - from google.cloud import secretmanager - - client = secretmanager.SecretManagerServiceClient() - name = f"projects/{project_id}/secrets/{password_name}/versions/latest" - response = client.access_secret_version(request={"name": name}) - return response.payload.data.decode("UTF-8") - - @classmethod - def _use_secret_manager(cls) -> bool: - return os.getenv("PPDB_USE_SECRET_MANAGER", "false").lower() == "true" - - @classmethod - def _make_engine(cls, config: PpdbBigQueryConfig) -> sqlalchemy.engine.Engine: - """Make SQLALchemy engine based on configured parameters. - - Parameters - ---------- - config : `PpdbBigQueryConfig` - Configuration object with SQL parameters. - """ - sql_config = config.sql - db_url = sqlalchemy.make_url(sql_config.db_url) - - # If using Secret Manager, retrieve the password and update the - # database URL. - if cls._use_secret_manager(): - _LOG.info("Using Secret Manager to retrieve database password") - if db_url.password is not None: - raise ValueError("Database URL should not include a password when using Secret Manager") - password = cls._get_secretmanager_password(config.project_id) - db_url = db_url.set(password=password) - - kw = cls._build_connect_args(sql_config) - engine = sqlalchemy.create_engine(db_url, **kw) - - cls._config_listeners(engine) - - return engine - @classmethod def init_bigquery( cls, @@ -627,7 +602,11 @@ def init_bigquery( if stage_chunk_topic is not None: bq_config.stage_chunk_topic = stage_chunk_topic - engine = cls._make_engine(bq_config) + password_provider: PasswordProvider | None = None + if os.getenv("PPDB_USE_SECRET_MANAGER", "false").lower() == "true": + _LOG.info("Using Secret Manager to retrieve database password") + password_provider = _SecretManagerPasswordProvider(bq_config.project_id) + engine = cls.make_engine(bq_config.sql, 
password_provider=password_provider) cls.make_database(engine, bq_config.sql, sa_metadata, schema_version, db_drop) # Validate the config if requested. @@ -731,9 +710,6 @@ def _handle_updates( Serializes the ApdbUpdateRecord objects into a dictionary structure for processing. """ - # Import inlined here to avoid triggering google cloud imports - from .updates.update_records import UpdateRecords - update_records = UpdateRecords( replica_chunk_id=replica_chunk.id, records=list(apdb_update_records), diff --git a/python/lsst/dax/ppdb/sql/__init__.py b/python/lsst/dax/ppdb/sql/__init__.py index 92e21081..566853c3 100644 --- a/python/lsst/dax/ppdb/sql/__init__.py +++ b/python/lsst/dax/ppdb/sql/__init__.py @@ -20,4 +20,4 @@ # along with this program. If not, see . from ._ppdb_sql import PpdbSql, PpdbSqlConfig -from ._ppdb_sql_base import PpdbSqlBase, PpdbSqlBaseConfig +from ._ppdb_sql_base import PasswordProvider, PpdbSqlBase, PpdbSqlBaseConfig diff --git a/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py b/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py index 5db51d5d..542029e5 100644 --- a/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py +++ b/python/lsst/dax/ppdb/sql/_ppdb_sql_base.py @@ -21,11 +21,12 @@ from __future__ import annotations -__all__ = ["PpdbSqlBase"] +__all__ = ["PasswordProvider", "PpdbSqlBase"] import logging import os import sqlite3 +from abc import ABC, abstractmethod from collections.abc import Iterable, MutableMapping from contextlib import closing from typing import Any @@ -49,6 +50,25 @@ _LOG = logging.getLogger(__name__) +class PasswordProvider(ABC): + """Abstract base class for objects that supply a database password. + + Implementations are free to retrieve the password from any source + (e.g. environment variables, a secrets manager, a local file) without + `PpdbSqlBase` needing to know about the mechanism. + """ + + @abstractmethod + def get_password(self) -> str: + """Return the database password. 
+ + Returns + ------- + password : `str` + Plain-text password to embed in the database connection URL. + """ + + class MissingSchemaVersionError(RuntimeError): """Exception raised when schema version is not defined in the schema. @@ -121,12 +141,12 @@ class PpdbSqlBase: meta_schema_version_key = "version:schema" """Name of the metadata key to store Felis schema version number.""" - def __init__(self, config: PpdbSqlBaseConfig) -> None: + def __init__(self, config: PpdbSqlBaseConfig, password_provider: PasswordProvider | None = None) -> None: self._sa_metadata, self._schema_version = self.read_schema( config.felis_path, config.schema_name, config.felis_schema, config.db_url ) - self._engine = self.make_engine(config) + self._engine = self.make_engine(config, password_provider=password_provider) sa_metadata = sqlalchemy.MetaData(schema=config.schema_name) meta_table = sqlalchemy.schema.Table("metadata", sa_metadata, autoload_with=self._engine) @@ -155,23 +175,40 @@ def _build_connect_args(cls, config: PpdbSqlBaseConfig) -> MutableMapping[str, A return {"connect_args": conn_args} @classmethod - def _config_listeners(cls, engine: sqlalchemy.engine.Engine) -> None: - if engine.dialect.name == "sqlite": - # Need to enable foreign keys on every new connection. - sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect) - - @classmethod - def make_engine(cls, config: PpdbSqlBaseConfig) -> sqlalchemy.engine.Engine: + def make_engine( + cls, + config: PpdbSqlBaseConfig, + *, + password_provider: PasswordProvider | None = None, + ) -> sqlalchemy.engine.Engine: """Make SQLALchemy engine based on configured parameters. Parameters ---------- config : `PpdbSqlBaseConfig` Configuration object with SQL parameters. + password_provider : `PasswordProvider`, optional + If provided, the password returned by + ``password_provider.get_password()`` is injected into the + database URL. The URL must not already contain a password when + this argument is given. 
+ + Raises + ------ + ValueError + Raised if ``password_provider`` is given but the URL already + contains a password. """ + db_url = sqlalchemy.make_url(config.db_url) + if password_provider is not None: + if db_url.password is not None: + raise ValueError("Database URL must not contain a password when password_provider is used.") + db_url = db_url.set(password=password_provider.get_password()) kw = cls._build_connect_args(config) - engine = sqlalchemy.create_engine(config.db_url, **kw) - cls._config_listeners(engine) + engine = sqlalchemy.create_engine(db_url, **kw) + if engine.dialect.name == "sqlite": + # Need to enable foreign keys on every new connection. + sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect) return engine From 855c826c4dd47d9cc60b47bd54362335d2eff7b1 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 20 Mar 2026 11:51:26 -0500 Subject: [PATCH 46/49] Fix circular reference in imports --- python/lsst/dax/ppdb/bigquery/updates/__init__.py | 1 - tests/test_updates_manager.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/updates/__init__.py b/python/lsst/dax/ppdb/bigquery/updates/__init__.py index ef9abea9..1536958d 100644 --- a/python/lsst/dax/ppdb/bigquery/updates/__init__.py +++ b/python/lsst/dax/ppdb/bigquery/updates/__init__.py @@ -29,4 +29,3 @@ from .update_records import UpdateRecords from .update_record_expander import UpdateRecordExpander from .updates_table import UpdatesTable -from .updates_manager import UpdatesManager diff --git a/tests/test_updates_manager.py b/tests/test_updates_manager.py index 77c02729..22ae556c 100644 --- a/tests/test_updates_manager.py +++ b/tests/test_updates_manager.py @@ -34,7 +34,8 @@ ReplicaChunk, ) from lsst.dax.ppdb import Ppdb -from lsst.dax.ppdb.bigquery import PpdbBigQuery, updates +from lsst.dax.ppdb.bigquery import PpdbBigQuery +from lsst.dax.ppdb.bigquery.updates.updates_manager import UpdatesManager from 
lsst.dax.ppdb.tests._bigquery import ( ChunkUploaderWithoutPubSub, PostgresMixin, @@ -253,5 +254,5 @@ def rows(self) -> Collection[tuple]: uploader.run() # Apply the updates to the target tables - updates_manager = updates.UpdatesManager(self.ppdb) + updates_manager = UpdatesManager(self.ppdb) updates_manager.apply_updates([update_records.replica_chunk_id]) From cdfa69f30c1ce235e43ebc31ccf99447e5607dca Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 20 Mar 2026 11:51:52 -0500 Subject: [PATCH 47/49] Remove unnecessary property functions --- .../ppdb/bigquery/replica_chunk_promoter.py | 86 +++++-------------- 1 file changed, 21 insertions(+), 65 deletions(-) diff --git a/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py b/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py index 4c67a2e4..424b8a27 100644 --- a/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py +++ b/python/lsst/dax/ppdb/bigquery/replica_chunk_promoter.py @@ -27,14 +27,13 @@ ] import logging -from collections.abc import Callable from google.api_core.exceptions import NotFound from google.cloud import bigquery from .ppdb_bigquery import PpdbBigQuery from .query_runner import QueryRunner -from .updates import UpdatesManager +from .updates.updates_manager import UpdatesManager class NoPromotableChunksError(Exception): @@ -60,6 +59,8 @@ def __init__( table_names: list[str] | None = None, ): self._ppdb = ppdb + self._project_id = self._ppdb._config.project_id + self._dataset_id = self._ppdb._config.dataset_id self._runner = ppdb.query_runner # DM-52326: Hard-coded table names; these should be passed in from # config. 
@@ -76,26 +77,6 @@ def __init__( self._promotable_chunks: list[int] = [] - @property - def ppdb(self) -> PpdbBigQuery: - """PPDB interface to BigQuery.""" - return self._ppdb - - @property - def project_id(self) -> str: - """Google Cloud project ID (`str`, read-only).""" - return self._runner.project_id - - @property - def dataset_id(self) -> str: - """Dataset ID (`str`, read-only).""" - return self._runner.dataset_id - - @property - def table_names(self) -> list[str]: - """List of table names to promote (`list`[`str`], read-only).""" - return self._table_names - @property def promotable_chunks(self) -> list[int]: """List of promotable chunks (`list` [ `int` ], read-only).""" @@ -107,37 +88,12 @@ def promotable_chunks(self, chunks: list[int]) -> None: raise NoPromotableChunksError("No promotable chunks provided") self._promotable_chunks = chunks - @property - def promotable_chunk_count(self) -> int: - """Count of promotable chunks that were found in the database.""" - return len(self.promotable_chunks) - - @property - def runner(self) -> QueryRunner: - """Runner for executing BigQuery jobs (`QueryRunner`, read-only).""" - return self._runner - - @property - def bq_client(self) -> bigquery.Client: - """Client for interacting with BigQuery (`bigquery.Client`, - read-only). - """ - return self._bq_client - - @property - def phases(self) -> dict[str, Callable]: - """Phases of the promotion process as a dictionary mapping phase names - to their corresponding class methods (`dict`[`str`, `Callable`], - read-only). - """ - return self._phases - @property def table_prod_refs(self) -> list[str]: """Fully-qualified production table references (`list`[`str`], read-only). 
""" - return [f"{self.project_id}.{self.dataset_id}.{table_name}" for table_name in self.table_names] + return [f"{self._project_id}.{self._dataset_id}.{table_name}" for table_name in self._table_names] @property def table_staging_refs(self) -> list[str]: @@ -145,7 +101,7 @@ def table_staging_refs(self) -> list[str]: read-only). """ return [ - f"{self.project_id}.{self.dataset_id}._{table_name}_staging" for table_name in self.table_names + f"{self._project_id}.{self._dataset_id}._{table_name}_staging" for table_name in self._table_names ] @property @@ -154,8 +110,8 @@ def table_promoted_tmp_refs(self) -> list[str]: read-only). """ return [ - f"{self.project_id}.{self.dataset_id}._{table_name}_promoted_tmp" - for table_name in self.table_names + f"{self._project_id}.{self._dataset_id}._{table_name}_promoted_tmp" + for table_name in self._table_names ] def _execute_phase(self, phase: str) -> None: @@ -167,14 +123,14 @@ def _execute_phase(self, phase: str) -> None: The name of the promotion phase to execute. This should be one of the keys in the `phases` property. 
""" - if phase not in self.phases: + if phase not in self._phases: raise ValueError(f"Unknown promotion phase: {phase}") logging.debug("Executing promotion phase: %s", phase) self._phases[phase]() def _get_promotable_chunks(self) -> None: """Get list of promotable chunks from the database.""" - self._promotable_chunks = self.ppdb.get_promotable_chunks() + self._promotable_chunks = self._ppdb.get_promotable_chunks() logging.info("Promotable chunk count: %s", len(self.promotable_chunks)) def _copy_to_promoted_tmp(self) -> None: @@ -191,13 +147,13 @@ def _copy_to_promoted_tmp(self) -> None: ): # Drop any existing tmp table (should not exist but just to be # safe) - self.runner.run_job("drop_tmp", f"DROP TABLE IF EXISTS `{tmp_ref}`") + self._runner.run_job("drop_tmp", f"DROP TABLE IF EXISTS `{tmp_ref}`") # Clone prod table structure and data (zero-copy) - self.runner.run_job("clone_prod", f"CREATE TABLE `{tmp_ref}` CLONE `{prod_ref}`") + self._runner.run_job("clone_prod", f"CREATE TABLE `{tmp_ref}` CLONE `{prod_ref}`") # Build ordered target list from the cloned tmp schema - tmp_schema = self.bq_client.get_table(tmp_ref).schema + tmp_schema = self._bq_client.get_table(tmp_ref).schema target_names = [f.name for f in tmp_schema if f.name != "apdb_replica_chunk"] target_list_sql = ", ".join(f"`{n}`" for n in target_names) @@ -214,7 +170,7 @@ def _copy_to_promoted_tmp(self) -> None: WHERE s.apdb_replica_chunk IN UNNEST(@ids) """ logging.debug("SQL for inserting staged rows into %s: %s", tmp_ref, sql) - self.runner.run_job("insert_staged_to_tmp", sql, job_config=job_cfg) + self._runner.run_job("insert_staged_to_tmp", sql, job_config=job_cfg) def _promote_tmp_to_prod(self) -> None: """ @@ -225,14 +181,14 @@ def _promote_tmp_to_prod(self) -> None: for prod_ref, tmp_ref in zip(self.table_prod_refs, self.table_promoted_tmp_refs, strict=False): # Ensure tmp exists try: - self.bq_client.get_table(tmp_ref) + self._bq_client.get_table(tmp_ref) except NotFound as e: raise 
RuntimeError(f"Missing tmp table for promotion: {tmp_ref}") from e # Atomic zero-copy replacement of prod with tmp copy_cfg = bigquery.CopyJobConfig(write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE) - job = self.bq_client.copy_table( - tmp_ref, prod_ref, job_config=copy_cfg, location=self.runner.location + job = self._bq_client.copy_table( + tmp_ref, prod_ref, job_config=copy_cfg, location=self._runner.location ) job.result() QueryRunner.log_job(job, "promote_tmp_to_prod") @@ -240,7 +196,7 @@ def _promote_tmp_to_prod(self) -> None: def _cleanup(self) -> None: """Drop the promotion temporary tables.""" for tmp_ref in self.table_promoted_tmp_refs: - self.bq_client.delete_table(tmp_ref, not_found_ok=True) + self._bq_client.delete_table(tmp_ref, not_found_ok=True) logging.debug("Dropped %s (if it existed)", tmp_ref) def _delete_staged_chunks(self) -> None: @@ -254,7 +210,7 @@ def _delete_staged_chunks(self) -> None: for staging_ref in self.table_staging_refs: try: sql = f"DELETE FROM `{staging_ref}` WHERE apdb_replica_chunk IN UNNEST(@ids)" - self.runner.run_job("delete_staged_chunks", sql, job_config=job_config) + self._runner.run_job("delete_staged_chunks", sql, job_config=job_config) logging.debug( "Deleted %d chunk(s) from staging table %s", len(self.promotable_chunks), staging_ref ) @@ -263,12 +219,12 @@ def _delete_staged_chunks(self) -> None: def _apply_record_updates(self) -> None: """Apply record updates to the promoted temporary tables.""" - updates_manager = UpdatesManager(self.ppdb, table_name_postfix="_promoted_tmp") - updates_manager.apply_updates(self._promotable_chunks) + updates_manager = UpdatesManager(self._ppdb, table_name_postfix="_promoted_tmp") + updates_manager.apply_updates(self.promotable_chunks) def _mark_chunks_promoted(self) -> None: """Mark the replica chunks as promoted in the database.""" - self.ppdb.mark_chunks_promoted(self._promotable_chunks) + self._ppdb.mark_chunks_promoted(self.promotable_chunks) def promote_chunks(self) 
-> None: """Promote APDB replica chunks into production by executing a series of From 32f0fea9f77e250ca3ee20127be184b958c49802 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 20 Mar 2026 11:54:15 -0500 Subject: [PATCH 48/49] Remove no longer necessary check for test execution --- tests/test_update_record_expander.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/test_update_record_expander.py b/tests/test_update_record_expander.py index 596acfed..5038bfb5 100644 --- a/tests/test_update_record_expander.py +++ b/tests/test_update_record_expander.py @@ -32,12 +32,10 @@ ApdbWithdrawDiaForcedSourceRecord, ApdbWithdrawDiaSourceRecord, ) -from lsst.dax.ppdb.bigquery import updates from lsst.dax.ppdb.bigquery.updates import ExpandedUpdateRecord, UpdateRecordExpander, UpdateRecords from lsst.dax.ppdb.tests._updates import _create_test_update_records -@unittest.skipIf(updates is None, "Google Cloud environment not available") class UpdateRecordExpanderTestCase(unittest.TestCase): """Test UpdateRecordExpander functionality.""" @@ -52,8 +50,6 @@ def setUp(self) -> None: def test_get_update_fields(self) -> None: """Test get_update_fields class method.""" - from lsst.dax.ppdb.bigquery.updates import UpdateRecordExpander - # Test known update types self.assertEqual( UpdateRecordExpander.get_update_fields("reassign_diasource_to_diaobject"), ["diaObjectId"] @@ -284,13 +280,6 @@ def test_update_records_all(self) -> None: expanded = UpdateRecordExpander.expand_updates(update_records) - # Should have 8 total expanded records: - # - 1 from ApdbReassignDiaSourceToDiaObjectRecord - # - 2 from ApdbReassignDiaSourceToSSObjectRecord - # - 1 from ApdbWithdrawDiaSourceRecord - # - 1 from ApdbWithdrawDiaForcedSourceRecord - # - 2 from ApdbCloseDiaObjectValidityRecord - # - 1 from ApdbUpdateNDiaSourcesRecord self.assertEqual(len(expanded), 10) # Verify all expanded records have correct replica_chunk_id From 13694171409bbb0496b7560b9687fb198a19b06d Mon Sep 17 
00:00:00 2001 From: Jeremy McCormick Date: Fri, 20 Mar 2026 18:04:20 -0500 Subject: [PATCH 49/49] Add update method --- .../lsst/dax/ppdb/bigquery/ppdb_bigquery.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py index 0f697eaf..dd0a23ed 100644 --- a/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py +++ b/python/lsst/dax/ppdb/bigquery/ppdb_bigquery.py @@ -25,6 +25,7 @@ import shutil from collections.abc import Collection, Iterable, Sequence from pathlib import Path +from typing import Any import felis import sqlalchemy @@ -787,3 +788,43 @@ def mark_chunks_promoted(self, promotable_chunks: list[int]) -> int: with self._engine.begin() as conn: result: sqlalchemy.engine.CursorResult = conn.execute(stmt) return result.rowcount or 0 + + def update(self, chunk_id: int, values: dict[str, Any]) -> int: + """Update an existing replica chunk in the database. + + Parameters + ---------- + chunk_id : `int` + The ID of the replica chunk to update. + values : `dict`[`str`, `Any`] + A dictionary of column names and their new values to update. + + Returns + ------- + count : `int` + The number of rows updated. This should be 1 if the update is + successful, or 0 if no rows were updated (e.g., if the chunk ID + does not exist or the status is already set to the new value). 
+ """ + logging.info("Preparing to update replica chunk %d with values: %s", chunk_id, values) + table = self.get_table("PpdbReplicaChunk") + stmt = sqlalchemy.update(table).where(table.c.apdb_replica_chunk == chunk_id).values(values) + with self._engine.begin() as conn: + result = conn.execute(stmt) + affected_rows = result.rowcount + + new_status = values.get("status") + if affected_rows == 0: + logging.warning( + "No rows updated for replica chunk %s with status '%s'", + chunk_id, + new_status, + ) + else: + logging.info( + "Successfully updated %d row(s) for replica chunk %s to status '%s'", + affected_rows, + chunk_id, + new_status, + ) + return affected_rows