From 4b60c7074843794ef5e84f9dc26c0bd4732ec610 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Mon, 29 Sep 2025 12:29:53 -0700
Subject: [PATCH 01/35] introducing branching

---
 .../OpenHouseInternalTableOperations.java     | 141 ++-
 .../spark/catalogtest/BranchTestSpark3_5.java | 878 ++++++++++++++++++
 2 files changed, 990 insertions(+), 29 deletions(-)
 create mode 100644 integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
index d9fa34257..793167e47 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
@@ -24,6 +24,7 @@
 import java.time.Clock;
 import java.time.Instant;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -287,6 +288,9 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) {
             serializedSnapshotRefs == null
                 ? new HashMap<>()
                 : SnapshotsUtil.parseSnapshotRefs(serializedSnapshotRefs);
+
+        // Multi-branch support is now enabled with snapshot ID matching
+
         updatedMetadata =
             maybeAppendSnapshots(updatedMetadata, appendedSnapshots, snapshotRefs, true);
         updatedMetadata = maybeDeleteSnapshots(updatedMetadata, deletedSnapshots);
@@ -554,6 +558,67 @@ public TableMetadata maybeDeleteSnapshots(
     return result;
   }
 
+  /**
+   * Determines the target branch for a snapshot commit based on the provided snapshotRefs.
+   *
+   * @param snapshotRefs map of branch names to snapshot references
+   * @param defaultBranch default branch to use if no specific branch can be determined
+   * @return target branch name for the snapshot commit
+   */
+  private String determineTargetBranch(
+      Map<String, SnapshotRef> snapshotRefs, String defaultBranch) {
+    return determineTargetBranch(snapshotRefs, Collections.emptyList(), defaultBranch);
+  }
+
+  /**
+   * Determines the target branch for snapshot commits by matching snapshot IDs. When multiple
+   * branches are present, finds which branch should receive the new snapshots.
+   */
+  private String determineTargetBranch(
+      Map<String, SnapshotRef> snapshotRefs, List<Snapshot> newSnapshots, String defaultBranch) {
+    if (MapUtils.isEmpty(snapshotRefs)) {
+      return defaultBranch;
+    }
+
+    // If there's only one branch in the refs, use that as the target
+    if (snapshotRefs.size() == 1) {
+      return snapshotRefs.keySet().iterator().next();
+    }
+
+    // CRITICAL FIX: For multi-branch scenarios, find which branch should get the new snapshots
+    if (!newSnapshots.isEmpty()) {
+      // Get the latest snapshot ID from new snapshots
+      long latestSnapshotId = newSnapshots.get(newSnapshots.size() - 1).snapshotId();
+
+      // Find which branch in snapshotRefs should point to this snapshot
+      for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
+        String branchName = entry.getKey();
+        long branchSnapshotId = entry.getValue().snapshotId();
+
+        if (branchSnapshotId == latestSnapshotId) {
+          log.debug(
+              "Determined target branch '{}' by snapshot ID match: {}",
+              branchName,
+              latestSnapshotId);
+          return branchName;
+        }
+      }
+    }
+
+    // Fallback: if we can't match by snapshot ID, prefer non-main branches for branch operations
+    for (String branchName : snapshotRefs.keySet()) {
+      if (!branchName.equals(SnapshotRef.MAIN_BRANCH)) {
+        log.debug(
+            "Multiple branches, no snapshot match, preferring non-main branch: {}", branchName);
+        return branchName;
+      }
+    }
+
+    // Final fallback to main
+    log.debug("Multiple branches, falling back to main branch");
+    return SnapshotRef.MAIN_BRANCH;
+  }
+
   public TableMetadata maybeAppendSnapshots(
       TableMetadata metadata,
       List<Snapshot> snapshotsToAppend,
@@ -563,62 +628,80 @@ public TableMetadata maybeAppendSnapshots(
     List<String> appendedSnapshots = new ArrayList<>();
     List<String> stagedSnapshots = new ArrayList<>();
     List<String> cherryPickedSnapshots = new ArrayList<>();
-    // Throw an exception if client sent request that included non-main branches in the
-    // snapshotRefs.
-    for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
-      if (!entry.getKey().equals(SnapshotRef.MAIN_BRANCH)) {
-        throw new UnsupportedOperationException("OpenHouse supports only MAIN branch");
-      }
-    }
+
     /**
      * First check if there are new snapshots to be appended to current TableMetadata. If yes,
      * following are the cases to be handled:
      *
-     * <p>[1] A regular (non-wap) snapshot is being added to the MAIN branch.
+     * <p>[1] A regular (non-wap) snapshot is being added to any branch.
      *
      * <p>[2] A staged (wap) snapshot is being created on top of current snapshot as its base.
-     * Recognized by STAGED_WAP_ID_PROP.
+     * Recognized by STAGED_WAP_ID_PROP. These are stage-only and not committed to any branch.
      *
-     * <p>[3] A staged (wap) snapshot is being cherry picked to the MAIN branch wherein current
-     * snapshot in the MAIN branch is not the same as the base snapshot the staged (wap) snapshot
-     * was created on. Recognized by SOURCE_SNAPSHOT_ID_PROP. This case is called non-fast forward
+     * <p>[3] A staged (wap) snapshot is being cherry picked to any branch wherein current snapshot
+     * in the target branch is not the same as the base snapshot the staged (wap) snapshot was
+     * created on. Recognized by SOURCE_SNAPSHOT_ID_PROP. This case is called non-fast forward
      * cherry pick.
      *
      * <p>In case no new snapshots are to be appended to current TableMetadata, there could be a
-     * cherrypick of a staged (wap) snapshot on top of the current snapshot in the MAIN branch which
-     * is the same as the base snapshot the staged (wap) snapshot was created on. This case is
-     * called fast forward cherry pick.
+     * cherrypick of a staged (wap) snapshot on top of the current snapshot in any branch which is
+     * the same as the base snapshot the staged (wap) snapshot was created on. This case is called
+     * fast forward cherry pick.
      */
     if (CollectionUtils.isNotEmpty(snapshotsToAppend)) {
       for (Snapshot snapshot : snapshotsToAppend) {
         snapshotInspector.validateSnapshot(snapshot);
         if (snapshot.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP)) {
-          // a stage only snapshot using wap.id
+          // a stage only snapshot using wap.id - not committed to any branch
           metadataBuilder.addSnapshot(snapshot);
           stagedSnapshots.add(String.valueOf(snapshot.snapshotId()));
         } else if (snapshot.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)) {
           // a snapshot created on a non fast-forward cherry-pick snapshot
-          metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
+          // Determine target branch from snapshotRefs or default to MAIN_BRANCH
+          String targetBranch =
+              determineTargetBranch(snapshotRefs, snapshotsToAppend, SnapshotRef.MAIN_BRANCH);
+          metadataBuilder.setBranchSnapshot(snapshot, targetBranch);
           appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
           cherryPickedSnapshots.add(
               String.valueOf(snapshot.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)));
         } else {
-          // a regular snapshot
-          metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
+          // a regular snapshot - assign to appropriate branch using snapshotRefs context
+          if (MapUtils.isNotEmpty(snapshotRefs)) {
+            // We have explicit branch information, use it to assign snapshot
+            String targetBranch =
+                determineTargetBranch(snapshotRefs, snapshotsToAppend, SnapshotRef.MAIN_BRANCH);
+            metadataBuilder.setBranchSnapshot(snapshot, targetBranch);
+          } else {
+            // No explicit branch refs - treat as staged snapshot
+            // This maintains isolation until refs are explicitly updated
+            metadataBuilder.addSnapshot(snapshot);
+          }
           appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
         }
       }
     } else if (MapUtils.isNotEmpty(snapshotRefs)) {
-      // Updated ref in the main branch with no new snapshot means this is a
-      // fast-forward cherry-pick or rollback operation.
-      long newSnapshotId = snapshotRefs.get(SnapshotRef.MAIN_BRANCH).snapshotId();
-      // Either the current snapshot is null or the current snapshot is not equal
-      // to the new snapshot indicates an update. The first case happens when the
-      // stage/wap snapshot being cherry-picked is the first snapshot.
-      if (MapUtils.isEmpty(metadata.refs())
-          || metadata.refs().get(SnapshotRef.MAIN_BRANCH).snapshotId() != newSnapshotId) {
-        metadataBuilder.setBranchSnapshot(newSnapshotId, SnapshotRef.MAIN_BRANCH);
-        cherryPickedSnapshots.add(String.valueOf(newSnapshotId));
+      // Handle ref updates for all branches (fast-forward cherry-pick or rollback operations)
+      for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
+        String branchName = entry.getKey();
+        long newSnapshotId = entry.getValue().snapshotId();
+
+        // Check if this is an actual update for this branch
+        boolean isUpdate = false;
+        if (MapUtils.isEmpty(metadata.refs())) {
+          // No refs exist yet, this is a new branch
+          isUpdate = true;
+        } else {
+          SnapshotRef currentRef = metadata.refs().get(branchName);
+          if (currentRef == null || currentRef.snapshotId() != newSnapshotId) {
+            // Branch doesn't exist or snapshot is different
+            isUpdate = true;
+          }
+        }
+
+        if (isUpdate) {
+          metadataBuilder.setBranchSnapshot(newSnapshotId, branchName);
+          cherryPickedSnapshots.add(String.valueOf(newSnapshotId));
+        }
       }
     }
     if (recordAction) {
diff --git a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
new file mode 100644
index 000000000..c5b239a4e
--- /dev/null
+++ b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
@@ -0,0 +1,878 @@
+package com.linkedin.openhouse.spark.catalogtest;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import com.linkedin.openhouse.tablestest.OpenHouseSparkITest;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Comprehensive tests for multi-branch WAP operations in Spark 3.5. Tests validate the enhanced
+ * maybeAppendSnapshots functionality that supports: - Non-main branch operations (add/expire
+ * snapshots from any branch) - WAP.id staging with multi-branch support - Cherry picking between
+ * any branches - Fast forward merges for all branches - Backward compatibility with main-only
+ * workflows - Forward compatibility for future wap.branch features
+ */
+public class BranchTestSpark3_5 extends OpenHouseSparkITest {
+
+  // ===== BASIC BRANCH OPERATIONS =====
+
+  @Test
+  public void testBasicBranchOperations() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName);
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+
+      // Add initial data to main
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main.initial')");
+
+      // Create feature branch
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+
+      // Write to feature branch
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature-a.data1')");
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature-a.data2')");
+
+      // Verify branch isolation
+      assertEquals(
+          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main has 1 row
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size()); // feature-a has 3 rows
+
+      // Verify refs exist for both branches
+      List<Row> refs =
+          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
+      assertEquals(2, refs.size());
+      assertEquals("feature_a", refs.get(0).getString(0));
+      assertEquals("main", refs.get(1).getString(0));
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  // ===== WAP STAGING WITH MULTI-BRANCH SUPPORT =====
+
+  @Test
+  public void testWapStagingWithBranches() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup main and feature branches
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main.data')");
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature-a.data')");
+
+      // Stage WAP snapshot (should not affect any branch)
+      spark.conf().set("spark.wap.id", "multi-branch-wap");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('wap.staged.data')");
+      spark.conf().unset("spark.wap.id");
+
+      // Verify WAP staging doesn't affect branch visibility
+      assertEquals(
+          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main unchanged
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size()); // feature-a unchanged
+
+      // Verify WAP snapshot exists but no new refs
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + ".snapshots")
+              .collectAsList()
+              .size()); // 1 main + 1 feature + 1 wap
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + ".refs")
+              .collectAsList()
+              .size()); // main + feature-a only
+
+      // Verify WAP snapshot has correct properties
+      List<Row> wapSnapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'multi-branch-wap'")
+              .collectAsList();
+      assertEquals(1, wapSnapshots.size());
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  // ===== CHERRY PICKING BETWEEN BRANCHES =====
+
+  @Test
+  public void testCherryPickToMainWithFeatureBranch() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup branches
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main.base')");
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+
+      // Create WAP snapshot
+      spark.conf().set("spark.wap.id", "feature-target-wap");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('wap.for.feature')");
+      String wapSnapshotId =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'feature-target-wap'")
+              .first()
+              .mkString();
+      spark.conf().unset("spark.wap.id");
+
+      // CRITICAL: Advance main branch to force non-fast-forward cherry-pick
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main.advance')");
+
+      // Cherry-pick WAP to main branch (this tests our enhanced maybeAppendSnapshots)
+      // Main should have 2 rows now (main.base + main.advance)
+      assertEquals(2, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
+      spark.sql(
+          String.format(
+              "CALL openhouse.system.cherrypick_snapshot('"
+                  + tableName.replace("openhouse.", "")
+                  + "', %s)",
+              wapSnapshotId));
+
+      // Verify cherry-pick worked - 3 rows of data should appear in main (main.base + main.advance
+      // + wap.for.feature)
+      assertEquals(3, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
+      assertEquals(
+          1,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size());
+
+      // Verify published WAP snapshot properties
+      List<Row> publishedSnapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['published-wap-id'] = 'feature-target-wap'")
+              .collectAsList();
+      assertTrue(
+          publishedSnapshots.size() >= 1,
+          "Should find at least one snapshot with published-wap-id");
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  // ===== FAST FORWARD MERGES =====
+
+  @Test
+  public void testFastForwardMergeToMain() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+
+      // Setup base data
+      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
+
+      // Create feature branch from main
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+
+      // Advance feature branch
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data1')");
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data2')");
+
+      // Verify initial state
+      assertEquals(
+          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main has 1 row
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size()); // feature has 3 rows
+
+      // Fast-forward main to feature_a
+      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'main', 'feature_a')");
+
+      // Verify fast-forward worked - main should now have same data as feature_a
+      assertEquals(3, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size());
+
+      // Verify both branches point to same snapshot
+      String mainSnapshot =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
+              .first()
+              .mkString();
+      String featureSnapshot =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_a'")
+              .first()
+              .mkString();
+      assertEquals(mainSnapshot, featureSnapshot);
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  @Test
+  public void testFastForwardMergeToFeature() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+
+      // Setup base data
+      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
+
+      // Create feature branch from main
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+
+      // Advance main branch (feature_a stays at base)
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main.data1')");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main.data2')");
+
+      // Verify initial state
+      assertEquals(
+          3,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main has 3 rows
+      assertEquals(
+          1,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size()); // feature has 1 row
+
+      // Fast-forward feature_a to main
+      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'feature_a', 'main')");
+
+      // Verify fast-forward worked - feature_a should now have same data as main
+      assertEquals(3, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size());
+
+      // Verify both branches point to same snapshot
+      String mainSnapshot =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
+              .first()
+              .mkString();
+      String featureSnapshot =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_a'")
+              .first()
+              .mkString();
+      assertEquals(mainSnapshot, featureSnapshot);
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  @Test
+  public void testFastForwardMergeWithWapId() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup base data
+      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
+
+      // Create feature branch
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+
+      // Create WAP snapshot
+      spark.conf().set("spark.wap.id", "test-wap");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('wap.data')");
+      String wapSnapshotId =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'test-wap'")
+              .first()
+              .mkString();
+      spark.conf().unset("spark.wap.id");
+
+      // Advance feature branch normally (not using WAP)
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data')");
+
+      // Verify WAP snapshot doesn't interfere with fast-forward
+      assertEquals(
+          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main unchanged
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size()); // feature advanced
+
+      // Fast-forward main to feature_a should work despite WAP presence
+      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'main', 'feature_a')");
+
+      // Verify fast-forward worked
+      assertEquals(2, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size());
+
+      // Verify WAP snapshot is still available for cherry-pick
+      List<Row> wapSnapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'test-wap'")
+              .collectAsList();
+      assertEquals(1, wapSnapshots.size());
+      assertEquals(wapSnapshotId, wapSnapshots.get(0).mkString());
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  @Test
+  public void testFastForwardMergeBetweenTwoFeatureBranches() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+
+      // Setup base data
+      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
+
+      // Create two feature branches from main
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_b");
+
+      // Advance feature_a
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature_a.data1')");
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature_a.data2')");
+
+      // Verify initial state
+      assertEquals(
+          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main has 1 row
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size()); // feature_a has 3 rows
+      assertEquals(
+          1,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_b'")
+              .collectAsList()
+              .size()); // feature_b has 1 row
+
+      // Fast-forward feature_b to feature_a
+      spark.sql(
+          "CALL openhouse.system.fast_forward('" + tableName + "', 'feature_b', 'feature_a')");
+
+      // Verify fast-forward worked
+      assertEquals(
+          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main unchanged
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size()); // feature_a unchanged
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_b'")
+              .collectAsList()
+              .size()); // feature_b now matches feature_a
+
+      // Verify feature_a and feature_b point to same snapshot
+      String featureASnapshot =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_a'")
+              .first()
+              .mkString();
+      String featureBSnapshot =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_b'")
+              .first()
+              .mkString();
+      assertEquals(featureASnapshot, featureBSnapshot);
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  @Test
+  public void testFastForwardMergeIncompatibleLineage() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+
+      // Setup base data
+      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
+
+      // Create feature branch
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+
+      // Advance both branches independently (creating divergent history)
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main.divergent')");
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.divergent')");
+
+      // Verify divergent state
+      assertEquals(
+          2,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main has 2 rows
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size()); // feature_a has 2 rows (different)
+
+      // Attempt fast-forward should fail due to incompatible lineage
+      assertThrows(
+          Exception.class,
+          () ->
+              spark.sql(
+                  "CALL openhouse.system.fast_forward('" + tableName + "', 'main', 'feature_a')"),
+          "Fast-forward should fail when branches have divergent history");
+
+      // Verify branches remain unchanged after failed fast-forward
+      assertEquals(2, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size());
+
+      // Verify snapshots are still different
+      String mainSnapshot =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
+              .first()
+              .mkString();
+      String featureSnapshot =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_a'")
+              .first()
+              .mkString();
+      assertNotEquals(mainSnapshot, featureSnapshot);
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  // ===== SNAPSHOT EXPIRATION FROM NON-MAIN BRANCHES =====
+
+  @Test
+  public void testSnapshotExpirationFromFeatureBranch() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup: Create multiple snapshots to have some that can be expired
+
+      // 1. Create initial main data
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main.initial')");
+
+      // 2. Create feature branch from main
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+
+      // 3. Add multiple snapshots to feature branch
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data1')");
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data2')");
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data3')");
+
+      // 4. Query metadata tables to find snapshots that are NOT current branch heads
+
+      // Get all snapshots
+      List<Row> allSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".snapshots ORDER BY committed_at")
+              .collectAsList();
+      assertTrue(allSnapshots.size() >= 4, "Should have at least 4 snapshots");
+
+      // Get current branch head snapshots from refs table
+      List<Row> branchHeads =
+          spark.sql("SELECT snapshot_id FROM " + tableName + ".refs").collectAsList();
+      Set<String> referencedSnapshots =
+          branchHeads.stream().map(row -> row.mkString()).collect(Collectors.toSet());
+
+      System.out.println(
+          "DEBUG: All snapshots: "
+              + allSnapshots.stream().map(Row::mkString).collect(Collectors.toList()));
+      System.out.println("DEBUG: Referenced snapshots (branch heads): " + referencedSnapshots);
+
+      // Find snapshots that are NOT referenced by any branch head
+      List<String> unreferencedSnapshots =
+          allSnapshots.stream()
+              .map(Row::mkString)
+              .filter(snapshotId -> !referencedSnapshots.contains(snapshotId))
+              .collect(Collectors.toList());
+
+      System.out.println("DEBUG: Unreferenced snapshots: " + unreferencedSnapshots);
+
+      // We should have at least one unreferenced snapshot (intermediate feature snapshots)
+      assertFalse(
+          unreferencedSnapshots.isEmpty(),
+          "Should have at least one unreferenced snapshot to expire");
+
+      // Select the first unreferenced snapshot to expire
+      String snapshotToExpire = unreferencedSnapshots.get(0);
+
+      // Verify this snapshot exists in the snapshots table
+      List<Row> beforeExpiration =
+          spark.sql("SELECT snapshot_id FROM " + tableName + ".snapshots").collectAsList();
+      assertTrue(
+          beforeExpiration.stream().anyMatch(row -> row.mkString().equals(snapshotToExpire)),
+          "Snapshot to expire should exist before expiration");
+
+      // Expire the unreferenced snapshot
+      spark.sql(
+          String.format(
+              "CALL openhouse.system.expire_snapshots(table => '"
+                  + tableName.replace("openhouse.", "")
+                  + "', snapshot_ids => Array(%s))",
+              snapshotToExpire));
+
+      // Verify snapshot is gone
+      List<Row> afterExpiration =
+          spark.sql("SELECT snapshot_id FROM " + tableName + ".snapshots").collectAsList();
+      assertFalse(
+          afterExpiration.stream().anyMatch(row -> row.mkString().equals(snapshotToExpire)),
+          "Expired snapshot should no longer exist");
+
+      // Verify branches are still intact after expiration
+      // Main should have: main.initial = 1 row
+      assertEquals(1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
+
+      // Feature_a should have: main.initial + feature.data1 + feature.data2 + feature.data3 = 4
+      // rows
+      assertEquals(
+          4,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size());
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  @Test
+  public void testWapSnapshotExpirationWithMultipleBranches() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup multi-branch environment
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main.base')");
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.base')");
+
+      // Create multiple WAP snapshots
+      spark.conf().set("spark.wap.id", "wap-to-keep");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('wap.keep.data')");
+
+      spark.conf().set("spark.wap.id", "wap-to-expire");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('wap.expire.data')");
+      String expireWapId =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'wap-to-expire'")
+              .first()
+              .mkString();
+      spark.conf().unset("spark.wap.id");
+
+      // Expire specific WAP snapshot
+      spark.sql(
+          String.format(
+              "CALL openhouse.system.expire_snapshots(table => '"
+                  + tableName.replace("openhouse.", "")
+                  + "', snapshot_ids => Array(%s))",
+              expireWapId));
+
+      // Verify selective WAP expiration
+      List<Row> remainingWaps =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'wap-to-keep'")
+              .collectAsList();
+      assertEquals(1, remainingWaps.size());
+
+      List<Row> expiredWaps =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'wap-to-expire'")
+              .collectAsList();
+      assertEquals(0, expiredWaps.size());
+
+      // Verify branches unchanged
+      assertEquals(1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size());
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  // ===== BACKWARD COMPATIBILITY =====
+
+  @Test
+  public void testBackwardCompatibilityMainBranchOnly() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Traditional main-only workflow (should work exactly as before)
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main.1')");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main.2')");
+
+      // WAP staging (traditional)
+      spark.conf().set("spark.wap.id", "compat-test-wap");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('compat.wap.data')");
+      String wapSnapshotId =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'compat-test-wap'")
+              .first()
+              .mkString();
+      spark.conf().unset("spark.wap.id");
+
+      // Traditional cherry-pick to main
+      spark.sql(
+          String.format(
+              "CALL openhouse.system.cherrypick_snapshot('"
+                  + tableName.replace("openhouse.", "")
+                  + "', %s)",
+              wapSnapshotId));
+
+      // Verify traditional behavior preserved
+      assertEquals(3, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
+      List<Row> refs = spark.sql("SELECT name FROM " + tableName + ".refs").collectAsList();
+      assertEquals(1, refs.size());
+      assertEquals("main", refs.get(0).getString(0));
+
+      // Traditional snapshot queries should work
+      assertTrue(
+          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList().size() >= 3);
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  // ===== ERROR SCENARIOS =====
+
+  @Test
+  public void testErrorInsertToNonExistentBranch() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+
+      // Setup base data
+      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
+
+      // Create one valid branch
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+
+      // Verify valid branch works
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('valid.data')");
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size());
+
+      // Attempt to insert into non-existent branch should fail
+      assertThrows(
+          Exception.class,
+          () ->
+              spark.sql("INSERT INTO " + tableName + ".branch_nonexistent VALUES ('invalid.data')"),
+          "Insert to non-existent branch should fail");
+
+      // Verify table state unchanged after failed insert
+      assertEquals(
+          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main unchanged
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size()); // feature_a unchanged
+
+      // Verify only valid branches exist
+      List<Row> refs =
+          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
+      assertEquals(2, refs.size());
+      assertEquals("feature_a", refs.get(0).getString(0));
+      assertEquals("main", refs.get(1).getString(0));
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+
+  @Test
+  public void testErrorCherryPickNonExistentWapId() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup base data and branch
+      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+
+      // Create a valid WAP snapshot
+      spark.conf().set("spark.wap.id", "valid-wap");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('valid.wap.data')");
+      String validWapId =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'valid-wap'")
+              .first()
+              .mkString();
+      spark.conf().unset("spark.wap.id");
+
+      // Verify valid WAP cherry-pick works
+      spark.sql(
+          String.format(
+              "CALL openhouse.system.cherrypick_snapshot('"
+                  + tableName.replace("openhouse.", "")
+                  + "', %s)",
+              validWapId));
+      assertEquals(2, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
+
+      // Attempt to cherry-pick non-existent snapshot ID should fail
+      long nonExistentSnapshotId = 999999999L;
+      assertThrows(
+          Exception.class,
+          () ->
+              spark.sql(
+                  String.format(
+                      "CALL openhouse.system.cherrypick_snapshot('"
+                          + tableName.replace("openhouse.", "")
+                          + "', %s)",
+                      nonExistentSnapshotId)),
+          "Cherry-pick of non-existent snapshot should fail");
+
+      // Attempt to cherry-pick with malformed snapshot ID should fail
+      assertThrows(
+          Exception.class,
+          () ->
+              spark.sql(
+                  String.format(
+                      "CALL openhouse.system.cherrypick_snapshot('"
+                          + tableName.replace("openhouse.", "")
+                          + "', %s)",
+                      "invalid-id")),
+          "Cherry-pick with invalid snapshot ID should fail");
+
+      // Verify table state unchanged after failed cherry-picks
+      assertEquals(
+          2, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main unchanged
+      assertEquals(
+          1,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
+              .collectAsList()
+              .size()); // feature_a unchanged
+
+      // Verify valid WAP snapshot still exists
+      List<Row> validWaps =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'valid-wap'")
+              .collectAsList();
+      assertEquals(1, validWaps.size());
+
+      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+    }
+  }
+}

From 8546d4323d49b8a97bcbc5fedbba545487f88b9b Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Tue, 30 Sep 2025 14:46:58 -0700
Subject: [PATCH 02/35] wap branch green tests

---
 .../spark/catalogtest/BranchTestSpark3_5.java | 467 ++++++++++++++++--
 1 file changed, 420 insertions(+), 47 deletions(-)

diff --git a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
index c5b239a4e..942f5e89a 100644
--- a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
+++ b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
@@ -8,7 +8,12 @@
 import java.util.stream.Collectors;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.MethodOrderer;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestMethodOrder;
+import org.junit.jupiter.api.parallel.Execution;
+import org.junit.jupiter.api.parallel.ExecutionMode;
 
 /**
  * Comprehensive tests for multi-branch WAP operations in Spark 3.5. Tests validate the enhanced
@@ -17,8 +22,53 @@
  * any branches - Fast forward merges for all branches - Backward compatibility with main-only
  * workflows - Forward compatibility for future wap.branch features
  */
+@TestMethodOrder(MethodOrderer.MethodName.class)
+@Execution(ExecutionMode.SAME_THREAD)
 public class BranchTestSpark3_5 extends OpenHouseSparkITest {
 
+  /**
+   * Comprehensive cleanup method to prevent configuration and table bleed-over between tests. This
+   * ensures WAP configurations are properly reset and all test tables are dropped.
+   */
+  @AfterEach
+  public void cleanupAfterTest() {
+    try (SparkSession spark = getSparkSession()) {
+      // Clear WAP configurations to prevent bleed-over between tests
+      spark.conf().unset("spark.wap.id");
+      spark.conf().unset("spark.wap.branch");
+
+      // Drop all test tables to ensure clean state for next test
+      // Get all tables in the d1 database that start with branch_test_ or similar patterns
+      try {
+        List<Row> tables = spark.sql("SHOW TABLES IN openhouse.d1").collectAsList();
+        for (Row table : tables) {
+          String tableName = table.getString(1); // table name is in second column
+          if (tableName.startsWith("branch_test_") || tableName.startsWith("test_")) {
+            String fullTableName = "openhouse.d1." + tableName;
+            spark.sql("DROP TABLE IF EXISTS " + fullTableName);
+          }
+        }
+      } catch (Exception e) {
+        // If SHOW TABLES fails, try to drop common test table patterns
+        // This is a fallback in case the database doesn't exist yet
+        for (String pattern : new String[] {"branch_test_", "test_"}) {
+          for (int i = 0; i < 10; i++) { // Try a few recent timestamps
+            long timestamp = System.currentTimeMillis() - (i * 1000);
+            String tableName = "openhouse.d1." + pattern + timestamp;
+            try {
+              spark.sql("DROP TABLE IF EXISTS " + tableName);
+            } catch (Exception ignored) {
+              // Ignore failures for non-existent tables
+            }
+          }
+        }
+      }
+    } catch (Exception e) {
+      // Log but don't fail the test for cleanup issues
+      System.err.println("Warning: Failed to cleanup after test: " + e.getMessage());
+    }
+  }
+
   // ===== BASIC BRANCH OPERATIONS =====
 
   @Test
@@ -27,7 +77,6 @@ public void testBasicBranchOperations() throws Exception {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName);
       spark.sql("CREATE TABLE " + tableName + " (name string)");
 
       // Add initial data to main
@@ -56,8 +105,6 @@ public void testBasicBranchOperations() throws Exception {
       assertEquals(2, refs.size());
       assertEquals("feature_a", refs.get(0).getString(0));
       assertEquals("main", refs.get(1).getString(0));
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 
@@ -69,7 +116,6 @@ public void testWapStagingWithBranches() throws Exception {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
       spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
 
@@ -81,7 +127,6 @@ public void testWapStagingWithBranches() throws Exception {
       // Stage WAP snapshot (should not affect any branch)
       spark.conf().set("spark.wap.id", "multi-branch-wap");
       spark.sql("INSERT INTO " + tableName + " VALUES ('wap.staged.data')");
-      spark.conf().unset("spark.wap.id");
 
       // Verify WAP staging doesn't affect branch visibility
       assertEquals(
@@ -116,8 +161,6 @@ public void testWapStagingWithBranches() throws Exception {
                       + ".snapshots WHERE summary['wap.id'] = 'multi-branch-wap'")
               .collectAsList();
       assertEquals(1, wapSnapshots.size());
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 
@@ -129,7 +172,6 @@ public void testCherryPickToMainWithFeatureBranch() throws Exception {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
       spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
 
@@ -148,9 +190,9 @@ public void testCherryPickToMainWithFeatureBranch() throws Exception {
                       + ".snapshots WHERE summary['wap.id'] = 'feature-target-wap'")
               .first()
               .mkString();
-      spark.conf().unset("spark.wap.id");
 
-      // CRITICAL: Advance main branch to force non-fast-forward cherry-pick
+      // CRITICAL: Unset WAP ID before advancing main branch to force non-fast-forward cherry-pick
+      // spark.conf().unset("spark.wap.id");
       spark.sql("INSERT INTO " + tableName + " VALUES ('main.advance')");
 
       // Cherry-pick WAP to main branch (this tests our enhanced maybeAppendSnapshots)
@@ -184,8 +226,6 @@ public void testCherryPickToMainWithFeatureBranch() throws Exception {
       assertTrue(
           publishedSnapshots.size() >= 1,
           "Should find at least one snapshot with published-wap-id");
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 
@@ -197,7 +237,6 @@ public void testFastForwardMergeToMain() throws Exception {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
 
       // Setup base data
@@ -244,8 +283,6 @@ public void testFastForwardMergeToMain() throws Exception {
               .first()
               .mkString();
       assertEquals(mainSnapshot, featureSnapshot);
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 
@@ -255,7 +292,6 @@ public void testFastForwardMergeToFeature() throws Exception {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
 
       // Setup base data
@@ -303,18 +339,15 @@ public void testFastForwardMergeToFeature() throws Exception {
               .first()
               .mkString();
       assertEquals(mainSnapshot, featureSnapshot);
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 
   @Test
-  public void testFastForwardMergeWithWapId() throws Exception {
+  public void testFastForwardFeatureToMainAndWapId() throws Exception {
     try (SparkSession spark = getSparkSession()) {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
       spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
 
@@ -335,9 +368,10 @@ public void testFastForwardMergeWithWapId() throws Exception {
                       + ".snapshots WHERE summary['wap.id'] = 'test-wap'")
               .first()
               .mkString();
-      spark.conf().unset("spark.wap.id");
 
-      // Advance feature branch normally (not using WAP)
+      // Unset WAP ID before advancing feature branch normally (not using WAP - else WAP staged
+      // snapshot will apply to feature branch)
+      spark.conf().unset("spark.wap.id");
       spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data')");
 
       // Verify WAP snapshot doesn't interfere with fast-forward
@@ -372,8 +406,6 @@ public void testFastForwardMergeWithWapId() throws Exception {
               .collectAsList();
       assertEquals(1, wapSnapshots.size());
       assertEquals(wapSnapshotId, wapSnapshots.get(0).mkString());
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 
@@ -383,7 +415,6 @@ public void testFastForwardMergeBetweenTwoFeatureBranches() throws Exception {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
 
       // Setup base data
@@ -445,8 +476,6 @@ public void testFastForwardMergeBetweenTwoFeatureBranches() throws Exception {
               .first()
               .mkString();
       assertEquals(featureASnapshot, featureBSnapshot);
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 
@@ -456,7 +485,6 @@ public void testFastForwardMergeIncompatibleLineage() throws Exception {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
 
       // Setup base data
@@ -509,8 +537,6 @@ public void testFastForwardMergeIncompatibleLineage() throws Exception {
               .first()
               .mkString();
       assertNotEquals(mainSnapshot, featureSnapshot);
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 
@@ -522,7 +548,6 @@ public void testSnapshotExpirationFromFeatureBranch() throws Exception {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
       spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
 
@@ -610,8 +635,6 @@ public void testSnapshotExpirationFromFeatureBranch() throws Exception {
               .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
               .collectAsList()
               .size());
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 
@@ -621,7 +644,6 @@ public void testWapSnapshotExpirationWithMultipleBranches() throws Exception {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
       spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
 
@@ -644,7 +666,6 @@ public void testWapSnapshotExpirationWithMultipleBranches() throws Exception {
                       + ".snapshots WHERE summary['wap.id'] = 'wap-to-expire'")
               .first()
               .mkString();
-      spark.conf().unset("spark.wap.id");
 
       // Expire specific WAP snapshot
       spark.sql(
@@ -681,20 +702,161 @@ public void testWapSnapshotExpirationWithMultipleBranches() throws Exception {
               .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
               .collectAsList()
               .size());
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 
   // ===== BACKWARD COMPATIBILITY =====
 
+  @Test
+  public void testWapIdOnFeatureBranchAndMainBranch() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup base data in main branch
+      spark.sql("INSERT INTO " + tableName + " VALUES (0, 'main_base')");
+
+      // Create feature branch and add base data to it
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES (10, 'feature_base')");
+
+      // Verify initial state - main has 1 row, feature has 2 rows
+      assertEquals(1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
+      assertEquals(
+          2, spark.sql("SELECT * FROM " + tableName + ".branch_feature_a").collectAsList().size());
+
+      // Create WAP staged snapshot (invisible to normal reads)
+      spark.conf().set("spark.wap.id", "shared-wap-snapshot");
+      spark.sql("INSERT INTO " + tableName + " VALUES (99, 'wap_staged_data')");
+
+      // Get the WAP snapshot ID
+      String wapSnapshotId =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'shared-wap-snapshot'")
+              .first()
+              .mkString();
+
+      // Verify WAP staging doesn't affect normal reads (principle 2: invisible until published)
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main should not see WAP staged data");
+      assertEquals(
+          2,
+          spark.sql("SELECT * FROM " + tableName + ".branch_feature_a").collectAsList().size(),
+          "Feature should not see WAP staged data");
+
+      // Clear WAP ID to avoid contamination
+      spark.conf().unset("spark.wap.id");
+
+      // Cherry-pick the same WAP snapshot to MAIN branch
+      spark.sql(
+          String.format(
+              "CALL openhouse.system.cherrypick_snapshot('"
+                  + tableName.replace("openhouse.", "")
+                  + "', %s)",
+              wapSnapshotId));
+
+      // Verify cherry-pick to main worked - main should now have the WAP data
+      List<Row> mainAfterCherryPick = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
+      assertEquals(2, mainAfterCherryPick.size(), "Main should have base + cherry-picked WAP data");
+      boolean mainHasWapData =
+          mainAfterCherryPick.stream().anyMatch(row -> "wap_staged_data".equals(row.getString(1)));
+      assertTrue(mainHasWapData, "Main should contain cherry-picked WAP data");
+
+      // Verify feature branch is still unaffected
+      assertEquals(
+          2,
+          spark.sql("SELECT * FROM " + tableName + ".branch_feature_a").collectAsList().size(),
+          "Feature branch should be unchanged");
+
+      // Demonstrate that WAP snapshots work independently on different branches by
+      // creating a separate WAP snapshot while on the feature branch context
+
+      // Create another WAP snapshot that could be applied to feature branch
+      spark.conf().set("spark.wap.id", "feature-specific-wap");
+      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES (50, 'feature_wap_data')");
+
+      String featureWapSnapshotId =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'feature-specific-wap'")
+              .first()
+              .mkString();
+
+      // Clear WAP ID again
+      spark.conf().unset("spark.wap.id");
+
+      // Verify that both WAP snapshots exist but are invisible to normal reads
+      assertEquals(
+          2,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main should still only show cherry-picked data");
+      assertEquals(
+          2,
+          spark.sql("SELECT * FROM " + tableName + ".branch_feature_a").collectAsList().size(),
+          "Feature should not show new WAP data yet");
+
+      // Show that we can cherry-pick the feature WAP to main as well (demonstrating cross-branch
+      // capability)
+      spark.sql(
+          String.format(
+              "CALL openhouse.system.cherrypick_snapshot('"
+                  + tableName.replace("openhouse.", "")
+                  + "', %s)",
+              featureWapSnapshotId));
+
+      // Verify main now has both cherry-picked WAP snapshots
+      List<Row> finalMain = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
+      assertEquals(3, finalMain.size(), "Main should have base + first WAP + second WAP data");
+
+      boolean hasOriginalWap =
+          finalMain.stream().anyMatch(row -> "wap_staged_data".equals(row.getString(1)));
+      boolean hasFeatureWap =
+          finalMain.stream().anyMatch(row -> "feature_wap_data".equals(row.getString(1)));
+      assertTrue(hasOriginalWap, "Main should contain first cherry-picked WAP data");
+      assertTrue(hasFeatureWap, "Main should contain second cherry-picked WAP data");
+
+      // Verify feature branch is still independent and unchanged by main's cherry-picks
+      List<Row> finalFeature =
+          spark.sql("SELECT * FROM " + tableName + ".branch_feature_a").collectAsList();
+      assertEquals(
+          2, finalFeature.size(), "Feature should still only have base + feature_base data");
+
+      // Verify that both original WAP snapshots are still available in metadata
+      List<Row> originalWapSnapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'shared-wap-snapshot'")
+              .collectAsList();
+      List<Row> featureWapSnapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots WHERE summary['wap.id'] = 'feature-specific-wap'")
+              .collectAsList();
+      assertEquals(1, originalWapSnapshots.size(), "Original WAP snapshot should still exist");
+      assertEquals(1, featureWapSnapshots.size(), "Feature WAP snapshot should still exist");
+    }
+  }
+
   @Test
   public void testBackwardCompatibilityMainBranchOnly() throws Exception {
     try (SparkSession spark = getSparkSession()) {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
       spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
 
@@ -713,7 +875,6 @@ public void testBackwardCompatibilityMainBranchOnly() throws Exception {
                       + ".snapshots WHERE summary['wap.id'] = 'compat-test-wap'")
               .first()
               .mkString();
-      spark.conf().unset("spark.wap.id");
 
       // Traditional cherry-pick to main
       spark.sql(
@@ -732,8 +893,227 @@ public void testBackwardCompatibilityMainBranchOnly() throws Exception {
       // Traditional snapshot queries should work
       assertTrue(
           spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList().size() >= 3);
+    }
+  }
+
+  // ===== WAP BRANCH TESTING =====
+  // These tests validate the intended WAP branch functionality.
+  // WAP branch should stage writes to a specific branch without affecting main.
+
+  @Test
+  public void testStagedChangesVisibleViaConf() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "wap_branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup base data
+      spark.sql("INSERT INTO " + tableName + " VALUES (1, 'base_data')");
+
+      // Create WAP branch and insert staged data
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH wap_branch");
+      spark.conf().set("spark.wap.branch", "wap_branch");
+      spark.sql("INSERT INTO " + tableName + " VALUES (2, 'staged_data')");
+
+      // When spark.wap.branch is set, SELECT should see WAP branch data (2 rows)
+      List<Row> wapVisible = spark.sql("SELECT * FROM " + tableName).collectAsList();
+      assertEquals(
+          2, wapVisible.size(), "Should see both base and staged data when wap.branch is set");
+
+      // When spark.wap.branch is unset, SELECT should see only main data (1 row)
+      spark.conf().unset("spark.wap.branch");
+      List<Row> mainOnly = spark.sql("SELECT * FROM " + tableName).collectAsList();
+      assertEquals(1, mainOnly.size(), "Should see only base data when wap.branch is unset");
+    }
+  }
+
+  @Test
+  public void testStagedChangesHidden() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "wap_branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup base data
+      spark.sql("INSERT INTO " + tableName + " VALUES (0, 'base')");
+
+      // Create WAP branch for staged operations
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH wap");
+
+      // Set WAP branch for staged testing
+      spark.conf().set("spark.wap.branch", "wap");
+
+      // INSERT INTO table -> inserts to the WAP branch
+      spark.sql("INSERT INTO " + tableName + " VALUES (1, 'staged_data')");
+
+      // When spark.wap.branch is set:
+      // ✅ SELECT * FROM table → reads from the WAP branch
+      List<Row> tableData = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
+      assertEquals(
+          2,
+          tableData.size(),
+          "SELECT * FROM table should read from WAP branch when spark.wap.branch is set");
+      boolean hasBase = tableData.stream().anyMatch(row -> "base".equals(row.getString(1)));
+      boolean hasStaged =
+          tableData.stream().anyMatch(row -> "staged_data".equals(row.getString(1)));
+      assertTrue(hasBase, "WAP branch should contain base data");
+      assertTrue(hasStaged, "WAP branch should contain staged data");
+
+      // ✅ SELECT * FROM table.branch_wap → explicitly reads from WAP branch
+      List<Row> wapBranchData =
+          spark.sql("SELECT * FROM " + tableName + ".branch_wap").collectAsList();
+      assertEquals(2, wapBranchData.size(), "Explicit WAP branch select should show staged data");
+
+      // ✅ SELECT * FROM table.branch_main → explicitly reads from main branch
+      List<Row> mainBranchData =
+          spark.sql("SELECT * FROM " + tableName + ".branch_main").collectAsList();
+      assertEquals(
+          1, mainBranchData.size(), "Explicit main branch select should only show base data");
+      assertEquals(
+          "base", mainBranchData.get(0).getString(1), "Main branch should only contain base data");
+
+      // Now unset spark.wap.branch and ensure main branch is the referenced data
+      spark.conf().unset("spark.wap.branch");
+
+      // When spark.wap.branch is unset, SELECT * FROM table should read from main branch
+      List<Row> afterUnsetData = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
+      assertEquals(
+          1,
+          afterUnsetData.size(),
+          "SELECT * FROM table should read from main branch when spark.wap.branch is unset");
+      assertEquals(
+          "base",
+          afterUnsetData.get(0).getString(1),
+          "After unsetting wap.branch, should read from main");
+
+      // INSERT INTO table should go to main branch when spark.wap.branch is unset
+      spark.sql("INSERT INTO " + tableName + " VALUES (2, 'main_data')");
+      List<Row> finalMainData = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
+      assertEquals(
+          2, finalMainData.size(), "Main branch should now have 2 rows after unsetting wap.branch");
+      boolean hasMainData =
+          finalMainData.stream().anyMatch(row -> "main_data".equals(row.getString(1)));
+      assertTrue(hasMainData, "Main branch should contain the newly inserted data");
+
+      // WAP branch should remain unchanged
+      List<Row> finalWapData =
+          spark.sql("SELECT * FROM " + tableName + ".branch_wap").collectAsList();
+      assertEquals(
+          2, finalWapData.size(), "WAP branch should remain unchanged with base + staged data");
+    }
+  }
+
+  @Test
+  public void testPublishWapBranch() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "wap_branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup base data
+      spark.sql("INSERT INTO " + tableName + " VALUES (0, 'base')");
+
+      // Create staging branch
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH staging");
+
+      // Stage changes to WAP branch
+      spark.conf().set("spark.wap.branch", "staging");
+      spark.sql("INSERT INTO " + tableName + " VALUES (1, 'staged_for_publish')");
+
+      // When spark.wap.branch is set, SELECT * FROM table should read from WAP branch
+      assertEquals(
+          2,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "SELECT * FROM table should read from WAP branch when spark.wap.branch is set");
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'staging'")
+              .collectAsList()
+              .size(),
+          "Staging should have staged data");
+
+      // Verify main branch still only has base data
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + ".branch_main").collectAsList().size(),
+          "Main branch should not have staged data");
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
+      // Fast-forward main branch to staging branch to publish the staged changes
+      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'main', 'staging')");
+
+      // Verify data is now published to main branch (need to explicitly check main branch)
+      List<Row> publishedData =
+          spark.sql("SELECT * FROM " + tableName + ".branch_main").collectAsList();
+      assertEquals(2, publishedData.size(), "Main branch should now have published data");
+
+      boolean hasPublished =
+          publishedData.stream().anyMatch(row -> "staged_for_publish".equals(row.getString(1)));
+      assertTrue(hasPublished, "Main branch should contain the published staged data");
+
+      // Verify that with wap.branch still set, SELECT * FROM table still reads from WAP branch
+      List<Row> wapData = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
+      assertEquals(2, wapData.size(), "SELECT * FROM table should still read from WAP branch");
+    }
+  }
+
+  @Test
+  public void testWapIdAndWapBranchIncompatible() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "wap_branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup base data
+      spark.sql("INSERT INTO " + tableName + " VALUES (0, 'base')");
+
+      // Create staging branch
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH staging");
+
+      // Set both WAP ID and WAP branch - this should be invalid
+      spark.conf().set("spark.wap.id", "test-wap-id");
+      spark.conf().set("spark.wap.branch", "staging");
+
+      // Attempt to write with both configurations should fail
+      assertThrows(
+          Exception.class,
+          () -> spark.sql("INSERT INTO " + tableName + " VALUES (1, 'invalid')"),
+          "Cannot use both wap.id and wap.branch simultaneously");
+    }
+  }
+
+  @Test
+  public void testCannotWriteToBothBranches() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "wap_branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Setup base data
+      spark.sql("INSERT INTO " + tableName + " VALUES (0, 'base')");
+
+      // Create branches
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature");
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH staging");
+
+      // Set WAP branch
+      spark.conf().set("spark.wap.branch", "staging");
+
+      // ❌ INVALID: Cannot write to both normal branch and WAP branch
+      assertThrows(
+          Exception.class,
+          () -> spark.sql("INSERT INTO " + tableName + ".branch_feature VALUES (1, 'invalid')"),
+          "Cannot write to explicit branch when wap.branch is set");
     }
   }
 
@@ -745,7 +1125,6 @@ public void testErrorInsertToNonExistentBranch() throws Exception {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
 
       // Setup base data
@@ -786,8 +1165,6 @@ public void testErrorInsertToNonExistentBranch() throws Exception {
       assertEquals(2, refs.size());
       assertEquals("feature_a", refs.get(0).getString(0));
       assertEquals("main", refs.get(1).getString(0));
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 
@@ -797,7 +1174,6 @@ public void testErrorCherryPickNonExistentWapId() throws Exception {
       String tableId = "branch_test_" + System.currentTimeMillis();
       String tableName = "openhouse.d1." + tableId;
 
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
       spark.sql("CREATE TABLE " + tableName + " (name string)");
       spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
 
@@ -816,7 +1192,6 @@ public void testErrorCherryPickNonExistentWapId() throws Exception {
                       + ".snapshots WHERE summary['wap.id'] = 'valid-wap'")
               .first()
               .mkString();
-      spark.conf().unset("spark.wap.id");
 
       // Verify valid WAP cherry-pick works
       spark.sql(
@@ -871,8 +1246,6 @@ public void testErrorCherryPickNonExistentWapId() throws Exception {
                       + ".snapshots WHERE summary['wap.id'] = 'valid-wap'")
               .collectAsList();
       assertEquals(1, validWaps.size());
-
-      spark.sql("DROP TABLE IF EXISTS " + tableName + "");
     }
   }
 }

From e20688889df85d9d8b0c08795490a1d9f64dcdaa Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Tue, 30 Sep 2025 14:59:40 -0700
Subject: [PATCH 03/35] accidentally commented line

---
 .../openhouse/spark/catalogtest/BranchTestSpark3_5.java         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
index 942f5e89a..30c9be7a0 100644
--- a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
+++ b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
@@ -192,7 +192,7 @@ public void testCherryPickToMainWithFeatureBranch() throws Exception {
               .mkString();
 
       // CRITICAL: Unset WAP ID before advancing main branch to force non-fast-forward cherry-pick
-      // spark.conf().unset("spark.wap.id");
+      spark.conf().unset("spark.wap.id");
       spark.sql("INSERT INTO " + tableName + " VALUES ('main.advance')");
 
       // Cherry-pick WAP to main branch (this tests our enhanced maybeAppendSnapshots)

From ef1e5b412700bdf4a461456f203254315132ca01 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Tue, 30 Sep 2025 17:17:32 -0700
Subject: [PATCH 04/35] remove test with old behavior

---
 .../OpenHouseInternalTableOperationsTest.java | 23 -------------------
 1 file changed, 23 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index f484b60ae..bcec8377b 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -587,29 +587,6 @@ void testDoCommitAppendStageOnlySnapshotsExistingVersion() throws IOException {
     }
   }
 
-  @Test
-  void testDoCommitAppendSnapshotsToNonMainBranch() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-    Map<String, String> properties = new HashMap<>(BASE_TABLE_METADATA.properties());
-    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
-        Mockito.mockStatic(TableMetadataParser.class)) {
-      properties.put(
-          CatalogConstants.SNAPSHOTS_JSON_KEY,
-          SnapshotsUtil.serializedSnapshots(testSnapshots.subList(0, 1)));
-      properties.put(
-          CatalogConstants.SNAPSHOTS_REFS_KEY,
-          SnapshotsUtil.serializeMap(
-              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(testSnapshots.get(0), "branch")));
-      properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
-
-      TableMetadata metadata = BASE_TABLE_METADATA.replaceProperties(properties);
-      // verify throw an error when committing to non-main branch.
-      Assertions.assertThrows(
-          CommitStateUnknownException.class,
-          () -> openHouseInternalTableOperations.doCommit(BASE_TABLE_METADATA, metadata));
-    }
-  }
-
   @Test
   void testAppendSnapshotsWithOldSnapshots() throws IOException {
     TableMetadata metadata =

From d0de1da63e9d360199b32e9dd485207435d632ea Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Wed, 1 Oct 2025 17:50:13 -0700
Subject: [PATCH 05/35] fixing multi-branch commits and ambiguous references

---
 .../OpenHouseInternalTableOperations.java     |  118 +-
 .../spark/catalogtest/BranchTestSpark3_5.java | 1116 +++++++++++++++++
 2 files changed, 1209 insertions(+), 25 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
index 793167e47..5ed27c62b 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
@@ -571,8 +571,9 @@ private String determineTargetBranch(
   }
 
   /**
-   * Determines the target branch for snapshot commits by matching snapshot IDs. When multiple
-   * branches are present, finds which branch should receive the new snapshots.
+   * Determines the target branch for snapshot commits using explicit branch targeting information.
+   * The snapshotRefs parameter contains the explicit branch targeting from the client commit
+   * operation.
    */
   private String determineTargetBranch(
       Map<String, SnapshotRef> snapshotRefs, List<Snapshot> newSnapshots, String defaultBranch) {
@@ -581,42 +582,102 @@ private String determineTargetBranch(
     }
 
     // If there's only one branch in the refs, use that as the target
+    // This is the most common case - client explicitly specified which branch to commit to
     if (snapshotRefs.size() == 1) {
-      return snapshotRefs.keySet().iterator().next();
+      String targetBranch = snapshotRefs.keySet().iterator().next();
+      log.debug("Using explicit target branch from commit context: {}", targetBranch);
+      return targetBranch;
     }
 
-    // CRITICAL FIX: For multi-branch scenarios, find which branch should get the new snapshots
+    // Multiple branches specified in commit - need to determine which one based on snapshot
+    // relationships
+    log.info(
+        "Multiple branches in snapshotRefs ({}), analyzing snapshot relationships",
+        snapshotRefs.size());
     if (!newSnapshots.isEmpty()) {
-      // Get the latest snapshot ID from new snapshots
-      long latestSnapshotId = newSnapshots.get(newSnapshots.size() - 1).snapshotId();
+      Snapshot latestSnapshot = newSnapshots.get(newSnapshots.size() - 1);
+      long latestSnapshotId = latestSnapshot.snapshotId();
+      log.info("Latest snapshot ID: {}", latestSnapshotId);
 
-      // Find which branch in snapshotRefs should point to this snapshot
+      // First try: exact snapshot ID match within the explicitly targeted branches
+      List<String> exactMatches = new ArrayList<>();
       for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
         String branchName = entry.getKey();
         long branchSnapshotId = entry.getValue().snapshotId();
 
         if (branchSnapshotId == latestSnapshotId) {
-          log.debug(
-              "Determined target branch '{}' by snapshot ID match: {}",
-              branchName,
-              latestSnapshotId);
-          return branchName;
+          exactMatches.add(branchName);
         }
       }
-    }
 
-    // Fallback: if we can't match by snapshot ID, prefer non-main branches for branch operations
-    for (String branchName : snapshotRefs.keySet()) {
-      if (!branchName.equals(SnapshotRef.MAIN_BRANCH)) {
-        log.debug(
-            "Multiple branches, no snapshot match, preferring non-main branch: {}", branchName);
-        return branchName;
+      if (exactMatches.size() == 1) {
+        String targetBranch = exactMatches.get(0);
+        log.info(
+            "Determined target branch '{}' by exact snapshot ID match within commit context: {}",
+            targetBranch,
+            latestSnapshotId);
+        return targetBranch;
+      } else if (exactMatches.size() > 1) {
+        log.error(
+            "Multiple branches point to same snapshot {}: {}", latestSnapshotId, exactMatches);
+        throw new IllegalStateException(
+            String.format(
+                "Multiple explicitly targeted branches point to the same snapshot %s: %s. "
+                    + "This indicates an invalid commit state.",
+                latestSnapshotId, exactMatches));
+      }
+
+      // Second try: parent-child relationship match within the explicitly targeted branches
+      Long parentSnapshotId = latestSnapshot.parentId();
+      log.info("Parent snapshot ID: {}", parentSnapshotId);
+      if (parentSnapshotId != null) {
+        List<String> parentMatches = new ArrayList<>();
+        for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
+          String branchName = entry.getKey();
+          long branchSnapshotId = entry.getValue().snapshotId();
+
+          if (branchSnapshotId == parentSnapshotId) {
+            parentMatches.add(branchName);
+            log.info("Branch '{}' matches parent snapshot {}", branchName, parentSnapshotId);
+          }
+        }
+
+        if (parentMatches.size() == 1) {
+          String targetBranch = parentMatches.get(0);
+          log.info(
+              "Determined target branch '{}' by parent-child relationship within commit context: new snapshot {} is child of branch snapshot {}",
+              targetBranch,
+              latestSnapshotId,
+              parentSnapshotId);
+          return targetBranch;
+        } else if (parentMatches.size() > 1) {
+          log.error(
+              "Multiple branches point to parent snapshot {}: {}", parentSnapshotId, parentMatches);
+          throw new IllegalStateException(
+              String.format(
+                  "Multiple explicitly targeted branches point to parent snapshot %s: %s. "
+                      + "Cannot determine which branch should receive child snapshot %s. "
+                      + "This indicates ambiguous commit targeting - the client should specify a single target branch.",
+                  parentSnapshotId, parentMatches, latestSnapshotId));
+        }
+        // If parentMatches.size() == 0, none of the explicitly targeted branches are parents
+        // This could happen in cherry-pick or other non-linear operations
       }
     }
 
-    // Final fallback to main
-    log.debug("Multiple branches, falling back to main branch");
-    return SnapshotRef.MAIN_BRANCH;
+    // If we reach here, we have multiple explicitly targeted branches but couldn't determine
+    // the target based on snapshot relationships. This suggests the commit operation itself
+    // is ambiguous or invalid.
+    log.error(
+        "Cannot determine target branch from explicitly targeted branches: {}",
+        snapshotRefs.keySet());
+    throw new IllegalStateException(
+        String.format(
+            "Cannot determine target branch from explicitly targeted branches: %s. "
+                + "The commit specifies multiple target branches but snapshot relationships "
+                + "don't clearly indicate which branch should receive the new snapshots. "
+                + "This suggests an invalid or ambiguous commit operation.",
+            snapshotRefs.keySet()));
   }
 
   public TableMetadata maybeAppendSnapshots(
@@ -658,8 +719,10 @@ public TableMetadata maybeAppendSnapshots(
         } else if (snapshot.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)) {
           // a snapshot created on a non fast-forward cherry-pick snapshot
           // Determine target branch from snapshotRefs or default to MAIN_BRANCH
+          // Pass only the current snapshot being processed, not the entire list
           String targetBranch =
-              determineTargetBranch(snapshotRefs, snapshotsToAppend, SnapshotRef.MAIN_BRANCH);
+              determineTargetBranch(
+                  snapshotRefs, Collections.singletonList(snapshot), SnapshotRef.MAIN_BRANCH);
           metadataBuilder.setBranchSnapshot(snapshot, targetBranch);
           appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
           cherryPickedSnapshots.add(
@@ -668,8 +731,10 @@ public TableMetadata maybeAppendSnapshots(
           // a regular snapshot - assign to appropriate branch using snapshotRefs context
           if (MapUtils.isNotEmpty(snapshotRefs)) {
             // We have explicit branch information, use it to assign snapshot
+            // Pass only the current snapshot being processed, not the entire list
             String targetBranch =
-                determineTargetBranch(snapshotRefs, snapshotsToAppend, SnapshotRef.MAIN_BRANCH);
+                determineTargetBranch(
+                    snapshotRefs, Collections.singletonList(snapshot), SnapshotRef.MAIN_BRANCH);
             metadataBuilder.setBranchSnapshot(snapshot, targetBranch);
           } else {
             // No explicit branch refs - treat as staged snapshot
@@ -679,7 +744,10 @@ public TableMetadata maybeAppendSnapshots(
           appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
         }
       }
-    } else if (MapUtils.isNotEmpty(snapshotRefs)) {
+    }
+
+    // Handle ref updates (this can happen independently of snapshot append operations)
+    if (MapUtils.isNotEmpty(snapshotRefs)) {
       // Handle ref updates for all branches (fast-forward cherry-pick or rollback operations)
       for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
         String branchName = entry.getKey();
diff --git a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
index 30c9be7a0..c8a0f3e03 100644
--- a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
+++ b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
@@ -164,6 +164,1122 @@ public void testWapStagingWithBranches() throws Exception {
     }
   }
 
+  @Test
+  public void testWapIdAfterCreateTable() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "wap_id_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      // Create table without any data (no snapshots exist)
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+
+      // Enable WAP on the table
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Verify no snapshots exist yet
+      List<Row> initialSnapshots =
+          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
+      assertEquals(0, initialSnapshots.size(), "Newly created table should have no snapshots");
+
+      // Verify no branches exist yet (empty table has no branches)
+      List<Row> initialRefs = spark.sql("SELECT name FROM " + tableName + ".refs").collectAsList();
+      assertEquals(0, initialRefs.size(), "Empty table should have no branches initially");
+
+      // ===== WAP STAGING ON EMPTY TABLE =====
+
+      // 1. Create WAP staged data on empty table (should create staging snapshot)
+      spark.conf().set("spark.wap.id", "wap-stage-1");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('wap_staged_data_1')");
+      spark.conf().unset("spark.wap.id");
+
+      // Verify WAP snapshot was created
+      List<Row> wapSnapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id, summary FROM "
+                      + tableName
+                      + ".snapshots "
+                      + "WHERE summary['wap.id'] = 'wap-stage-1'")
+              .collectAsList();
+      assertEquals(1, wapSnapshots.size(), "Should have 1 WAP staged snapshot");
+
+      // Verify no branches exist yet (WAP staging doesn't create branches)
+      List<Row> refsAfterWapStaging =
+          spark.sql("SELECT name FROM " + tableName + ".refs").collectAsList();
+      assertEquals(0, refsAfterWapStaging.size(), "WAP staging should not create branches");
+
+      // Verify WAP data is not visible in main queries (no branch exists)
+      assertEquals(
+          0,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Should see 0 rows - no branches exist, WAP data is staged");
+
+      // ===== WAP PUBLISHING TO CREATE MAIN BRANCH =====
+
+      // 2. Publish WAP data to create main branch
+      String wapSnapshotId = String.valueOf(wapSnapshots.get(0).getLong(0));
+      spark.sql(
+          "CALL openhouse.system.cherrypick_snapshot('"
+              + tableName.replace("openhouse.", "")
+              + "', "
+              + wapSnapshotId
+              + ")");
+
+      // Verify main branch now exists
+      List<Row> refsAfterPublishing =
+          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
+      assertEquals(
+          1, refsAfterPublishing.size(), "Should have main branch after publishing WAP data");
+      assertEquals("main", refsAfterPublishing.get(0).getString(0), "Should have main branch");
+
+      // Verify WAP data is now visible in main branch
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should have 1 row after WAP publishing");
+
+      List<Row> mainData = spark.sql("SELECT name FROM " + tableName + "").collectAsList();
+      assertEquals(
+          "wap_staged_data_1", mainData.get(0).getString(0), "Should see published WAP data");
+
+      // ===== MULTI-WAP OPERATIONS =====
+
+      // 3. Create multiple WAP staged data sets
+      spark.conf().set("spark.wap.id", "wap-stage-2");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('wap_staged_data_2')");
+      spark.conf().unset("spark.wap.id");
+
+      spark.conf().set("spark.wap.id", "wap-stage-3");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('wap_staged_data_3')");
+      spark.conf().unset("spark.wap.id");
+
+      // Verify multiple WAP snapshots exist
+      List<Row> allWapSnapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots "
+                      + "WHERE summary['wap.id'] IS NOT NULL")
+              .collectAsList();
+      assertEquals(3, allWapSnapshots.size(), "Should have 3 WAP staged snapshots");
+
+      // Verify main branch is unchanged (WAP data is staged)
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should still have 1 row (staged WAP not visible)");
+
+      // ===== SELECTIVE WAP PUBLISHING =====
+
+      // 4. Publish second WAP data set only
+      List<Row> wap2Snapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots "
+                      + "WHERE summary['wap.id'] = 'wap-stage-2'")
+              .collectAsList();
+      String wap2SnapshotId = String.valueOf(wap2Snapshots.get(0).getLong(0));
+      spark.sql(
+          "CALL openhouse.system.cherrypick_snapshot('"
+              + tableName.replace("openhouse.", "")
+              + "', "
+              + wap2SnapshotId
+              + ")");
+
+      // Verify main branch now has both published datasets
+      assertEquals(
+          2,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should have 2 rows after second WAP publishing");
+
+      List<Row> publishedData =
+          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
+      assertEquals(
+          "wap_staged_data_1",
+          publishedData.get(0).getString(0),
+          "First row should be first WAP data");
+      assertEquals(
+          "wap_staged_data_2",
+          publishedData.get(1).getString(0),
+          "Second row should be second WAP data");
+
+      // ===== UNPUBLISHED WAP DATA VERIFICATION =====
+
+      // 5. Verify third WAP data remains unpublished
+      List<Row> wap3Snapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots "
+                      + "WHERE summary['wap.id'] = 'wap-stage-3'")
+              .collectAsList();
+      assertEquals(1, wap3Snapshots.size(), "Third WAP snapshot should still exist");
+
+      // Verify unpublished WAP data is not visible
+      List<Row> currentData =
+          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
+      assertFalse(
+          currentData.stream().anyMatch(row -> "wap_staged_data_3".equals(row.getString(0))),
+          "Unpublished WAP data should not be visible in main branch");
+
+      // ===== REGULAR DATA VS WAP DATA =====
+
+      // 6. Add regular (non-WAP) data to main branch
+      spark.sql("INSERT INTO " + tableName + " VALUES ('regular_data')");
+
+      // Verify main branch now has mixed data
+      assertEquals(
+          3,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should have 3 rows (2 published WAP + 1 regular)");
+
+      List<Row> finalData =
+          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
+      assertEquals("regular_data", finalData.get(0).getString(0), "Should contain regular data");
+      assertEquals(
+          "wap_staged_data_1", finalData.get(1).getString(0), "Should contain first WAP data");
+      assertEquals(
+          "wap_staged_data_2", finalData.get(2).getString(0), "Should contain second WAP data");
+
+      // ===== SNAPSHOT HISTORY VERIFICATION =====
+
+      // 7. Verify snapshot counts and types
+      List<Row> totalSnapshots =
+          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
+      assertTrue(
+          totalSnapshots.size() >= 4, "Should have at least 4 snapshots (3 WAP + 1 regular)");
+
+      // Verify WAP snapshots still exist in metadata
+      List<Row> remainingWapSnapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".snapshots "
+                      + "WHERE summary['wap.id'] IS NOT NULL")
+              .collectAsList();
+      assertEquals(
+          3, remainingWapSnapshots.size(), "All 3 WAP snapshots should still exist in metadata");
+
+      // Verify main branch has the latest published snapshot (points to regular INSERT snapshot)
+      List<Row> mainSnapshotRef =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
+              .collectAsList();
+      assertEquals(1, mainSnapshotRef.size(), "Main branch should exist and point to a snapshot");
+    }
+  }
+
+  @Test
+  public void testBranchAfterCreateTable() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      // Create table without any data (no snapshots exist)
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+
+      // Verify no snapshots exist yet
+      List<Row> initialSnapshots =
+          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
+      assertEquals(0, initialSnapshots.size(), "Newly created table should have no snapshots");
+
+      // Create branch on table with no existing snapshots
+      // According to Iceberg specification, this should succeed and create an empty snapshot
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_on_empty");
+
+      // Verify that an empty snapshot was created for the branch
+      List<Row> snapshotsAfterBranchCreation =
+          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
+      assertEquals(
+          1,
+          snapshotsAfterBranchCreation.size(),
+          "Should have 1 empty snapshot after branch creation");
+
+      // Verify the empty snapshot properties
+      Row emptySnapshot = snapshotsAfterBranchCreation.get(0);
+      // The parent_id should be null for the empty snapshot
+      assertNull(
+          emptySnapshot.get(emptySnapshot.fieldIndex("parent_id")),
+          "Empty snapshot should have no parent");
+
+      // Verify the branch was created successfully
+      List<Row> refsAfterBranchCreation =
+          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
+      assertEquals(
+          1,
+          refsAfterBranchCreation.size(),
+          "Should have feature_on_empty branch (main doesn't exist yet)");
+      assertEquals(
+          "feature_on_empty",
+          refsAfterBranchCreation.get(0).getString(0),
+          "Should have feature_on_empty branch");
+
+      // Verify that main branch still doesn't exist (as expected)
+      boolean hasMainBranch =
+          refsAfterBranchCreation.stream().anyMatch(row -> "main".equals(row.getString(0)));
+      assertFalse(hasMainBranch, "Main branch should not exist on empty table");
+
+      // Now insert data to create a data snapshot
+      spark.sql("INSERT INTO " + tableName + " VALUES ('initial.data')");
+
+      // Verify we now have 2 snapshots (empty + data)
+      List<Row> snapshotsAfterInsert =
+          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
+      assertEquals(
+          2, snapshotsAfterInsert.size(), "Should have 2 snapshots after insert (empty + data)");
+
+      // Now we should have main branch as well
+      List<Row> refsAfterInsert =
+          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
+      assertEquals(2, refsAfterInsert.size(), "Should have feature_on_empty and main branches");
+
+      // Create another branch after data exists - this should also succeed
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_after_snapshot");
+
+      // Verify we now have 3 branches (feature_on_empty, main, feature_after_snapshot)
+      List<Row> refs =
+          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
+      assertEquals(3, refs.size(), "Should have 3 branches total");
+
+      // Verify all expected branches exist
+      Set<String> branchNames =
+          refs.stream().map(row -> row.getString(0)).collect(Collectors.toSet());
+      assertTrue(branchNames.contains("feature_on_empty"), "feature_on_empty branch should exist");
+      assertTrue(branchNames.contains("main"), "main branch should exist");
+      assertTrue(
+          branchNames.contains("feature_after_snapshot"),
+          "feature_after_snapshot branch should exist");
+
+      // ===== BRANCH ISOLATION TESTING =====
+
+      // 1. Test initial state: main and feature_after_snapshot should have the same data
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should have 1 row");
+      assertEquals(
+          1,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_after_snapshot'")
+              .collectAsList()
+              .size(),
+          "feature_after_snapshot branch should have 1 row");
+
+      // 2. Test feature_on_empty branch should be empty (points to empty snapshot)
+      assertEquals(
+          0,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_on_empty'")
+              .collectAsList()
+              .size(),
+          "feature_on_empty branch should have 0 rows (points to empty snapshot)");
+
+      // 3. Add data to feature_on_empty branch only
+      spark.sql(
+          "INSERT INTO " + tableName + ".branch_feature_on_empty VALUES ('empty_branch_data')");
+
+      // Verify isolation: feature_on_empty now has data, others unchanged
+      assertEquals(
+          1,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_on_empty'")
+              .collectAsList()
+              .size(),
+          "feature_on_empty branch should now have 1 row");
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should still have 1 row (unchanged)");
+      assertEquals(
+          1,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_after_snapshot'")
+              .collectAsList()
+              .size(),
+          "feature_after_snapshot branch should still have 1 row (unchanged)");
+
+      // 4. Add different data to feature_after_snapshot branch
+      spark.sql(
+          "INSERT INTO "
+              + tableName
+              + ".branch_feature_after_snapshot VALUES ('snapshot_branch_data')");
+
+      // Verify isolation: each branch has its own data
+      assertEquals(
+          1,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_on_empty'")
+              .collectAsList()
+              .size(),
+          "feature_on_empty branch should still have 1 row");
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should still have 1 row (unchanged)");
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_after_snapshot'")
+              .collectAsList()
+              .size(),
+          "feature_after_snapshot branch should now have 2 rows");
+
+      // 5. Add data to main branch
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main_branch_data')");
+
+      // Verify complete isolation: each branch maintains its own data
+      assertEquals(
+          1,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_on_empty'")
+              .collectAsList()
+              .size(),
+          "feature_on_empty branch should still have 1 row");
+      assertEquals(
+          2,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should now have 2 rows");
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_after_snapshot'")
+              .collectAsList()
+              .size(),
+          "feature_after_snapshot branch should still have 2 rows (unchanged)");
+
+      // 6. Verify data content isolation
+      List<Row> featureOnEmptyData =
+          spark
+              .sql(
+                  "SELECT name FROM "
+                      + tableName
+                      + " VERSION AS OF 'feature_on_empty' ORDER BY name")
+              .collectAsList();
+      assertEquals(
+          "empty_branch_data",
+          featureOnEmptyData.get(0).getString(0),
+          "feature_on_empty should contain its specific data");
+
+      List<Row> mainData =
+          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
+      assertEquals(
+          "initial.data", mainData.get(0).getString(0), "main should contain initial data");
+      assertEquals(
+          "main_branch_data",
+          mainData.get(1).getString(0),
+          "main should contain its specific data");
+
+      List<Row> featureAfterSnapshotData =
+          spark
+              .sql(
+                  "SELECT name FROM "
+                      + tableName
+                      + " VERSION AS OF 'feature_after_snapshot' ORDER BY name")
+              .collectAsList();
+      assertEquals(
+          "initial.data",
+          featureAfterSnapshotData.get(0).getString(0),
+          "feature_after_snapshot should contain initial data");
+      assertEquals(
+          "snapshot_branch_data",
+          featureAfterSnapshotData.get(1).getString(0),
+          "feature_after_snapshot should contain its specific data");
+
+      // 7. Verify snapshot isolation: each branch should have different snapshot histories
+      List<Row> mainSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
+              .collectAsList();
+      List<Row> featureOnEmptySnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_on_empty'")
+              .collectAsList();
+      List<Row> featureAfterSnapshotSnapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id FROM "
+                      + tableName
+                      + ".refs WHERE name = 'feature_after_snapshot'")
+              .collectAsList();
+
+      assertNotEquals(
+          mainSnapshots.get(0).getLong(0),
+          featureOnEmptySnapshots.get(0).getLong(0),
+          "main and feature_on_empty should point to different snapshots");
+      assertNotEquals(
+          mainSnapshots.get(0).getLong(0),
+          featureAfterSnapshotSnapshots.get(0).getLong(0),
+          "main and feature_after_snapshot should point to different snapshots");
+      assertNotEquals(
+          featureOnEmptySnapshots.get(0).getLong(0),
+          featureAfterSnapshotSnapshots.get(0).getLong(0),
+          "feature_on_empty and feature_after_snapshot should point to different snapshots");
+    }
+  }
+
+  @Test
+  public void testWapBranchAfterCreateTable() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "wap_branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      // Create table without any data (no snapshots exist)
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+
+      // Enable WAP on the table
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Verify no snapshots exist yet
+      List<Row> initialSnapshots =
+          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
+      assertEquals(0, initialSnapshots.size(), "Newly created table should have no snapshots");
+
+      // Create branch on table with no existing snapshots
+      // According to Iceberg specification, this should succeed and create an empty snapshot
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_empty");
+
+      // Verify that an empty snapshot was created for the branch
+      List<Row> snapshotsAfterBranchCreation =
+          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
+      assertEquals(
+          1,
+          snapshotsAfterBranchCreation.size(),
+          "Should have 1 empty snapshot after branch creation");
+
+      // Verify the branch was created successfully
+      List<Row> refsAfterBranchCreation =
+          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
+      assertEquals(
+          1,
+          refsAfterBranchCreation.size(),
+          "Should have feature_empty branch (main doesn't exist yet)");
+      assertEquals(
+          "feature_empty",
+          refsAfterBranchCreation.get(0).getString(0),
+          "Should have feature_empty branch");
+
+      // ===== WAP BRANCH TESTING =====
+
+      // 1. Set WAP branch and insert data - should go to the feature_empty branch
+      spark.conf().set("spark.wap.branch", "feature_empty");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('wap_branch_data_1')");
+
+      // Verify WAP branch data is visible when spark.wap.branch is set
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Should see 1 row when spark.wap.branch=feature_empty");
+
+      List<Row> wapBranchData = spark.sql("SELECT name FROM " + tableName + "").collectAsList();
+      assertEquals(
+          "wap_branch_data_1", wapBranchData.get(0).getString(0), "Should see WAP branch data");
+
+      // Verify feature_empty branch directly
+      assertEquals(
+          1,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_empty'")
+              .collectAsList()
+              .size(),
+          "feature_empty branch should have 1 row");
+
+      // Unset WAP branch - queries should now see main branch (which doesn't exist yet, so empty)
+      spark.conf().unset("spark.wap.branch");
+      assertEquals(
+          0,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Should see 0 rows when spark.wap.branch is unset (main doesn't exist)");
+
+      // ===== MULTI-BRANCH WAP TESTING =====
+
+      // 2. Create main branch with regular data
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main_data')");
+
+      // Now we should have main branch
+      List<Row> refs =
+          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
+      assertEquals(2, refs.size(), "Should have feature_empty and main branches");
+
+      // Verify main branch data when spark.wap.branch is unset
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should have 1 row");
+      List<Row> mainData = spark.sql("SELECT name FROM " + tableName + "").collectAsList();
+      assertEquals("main_data", mainData.get(0).getString(0), "Should see main branch data");
+
+      // 3. Create another branch and test WAP branch functionality
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_wap_test");
+
+      // Set WAP branch to feature_wap_test and add data
+      spark.conf().set("spark.wap.branch", "feature_wap_test");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('wap_branch_data_2')");
+
+      // Verify WAP branch data is visible when spark.wap.branch=feature_wap_test
+      assertEquals(
+          2,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Should see 2 rows when spark.wap.branch=feature_wap_test (main_data + wap_branch_data_2)");
+
+      // ===== COMPREHENSIVE WAP BRANCH ISOLATION VERIFICATION =====
+
+      // Verify each branch has independent data
+      spark.conf().unset("spark.wap.branch");
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should have 1 row when WAP branch is unset");
+
+      assertEquals(
+          1,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_empty'")
+              .collectAsList()
+              .size(),
+          "feature_empty branch should have 1 row");
+
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_wap_test'")
+              .collectAsList()
+              .size(),
+          "feature_wap_test branch should have 2 rows");
+
+      // Verify data content isolation
+      List<Row> finalMainData =
+          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
+      assertEquals("main_data", finalMainData.get(0).getString(0), "main should contain main_data");
+
+      List<Row> finalFeatureEmptyData =
+          spark
+              .sql("SELECT name FROM " + tableName + " VERSION AS OF 'feature_empty' ORDER BY name")
+              .collectAsList();
+      assertEquals(
+          "wap_branch_data_1",
+          finalFeatureEmptyData.get(0).getString(0),
+          "feature_empty should contain wap_branch_data_1");
+
+      List<Row> finalFeatureWapTestData =
+          spark
+              .sql(
+                  "SELECT name FROM "
+                      + tableName
+                      + " VERSION AS OF 'feature_wap_test' ORDER BY name")
+              .collectAsList();
+      assertEquals(
+          "main_data",
+          finalFeatureWapTestData.get(0).getString(0),
+          "feature_wap_test should contain main_data");
+      assertEquals(
+          "wap_branch_data_2",
+          finalFeatureWapTestData.get(1).getString(0),
+          "feature_wap_test should contain wap_branch_data_2");
+
+      // ===== WAP BRANCH SWITCHING BEHAVIOR =====
+
+      // 4. Test switching between WAP branches
+      spark.conf().set("spark.wap.branch", "feature_empty");
+      List<Row> switchToFeatureEmpty =
+          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
+      assertEquals(
+          "wap_branch_data_1",
+          switchToFeatureEmpty.get(0).getString(0),
+          "Should see feature_empty data when switched");
+
+      spark.conf().set("spark.wap.branch", "feature_wap_test");
+      List<Row> switchToFeatureWapTest =
+          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
+      assertEquals(
+          2, switchToFeatureWapTest.size(), "Should see 2 rows when switched to feature_wap_test");
+      assertEquals(
+          "main_data", switchToFeatureWapTest.get(0).getString(0), "First row should be main_data");
+      assertEquals(
+          "wap_branch_data_2",
+          switchToFeatureWapTest.get(1).getString(0),
+          "Second row should be wap_branch_data_2");
+
+      // 5. Test INSERT behavior with WAP branch set
+      spark.conf().set("spark.wap.branch", "feature_empty");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('additional_wap_data')");
+
+      // Verify the insert went to the WAP branch
+      assertEquals(
+          2,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Should see 2 rows in feature_empty after additional insert");
+
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_empty'")
+              .collectAsList()
+              .size(),
+          "feature_empty branch should have 2 rows after additional insert");
+
+      // Verify other branches are unchanged
+      spark.conf().unset("spark.wap.branch");
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should still have 1 row (unchanged)");
+
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_wap_test'")
+              .collectAsList()
+              .size(),
+          "feature_wap_test branch should still have 2 rows (unchanged)");
+
+      // ===== SNAPSHOT HISTORY VERIFICATION =====
+
+      // 6. Verify that each branch points to different snapshots
+      List<Row> finalMainSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
+              .collectAsList();
+      List<Row> finalFeatureEmptySnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_empty'")
+              .collectAsList();
+      List<Row> finalFeatureWapTestSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_wap_test'")
+              .collectAsList();
+
+      assertNotEquals(
+          finalMainSnapshots.get(0).getLong(0),
+          finalFeatureEmptySnapshots.get(0).getLong(0),
+          "main and feature_empty should point to different snapshots");
+      assertNotEquals(
+          finalMainSnapshots.get(0).getLong(0),
+          finalFeatureWapTestSnapshots.get(0).getLong(0),
+          "main and feature_wap_test should point to different snapshots");
+      assertNotEquals(
+          finalFeatureEmptySnapshots.get(0).getLong(0),
+          finalFeatureWapTestSnapshots.get(0).getLong(0),
+          "feature_empty and feature_wap_test should point to different snapshots");
+
+      // Clean up WAP branch configuration
+      spark.conf().unset("spark.wap.branch");
+    }
+  }
+
+  @Test
+  public void testWapBranchCommitWithMultipleBranches() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "wap_multi_branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      // Create table and enable WAP
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
+
+      // Step 1: Start with main at snapshotX
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main_data')");
+
+      // Verify main branch exists and get its snapshot
+      List<Row> mainSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
+              .collectAsList();
+      assertEquals(1, mainSnapshots.size(), "Main branch should exist");
+      long snapshotX = mainSnapshots.get(0).getLong(0);
+      System.out.println("SnapshotX (main): " + snapshotX);
+
+      // Step 2: Create branchA from main → branchA also points to snapshotX
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH branchA");
+
+      // Verify branchA points to same snapshot as main
+      List<Row> branchASnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
+              .collectAsList();
+      assertEquals(1, branchASnapshots.size(), "BranchA should exist");
+      long branchASnapshotAfterCreation = branchASnapshots.get(0).getLong(0);
+      assertEquals(
+          snapshotX, branchASnapshotAfterCreation, "BranchA should point to same snapshot as main");
+
+      // Step 3: Set branchA as the WAP branch and commit data
+      spark.conf().set("spark.wap.branch", "branchA");
+      spark.sql("INSERT INTO " + tableName + " VALUES ('branchA_data')");
+
+      // Step 4: Verify branchA now points to snapshotY (child of snapshotX)
+      List<Row> branchASnapshotsAfterCommit =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
+              .collectAsList();
+      long snapshotY = branchASnapshotsAfterCommit.get(0).getLong(0);
+      assertNotEquals(
+          snapshotX, snapshotY, "BranchA should now point to a new snapshot (snapshotY)");
+      System.out.println("SnapshotY (branchA after commit): " + snapshotY);
+
+      // Verify branchA has both main_data and branchA_data
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchA'")
+              .collectAsList()
+              .size(),
+          "BranchA should have 2 rows after commit");
+
+      // Verify main still points to snapshotX and has only main_data
+      spark.conf().unset("spark.wap.branch");
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should still have 1 row");
+
+      // Step 5: Create branchB from branchA → branchB points to snapshotY
+      // First create the branch, then set it to point to the same snapshot as branchA
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH branchB");
+      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'branchB', 'branchA')");
+
+      // Verify branchB points to snapshotY
+      List<Row> branchBSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
+              .collectAsList();
+      long branchBSnapshotAfterCreation = branchBSnapshots.get(0).getLong(0);
+      assertEquals(
+          snapshotY,
+          branchBSnapshotAfterCreation,
+          "BranchB should point to snapshotY (same as branchA)");
+
+      // Step 6: Make a commit on branchB → branchB now points to snapshotZ (child of snapshotY)
+      // Use direct branch syntax to target branchB specifically
+      spark.sql("INSERT INTO " + tableName + ".branch_branchB VALUES ('branchB_data')");
+
+      // Verify branchB now points to snapshotZ
+      List<Row> branchBSnapshotsAfterCommit =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
+              .collectAsList();
+      long snapshotZ = branchBSnapshotsAfterCommit.get(0).getLong(0);
+      assertNotEquals(
+          snapshotY, snapshotZ, "BranchB should now point to a new snapshot (snapshotZ)");
+      System.out.println("SnapshotZ (branchB after commit): " + snapshotZ);
+
+      // ===== VERIFICATION OF FINAL STATE =====
+
+      // Verify all three branches exist and point to different snapshots
+      List<Row> allRefs =
+          spark
+              .sql("SELECT name, snapshot_id FROM " + tableName + ".refs ORDER BY name")
+              .collectAsList();
+      assertEquals(3, allRefs.size(), "Should have 3 branches: main, branchA, branchB");
+
+      // Verify snapshot relationships
+      List<Row> mainFinalSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
+              .collectAsList();
+      List<Row> branchAFinalSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
+              .collectAsList();
+      List<Row> branchBFinalSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
+              .collectAsList();
+
+      long finalSnapshotX = mainFinalSnapshots.get(0).getLong(0);
+      long finalSnapshotY = branchAFinalSnapshots.get(0).getLong(0);
+      long finalSnapshotZ = branchBFinalSnapshots.get(0).getLong(0);
+
+      assertEquals(snapshotX, finalSnapshotX, "Main should still point to snapshotX");
+      assertEquals(snapshotY, finalSnapshotY, "BranchA should still point to snapshotY");
+      assertEquals(snapshotZ, finalSnapshotZ, "BranchB should point to snapshotZ");
+
+      // Verify data isolation between branches
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should have 1 row");
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchA'")
+              .collectAsList()
+              .size(),
+          "BranchA should have 2 rows");
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchB'")
+              .collectAsList()
+              .size(),
+          "BranchB should have 3 rows");
+
+      // Verify content
+      List<Row> mainData =
+          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
+      assertEquals("main_data", mainData.get(0).getString(0), "Main should contain main_data");
+
+      List<Row> branchAData =
+          spark
+              .sql("SELECT name FROM " + tableName + " VERSION AS OF 'branchA' ORDER BY name")
+              .collectAsList();
+      assertEquals(
+          "branchA_data", branchAData.get(0).getString(0), "BranchA should contain branchA_data");
+      assertEquals(
+          "main_data", branchAData.get(1).getString(0), "BranchA should contain main_data");
+
+      List<Row> branchBData =
+          spark
+              .sql("SELECT name FROM " + tableName + " VERSION AS OF 'branchB' ORDER BY name")
+              .collectAsList();
+      assertEquals(
+          "branchA_data", branchBData.get(0).getString(0), "BranchB should contain branchA_data");
+      assertEquals(
+          "branchB_data", branchBData.get(1).getString(0), "BranchB should contain branchB_data");
+      assertEquals(
+          "main_data", branchBData.get(2).getString(0), "BranchB should contain main_data");
+
+      // Verify parent-child relationships in snapshot metadata
+      List<Row> allSnapshots =
+          spark
+              .sql(
+                  "SELECT snapshot_id, parent_id FROM "
+                      + tableName
+                      + ".snapshots ORDER BY committed_at")
+              .collectAsList();
+      assertTrue(allSnapshots.size() >= 3, "Should have at least 3 snapshots");
+
+      // Clean up WAP configuration
+      spark.conf().unset("spark.wap.branch");
+    }
+  }
+
+  @Test
+  public void testRegularCommitWithMultipleBranches() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      String tableId = "regular_multi_branch_test_" + System.currentTimeMillis();
+      String tableName = "openhouse.d1." + tableId;
+
+      // Create table (no WAP needed for this test)
+      spark.sql("CREATE TABLE " + tableName + " (name string)");
+
+      // Step 1: Start with main at snapshotX
+      spark.sql("INSERT INTO " + tableName + " VALUES ('main_data')");
+
+      // Verify main branch exists and get its snapshot
+      List<Row> mainSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
+              .collectAsList();
+      assertEquals(1, mainSnapshots.size(), "Main branch should exist");
+      long snapshotX = mainSnapshots.get(0).getLong(0);
+      System.out.println("SnapshotX (main): " + snapshotX);
+
+      // Step 2: Create branchA from main → branchA also points to snapshotX
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH branchA");
+
+      // Verify branchA points to same snapshot as main
+      List<Row> branchASnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
+              .collectAsList();
+      assertEquals(1, branchASnapshots.size(), "BranchA should exist");
+      long branchASnapshotAfterCreation = branchASnapshots.get(0).getLong(0);
+      assertEquals(
+          snapshotX, branchASnapshotAfterCreation, "BranchA should point to same snapshot as main");
+
+      // Step 3: Commit some data on branchA → branchA now points to snapshotY (child of snapshotX)
+      spark.sql("INSERT INTO " + tableName + ".branch_branchA VALUES ('branchA_data')");
+
+      // Verify branchA now points to snapshotY (child of snapshotX)
+      List<Row> branchASnapshotsAfterCommit =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
+              .collectAsList();
+      long snapshotY = branchASnapshotsAfterCommit.get(0).getLong(0);
+      assertNotEquals(
+          snapshotX, snapshotY, "BranchA should now point to a new snapshot (snapshotY)");
+      System.out.println("SnapshotY (branchA after commit): " + snapshotY);
+
+      // Verify branchA has both main_data and branchA_data
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchA'")
+              .collectAsList()
+              .size(),
+          "BranchA should have 2 rows after commit");
+
+      // Verify main still points to snapshotX and has only main_data
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should still have 1 row");
+
+      // Step 4: Create branchB from branchA → branchB points to snapshotY
+      // First create the branch, then set it to point to the same snapshot as branchA
+      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH branchB");
+      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'branchB', 'branchA')");
+
+      // Verify branchB points to snapshotY
+      List<Row> branchBSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
+              .collectAsList();
+      long branchBSnapshotAfterCreation = branchBSnapshots.get(0).getLong(0);
+      assertEquals(
+          snapshotY,
+          branchBSnapshotAfterCreation,
+          "BranchB should point to snapshotY (same as branchA)");
+
+      // Step 5: Make a commit on branchB → branchB now points to snapshotZ (child of snapshotY)
+      spark.sql("INSERT INTO " + tableName + ".branch_branchB VALUES ('branchB_data')");
+
+      // Verify branchB now points to snapshotZ
+      List<Row> branchBSnapshotsAfterCommit =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
+              .collectAsList();
+      long snapshotZ = branchBSnapshotsAfterCommit.get(0).getLong(0);
+      assertNotEquals(
+          snapshotY, snapshotZ, "BranchB should now point to a new snapshot (snapshotZ)");
+      System.out.println("SnapshotZ (branchB after commit): " + snapshotZ);
+
+      // ===== VERIFICATION OF FINAL STATE =====
+
+      // Verify all three branches exist and point to different snapshots
+      List<Row> allRefs =
+          spark
+              .sql("SELECT name, snapshot_id FROM " + tableName + ".refs ORDER BY name")
+              .collectAsList();
+      assertEquals(3, allRefs.size(), "Should have 3 branches: main, branchA, branchB");
+
+      // Verify snapshot relationships
+      List<Row> mainFinalSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
+              .collectAsList();
+      List<Row> branchAFinalSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
+              .collectAsList();
+      List<Row> branchBFinalSnapshots =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
+              .collectAsList();
+
+      long finalSnapshotX = mainFinalSnapshots.get(0).getLong(0);
+      long finalSnapshotY = branchAFinalSnapshots.get(0).getLong(0);
+      long finalSnapshotZ = branchBFinalSnapshots.get(0).getLong(0);
+
+      assertEquals(snapshotX, finalSnapshotX, "Main should still point to snapshotX");
+      assertEquals(snapshotY, finalSnapshotY, "BranchA should still point to snapshotY");
+      assertEquals(snapshotZ, finalSnapshotZ, "BranchB should point to snapshotZ");
+
+      // Verify all snapshots are different
+      assertNotEquals(
+          finalSnapshotX, finalSnapshotY, "SnapshotX and snapshotY should be different");
+      assertNotEquals(
+          finalSnapshotY, finalSnapshotZ, "SnapshotY and snapshotZ should be different");
+      assertNotEquals(
+          finalSnapshotX, finalSnapshotZ, "SnapshotX and snapshotZ should be different");
+
+      // Verify data isolation between branches
+      assertEquals(
+          1,
+          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
+          "Main branch should have 1 row");
+      assertEquals(
+          2,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchA'")
+              .collectAsList()
+              .size(),
+          "BranchA should have 2 rows");
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchB'")
+              .collectAsList()
+              .size(),
+          "BranchB should have 3 rows");
+
+      // Verify content
+      List<Row> mainData =
+          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
+      assertEquals("main_data", mainData.get(0).getString(0), "Main should contain main_data");
+
+      List<Row> branchAData =
+          spark
+              .sql("SELECT name FROM " + tableName + " VERSION AS OF 'branchA' ORDER BY name")
+              .collectAsList();
+      assertEquals(
+          "branchA_data", branchAData.get(0).getString(0), "BranchA should contain branchA_data");
+      assertEquals(
+          "main_data", branchAData.get(1).getString(0), "BranchA should contain main_data");
+
+      List<Row> branchBData =
+          spark
+              .sql("SELECT name FROM " + tableName + " VERSION AS OF 'branchB' ORDER BY name")
+              .collectAsList();
+      assertEquals(
+          "branchA_data", branchBData.get(0).getString(0), "BranchB should contain branchA_data");
+      assertEquals(
+          "branchB_data", branchBData.get(1).getString(0), "BranchB should contain branchB_data");
+      assertEquals(
+          "main_data", branchBData.get(2).getString(0), "BranchB should contain main_data");
+
+      // ===== TEST THE SPECIFIC SCENARIO THAT WOULD HAVE BEEN AMBIGUOUS =====
+
+      // At this point, we have:
+      // - main points to snapshotX
+      // - branchA points to snapshotY
+      // - branchB points to snapshotZ
+      //
+      // If we were to commit a new snapshot as child of snapshotY, our fixed logic should work
+      // because only the explicitly targeted branch (via branch-specific insert syntax) should be
+      // considered
+
+      // Verify that we can still commit to branchA even though multiple branches exist
+      spark.sql("INSERT INTO " + tableName + ".branch_branchA VALUES ('additional_branchA_data')");
+
+      // Verify branchA advanced but branchB didn't
+      List<Row> branchAFinalSnapshots2 =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
+              .collectAsList();
+      List<Row> branchBFinalSnapshots2 =
+          spark
+              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
+              .collectAsList();
+
+      long finalSnapshotY2 = branchAFinalSnapshots2.get(0).getLong(0);
+      long finalSnapshotZ2 = branchBFinalSnapshots2.get(0).getLong(0);
+
+      assertNotEquals(snapshotY, finalSnapshotY2, "BranchA should have advanced to a new snapshot");
+      assertEquals(snapshotZ, finalSnapshotZ2, "BranchB should remain at the same snapshot");
+
+      // Verify data counts
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchA'")
+              .collectAsList()
+              .size(),
+          "BranchA should now have 3 rows");
+      assertEquals(
+          3,
+          spark
+              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchB'")
+              .collectAsList()
+              .size(),
+          "BranchB should still have 3 rows (unchanged)");
+    }
+  }
+
   // ===== CHERRY PICKING BETWEEN BRANCHES =====
 
   @Test

From 554a3c3ddb33ccf3ee74fe7cf2baba37683d8497 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Wed, 1 Oct 2025 18:34:48 -0700
Subject: [PATCH 06/35] refactoring for readability

---
 .../OpenHouseInternalTableOperations.java     | 472 +++++++++++-------
 .../OpenHouseInternalTableOperationsTest.java |   4 +-
 .../spark/catalogtest/BranchTestSpark3_5.java |   4 +-
 3 files changed, 307 insertions(+), 173 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
index 5ed27c62b..d486d1b63 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
@@ -292,7 +292,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) {
         // Multi-branch support is now enabled with snapshot ID matching
 
         updatedMetadata =
-            maybeAppendSnapshots(updatedMetadata, appendedSnapshots, snapshotRefs, true);
+            applySnapshotOperations(updatedMetadata, appendedSnapshots, snapshotRefs, true);
         updatedMetadata = maybeDeleteSnapshots(updatedMetadata, deletedSnapshots);
       }
 
@@ -571,97 +571,120 @@ private String determineTargetBranch(
   }
 
   /**
-   * Determines the target branch for snapshot commits using explicit branch targeting information.
-   * The snapshotRefs parameter contains the explicit branch targeting from the client commit
-   * operation.
+   * Returns the single target branch when only one branch is explicitly specified. This is the most
+   * common case - client explicitly specified which branch to commit to.
    */
-  private String determineTargetBranch(
-      Map<String, SnapshotRef> snapshotRefs, List<Snapshot> newSnapshots, String defaultBranch) {
-    if (MapUtils.isEmpty(snapshotRefs)) {
-      return defaultBranch;
+  private String getSingleTargetBranch(Map<String, SnapshotRef> snapshotRefs) {
+    String targetBranch = snapshotRefs.keySet().iterator().next();
+    log.debug("Using explicit target branch from commit context: {}", targetBranch);
+    return targetBranch;
+  }
+
+  /**
+   * Finds branches that exactly match the given snapshot ID. Returns the single matching branch, or
+   * null if there are zero or multiple matches.
+   */
+  private String findExactSnapshotMatch(Map<String, SnapshotRef> snapshotRefs, long snapshotId) {
+    List<String> exactMatches = new ArrayList<>();
+    for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
+      String branchName = entry.getKey();
+      long branchSnapshotId = entry.getValue().snapshotId();
+
+      if (branchSnapshotId == snapshotId) {
+        exactMatches.add(branchName);
+      }
     }
 
-    // If there's only one branch in the refs, use that as the target
-    // This is the most common case - client explicitly specified which branch to commit to
-    if (snapshotRefs.size() == 1) {
-      String targetBranch = snapshotRefs.keySet().iterator().next();
-      log.debug("Using explicit target branch from commit context: {}", targetBranch);
+    if (exactMatches.size() == 1) {
+      String targetBranch = exactMatches.get(0);
+      log.info(
+          "Determined target branch '{}' by exact snapshot ID match within commit context: {}",
+          targetBranch,
+          snapshotId);
+      return targetBranch;
+    } else if (exactMatches.size() > 1) {
+      log.error("Multiple branches point to same snapshot {}: {}", snapshotId, exactMatches);
+      throw new IllegalStateException(
+          String.format(
+              "Multiple explicitly targeted branches point to the same snapshot %s: %s. "
+                  + "This indicates an invalid commit state.",
+              snapshotId, exactMatches));
+    }
+
+    // No exact match or zero matches
+    return null;
+  }
+
+  /**
+   * Finds branches that match parent-child relationship with the given snapshot. Returns the single
+   * matching branch, or null if there are zero or multiple matches.
+   */
+  private String findParentChildMatch(
+      Map<String, SnapshotRef> snapshotRefs, long parentSnapshotId, long childSnapshotId) {
+    List<String> parentMatches = new ArrayList<>();
+    for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
+      String branchName = entry.getKey();
+      long branchSnapshotId = entry.getValue().snapshotId();
+
+      if (branchSnapshotId == parentSnapshotId) {
+        parentMatches.add(branchName);
+        log.info("Branch '{}' matches parent snapshot {}", branchName, parentSnapshotId);
+      }
+    }
+
+    if (parentMatches.size() == 1) {
+      String targetBranch = parentMatches.get(0);
+      log.info(
+          "Determined target branch '{}' by parent-child relationship within commit context: new snapshot {} is child of branch snapshot {}",
+          targetBranch,
+          childSnapshotId,
+          parentSnapshotId);
       return targetBranch;
+    } else if (parentMatches.size() > 1) {
+      log.error(
+          "Multiple branches point to parent snapshot {}: {}", parentSnapshotId, parentMatches);
+      throw new IllegalStateException(
+          String.format(
+              "Multiple explicitly targeted branches point to parent snapshot %s: %s. "
+                  + "Cannot determine which branch should receive child snapshot %s. "
+                  + "This indicates ambiguous commit targeting - the client should specify a single target branch.",
+              parentSnapshotId, parentMatches, childSnapshotId));
     }
 
-    // Multiple branches specified in commit - need to determine which one based on snapshot
-    // relationships
+    // No parent match or zero matches - could happen in cherry-pick or other non-linear operations
+    return null;
+  }
+
+  /**
+   * Determines target branch when multiple branches are specified by analyzing snapshot
+   * relationships.
+   */
+  private String determineTargetFromMultipleBranches(
+      Map<String, SnapshotRef> snapshotRefs, List<Snapshot> newSnapshots) {
+
     log.info(
         "Multiple branches in snapshotRefs ({}), analyzing snapshot relationships",
         snapshotRefs.size());
+
     if (!newSnapshots.isEmpty()) {
       Snapshot latestSnapshot = newSnapshots.get(newSnapshots.size() - 1);
       long latestSnapshotId = latestSnapshot.snapshotId();
       log.info("Latest snapshot ID: {}", latestSnapshotId);
 
-      // First try: exact snapshot ID match within the explicitly targeted branches
-      List<String> exactMatches = new ArrayList<>();
-      for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
-        String branchName = entry.getKey();
-        long branchSnapshotId = entry.getValue().snapshotId();
-
-        if (branchSnapshotId == latestSnapshotId) {
-          exactMatches.add(branchName);
-        }
+      // First try: exact snapshot ID match
+      String exactMatch = findExactSnapshotMatch(snapshotRefs, latestSnapshotId);
+      if (exactMatch != null) {
+        return exactMatch;
       }
 
-      if (exactMatches.size() == 1) {
-        String targetBranch = exactMatches.get(0);
-        log.info(
-            "Determined target branch '{}' by exact snapshot ID match within commit context: {}",
-            targetBranch,
-            latestSnapshotId);
-        return targetBranch;
-      } else if (exactMatches.size() > 1) {
-        log.error(
-            "Multiple branches point to same snapshot {}: {}", latestSnapshotId, exactMatches);
-        throw new IllegalStateException(
-            String.format(
-                "Multiple explicitly targeted branches point to the same snapshot %s: %s. "
-                    + "This indicates an invalid commit state.",
-                latestSnapshotId, exactMatches));
-      }
-
-      // Second try: parent-child relationship match within the explicitly targeted branches
+      // Second try: parent-child relationship match
       Long parentSnapshotId = latestSnapshot.parentId();
       log.info("Parent snapshot ID: {}", parentSnapshotId);
       if (parentSnapshotId != null) {
-        List<String> parentMatches = new ArrayList<>();
-        for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
-          String branchName = entry.getKey();
-          long branchSnapshotId = entry.getValue().snapshotId();
-
-          if (branchSnapshotId == parentSnapshotId) {
-            parentMatches.add(branchName);
-            log.info("Branch '{}' matches parent snapshot {}", branchName, parentSnapshotId);
-          }
-        }
-
-        if (parentMatches.size() == 1) {
-          String targetBranch = parentMatches.get(0);
-          log.info(
-              "Determined target branch '{}' by parent-child relationship within commit context: new snapshot {} is child of branch snapshot {}",
-              targetBranch,
-              latestSnapshotId,
-              parentSnapshotId);
-          return targetBranch;
-        } else if (parentMatches.size() > 1) {
-          log.error(
-              "Multiple branches point to parent snapshot {}: {}", parentSnapshotId, parentMatches);
-          throw new IllegalStateException(
-              String.format(
-                  "Multiple explicitly targeted branches point to parent snapshot %s: %s. "
-                      + "Cannot determine which branch should receive child snapshot %s. "
-                      + "This indicates ambiguous commit targeting - the client should specify a single target branch.",
-                  parentSnapshotId, parentMatches, latestSnapshotId));
+        String parentMatch = findParentChildMatch(snapshotRefs, parentSnapshotId, latestSnapshotId);
+        if (parentMatch != null) {
+          return parentMatch;
         }
-        // If parentMatches.size() == 0, none of the explicitly targeted branches are parents
-        // This could happen in cherry-pick or other non-linear operations
       }
     }
 
@@ -680,19 +703,197 @@ private String determineTargetBranch(
             snapshotRefs.keySet()));
   }
 
-  public TableMetadata maybeAppendSnapshots(
-      TableMetadata metadata,
-      List<Snapshot> snapshotsToAppend,
+  /**
+   * Determines the target branch for snapshot commits using explicit branch targeting information.
+   * The snapshotRefs parameter contains the explicit branch targeting from the client commit
+   * operation.
+   */
+  private String determineTargetBranch(
+      Map<String, SnapshotRef> snapshotRefs, List<Snapshot> newSnapshots, String defaultBranch) {
+
+    // Handle simple case: no explicit branch targeting
+    if (MapUtils.isEmpty(snapshotRefs)) {
+      return defaultBranch;
+    }
+
+    // Handle simple case: single branch explicitly specified
+    if (snapshotRefs.size() == 1) {
+      return getSingleTargetBranch(snapshotRefs);
+    }
+
+    // Handle complex case: multiple branches with snapshot relationship analysis
+    return determineTargetFromMultipleBranches(snapshotRefs, newSnapshots);
+  }
+
+  /**
+   * Applies a regular (non-WAP, non-cherry-picked) snapshot by assigning it to a branch or staging
+   * it.
+   */
+  private void applyRegularSnapshot(
+      Snapshot snapshot,
       Map<String, SnapshotRef> snapshotRefs,
-      boolean recordAction) {
-    TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(metadata);
+      TableMetadata.Builder metadataBuilder) {
+
+    if (MapUtils.isNotEmpty(snapshotRefs)) {
+      // We have explicit branch information, use it to assign snapshot
+      String targetBranch =
+          determineTargetBranch(
+              snapshotRefs, Collections.singletonList(snapshot), SnapshotRef.MAIN_BRANCH);
+      metadataBuilder.setBranchSnapshot(snapshot, targetBranch);
+    } else {
+      // No branch information provided - add snapshot without assigning to any branch
+      // The snapshot will exist in metadata but won't be the HEAD of any branch
+      // Branch refs can be updated later via separate calls to applySnapshotOperations with
+      // snapshotRefs
+      metadataBuilder.addSnapshot(snapshot);
+    }
+  }
+
+  /** Applies a WAP staged snapshot - not committed to any branch. */
+  private void applyStagedSnapshot(Snapshot snapshot, TableMetadata.Builder metadataBuilder) {
+    metadataBuilder.addSnapshot(snapshot);
+  }
+
+  /** Applies a cherry-picked snapshot - non fast-forward cherry pick. */
+  private void applyCherryPickedSnapshot(
+      Snapshot snapshot,
+      Map<String, SnapshotRef> snapshotRefs,
+      TableMetadata.Builder metadataBuilder) {
+    String targetBranch =
+        determineTargetBranch(
+            snapshotRefs, Collections.singletonList(snapshot), SnapshotRef.MAIN_BRANCH);
+    metadataBuilder.setBranchSnapshot(snapshot, targetBranch);
+  }
+
+  /** Result of categorizing and applying snapshots. */
+  private static class SnapshotOperationResult {
+    final List<String> appendedSnapshots;
+    final List<String> stagedSnapshots;
+    final List<String> cherryPickedSnapshots;
+
+    SnapshotOperationResult(
+        List<String> appendedSnapshots,
+        List<String> stagedSnapshots,
+        List<String> cherryPickedSnapshots) {
+      this.appendedSnapshots = new ArrayList<>(appendedSnapshots);
+      this.stagedSnapshots = new ArrayList<>(stagedSnapshots);
+      this.cherryPickedSnapshots = new ArrayList<>(cherryPickedSnapshots);
+    }
+  }
+
+  /** Categorizes snapshots by type and applies them to the metadata builder. */
+  private SnapshotOperationResult categorizeAndApplySnapshots(
+      List<Snapshot> snapshots,
+      Map<String, SnapshotRef> snapshotRefs,
+      TableMetadata.Builder metadataBuilder) {
+
     List<String> appendedSnapshots = new ArrayList<>();
     List<String> stagedSnapshots = new ArrayList<>();
     List<String> cherryPickedSnapshots = new ArrayList<>();
 
+    for (Snapshot snapshot : snapshots) {
+      snapshotInspector.validateSnapshot(snapshot);
+
+      if (snapshot.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP)) {
+        applyStagedSnapshot(snapshot, metadataBuilder);
+        stagedSnapshots.add(String.valueOf(snapshot.snapshotId()));
+
+      } else if (snapshot.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)) {
+        applyCherryPickedSnapshot(snapshot, snapshotRefs, metadataBuilder);
+        appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
+        cherryPickedSnapshots.add(
+            String.valueOf(snapshot.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)));
+
+      } else {
+        applyRegularSnapshot(snapshot, snapshotRefs, metadataBuilder);
+        appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
+      }
+    }
+
+    return new SnapshotOperationResult(appendedSnapshots, stagedSnapshots, cherryPickedSnapshots);
+  }
+
+  /**
+   * Updates branch references for fast-forward cherry-pick or rollback operations. Returns list of
+   * cherry-picked snapshot IDs.
+   */
+  private List<String> updateBranchReferences(
+      TableMetadata metadata,
+      Map<String, SnapshotRef> snapshotRefs,
+      TableMetadata.Builder metadataBuilder) {
+
+    List<String> cherryPickedSnapshots = new ArrayList<>();
+
+    for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
+      String branchName = entry.getKey();
+      long newSnapshotId = entry.getValue().snapshotId();
+
+      if (needsBranchUpdate(metadata, branchName, newSnapshotId)) {
+        metadataBuilder.setBranchSnapshot(newSnapshotId, branchName);
+        cherryPickedSnapshots.add(String.valueOf(newSnapshotId));
+      }
+    }
+
+    return cherryPickedSnapshots;
+  }
+
+  /** Checks if a branch needs to be updated based on current refs and new snapshot ID. */
+  private boolean needsBranchUpdate(TableMetadata metadata, String branchName, long newSnapshotId) {
+    if (MapUtils.isEmpty(metadata.refs())) {
+      // No refs exist yet, this is a new branch
+      return true;
+    }
+
+    SnapshotRef currentRef = metadata.refs().get(branchName);
+    return currentRef == null || currentRef.snapshotId() != newSnapshotId;
+  }
+
+  /** Records snapshot actions in table properties and reports metrics. */
+  private void recordSnapshotActions(
+      TableMetadata metadata,
+      TableMetadata.Builder metadataBuilder,
+      List<String> appendedSnapshots,
+      List<String> stagedSnapshots,
+      List<String> cherryPickedSnapshots) {
+
+    Map<String, String> updatedProperties = new HashMap<>(metadata.properties());
+
+    if (CollectionUtils.isNotEmpty(appendedSnapshots)) {
+      updatedProperties.put(
+          getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
+          appendedSnapshots.stream().collect(Collectors.joining(",")));
+      metricsReporter.count(
+          InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedSnapshots.size());
+    }
+
+    if (CollectionUtils.isNotEmpty(stagedSnapshots)) {
+      updatedProperties.put(
+          getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
+          stagedSnapshots.stream().collect(Collectors.joining(",")));
+      metricsReporter.count(
+          InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, stagedSnapshots.size());
+    }
+
+    if (CollectionUtils.isNotEmpty(cherryPickedSnapshots)) {
+      updatedProperties.put(
+          getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
+          cherryPickedSnapshots.stream().collect(Collectors.joining(",")));
+      metricsReporter.count(
+          InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR, cherryPickedSnapshots.size());
+    }
+
+    metadataBuilder.setProperties(updatedProperties);
+  }
+
+  public TableMetadata applySnapshotOperations(
+      TableMetadata metadata,
+      List<Snapshot> snapshots,
+      Map<String, SnapshotRef> snapshotRefs,
+      boolean recordAction) {
+    TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(metadata);
+
     /**
-     * First check if there are new snapshots to be appended to current TableMetadata. If yes,
-     * following are the cases to be handled:
+     * Apply snapshots to current TableMetadata. The following cases are handled:
      *
      * <p>[1] A regular (non-wap) snapshot is being added to any branch.
      *
@@ -704,99 +905,32 @@ public TableMetadata maybeAppendSnapshots(
      * created on. Recognized by SOURCE_SNAPSHOT_ID_PROP. This case is called non-fast forward
      * cherry pick.
      *
-     * <p>In case no new snapshots are to be appended to current TableMetadata, there could be a
-     * cherrypick of a staged (wap) snapshot on top of the current snapshot in any branch which is
-     * the same as the base snapshot the staged (wap) snapshot was created on. This case is called
-     * fast forward cherry pick.
+     * <p>Additionally, branch ref updates can occur independently for fast-forward cherry-pick or
+     * rollback operations where existing snapshots are assigned to branches.
      */
-    if (CollectionUtils.isNotEmpty(snapshotsToAppend)) {
-      for (Snapshot snapshot : snapshotsToAppend) {
-        snapshotInspector.validateSnapshot(snapshot);
-        if (snapshot.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP)) {
-          // a stage only snapshot using wap.id - not committed to any branch
-          metadataBuilder.addSnapshot(snapshot);
-          stagedSnapshots.add(String.valueOf(snapshot.snapshotId()));
-        } else if (snapshot.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)) {
-          // a snapshot created on a non fast-forward cherry-pick snapshot
-          // Determine target branch from snapshotRefs or default to MAIN_BRANCH
-          // Pass only the current snapshot being processed, not the entire list
-          String targetBranch =
-              determineTargetBranch(
-                  snapshotRefs, Collections.singletonList(snapshot), SnapshotRef.MAIN_BRANCH);
-          metadataBuilder.setBranchSnapshot(snapshot, targetBranch);
-          appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
-          cherryPickedSnapshots.add(
-              String.valueOf(snapshot.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)));
-        } else {
-          // a regular snapshot - assign to appropriate branch using snapshotRefs context
-          if (MapUtils.isNotEmpty(snapshotRefs)) {
-            // We have explicit branch information, use it to assign snapshot
-            // Pass only the current snapshot being processed, not the entire list
-            String targetBranch =
-                determineTargetBranch(
-                    snapshotRefs, Collections.singletonList(snapshot), SnapshotRef.MAIN_BRANCH);
-            metadataBuilder.setBranchSnapshot(snapshot, targetBranch);
-          } else {
-            // No explicit branch refs - treat as staged snapshot
-            // This maintains isolation until refs are explicitly updated
-            metadataBuilder.addSnapshot(snapshot);
-          }
-          appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
-        }
-      }
-    }
+    SnapshotOperationResult snapshotResult =
+        CollectionUtils.isNotEmpty(snapshots)
+            ? categorizeAndApplySnapshots(snapshots, snapshotRefs, metadataBuilder)
+            : new SnapshotOperationResult(
+                Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
+
+    // Handle ref updates (this can happen independently of snapshot processing operations)
+    List<String> refUpdateResults =
+        MapUtils.isNotEmpty(snapshotRefs)
+            ? updateBranchReferences(metadata, snapshotRefs, metadataBuilder)
+            : Collections.emptyList();
 
-    // Handle ref updates (this can happen independently of snapshot append operations)
-    if (MapUtils.isNotEmpty(snapshotRefs)) {
-      // Handle ref updates for all branches (fast-forward cherry-pick or rollback operations)
-      for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
-        String branchName = entry.getKey();
-        long newSnapshotId = entry.getValue().snapshotId();
-
-        // Check if this is an actual update for this branch
-        boolean isUpdate = false;
-        if (MapUtils.isEmpty(metadata.refs())) {
-          // No refs exist yet, this is a new branch
-          isUpdate = true;
-        } else {
-          SnapshotRef currentRef = metadata.refs().get(branchName);
-          if (currentRef == null || currentRef.snapshotId() != newSnapshotId) {
-            // Branch doesn't exist or snapshot is different
-            isUpdate = true;
-          }
-        }
-
-        if (isUpdate) {
-          metadataBuilder.setBranchSnapshot(newSnapshotId, branchName);
-          cherryPickedSnapshots.add(String.valueOf(newSnapshotId));
-        }
-      }
-    }
     if (recordAction) {
-      Map<String, String> updatedProperties = new HashMap<>(metadata.properties());
-      if (CollectionUtils.isNotEmpty(appendedSnapshots)) {
-        updatedProperties.put(
-            getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
-            appendedSnapshots.stream().collect(Collectors.joining(",")));
-        metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedSnapshots.size());
-      }
-      if (CollectionUtils.isNotEmpty(stagedSnapshots)) {
-        updatedProperties.put(
-            getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
-            stagedSnapshots.stream().collect(Collectors.joining(",")));
-        metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, stagedSnapshots.size());
-      }
-      if (CollectionUtils.isNotEmpty(cherryPickedSnapshots)) {
-        updatedProperties.put(
-            getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
-            cherryPickedSnapshots.stream().collect(Collectors.joining(",")));
-        metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR,
-            cherryPickedSnapshots.size());
-      }
-      metadataBuilder.setProperties(updatedProperties);
+      // Combine cherry-picked snapshots from both operations
+      List<String> allCherryPickedSnapshots = new ArrayList<>(snapshotResult.cherryPickedSnapshots);
+      allCherryPickedSnapshots.addAll(refUpdateResults);
+
+      recordSnapshotActions(
+          metadata,
+          metadataBuilder,
+          snapshotResult.appendedSnapshots,
+          snapshotResult.stagedSnapshots,
+          allCherryPickedSnapshots);
     }
     return metadataBuilder.build();
   }
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index bcec8377b..125966bf5 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -599,12 +599,12 @@ void testAppendSnapshotsWithOldSnapshots() throws IOException {
     Assertions.assertThrows(
         IllegalArgumentException.class,
         () ->
-            openHouseInternalTableOperations.maybeAppendSnapshots(
+            openHouseInternalTableOperations.applySnapshotOperations(
                 metadata, snapshots, ImmutableMap.of(), false));
     // the latest snapshots have larger timestamp than the previous metadata timestamp, so it should
     // pass the validation
     snapshots.addAll(IcebergTestUtil.getFutureSnapshots());
-    openHouseInternalTableOperations.maybeAppendSnapshots(
+    openHouseInternalTableOperations.applySnapshotOperations(
         metadata, snapshots, ImmutableMap.of(), false);
   }
 
diff --git a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
index c8a0f3e03..488750620 100644
--- a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
+++ b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
@@ -17,7 +17,7 @@
 
 /**
  * Comprehensive tests for multi-branch WAP operations in Spark 3.5. Tests validate the enhanced
- * maybeAppendSnapshots functionality that supports: - Non-main branch operations (add/expire
+ * applySnapshotOperations functionality that supports: - Non-main branch operations (add/expire
  * snapshots from any branch) - WAP.id staging with multi-branch support - Cherry picking between
  * any branches - Fast forward merges for all branches - Backward compatibility with main-only
  * workflows - Forward compatibility for future wap.branch features
@@ -1311,7 +1311,7 @@ public void testCherryPickToMainWithFeatureBranch() throws Exception {
       spark.conf().unset("spark.wap.id");
       spark.sql("INSERT INTO " + tableName + " VALUES ('main.advance')");
 
-      // Cherry-pick WAP to main branch (this tests our enhanced maybeAppendSnapshots)
+      // Cherry-pick WAP to main branch (this tests our enhanced applySnapshotOperations)
       // Main should have 2 rows now (main.base + main.advance)
       assertEquals(2, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
       spark.sql(

From ea5ff0ee93f66525dd40bb5e61606ebc1eea78d7 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Wed, 1 Oct 2025 18:50:06 -0700
Subject: [PATCH 07/35] fixed edge case

---
 .../OpenHouseInternalTableOperations.java     | 95 +++++++++++++------
 1 file changed, 65 insertions(+), 30 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
index d486d1b63..4f4f6871c 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
@@ -779,6 +779,11 @@ private static class SnapshotOperationResult {
       this.stagedSnapshots = new ArrayList<>(stagedSnapshots);
       this.cherryPickedSnapshots = new ArrayList<>(cherryPickedSnapshots);
     }
+
+    static SnapshotOperationResult empty() {
+      return new SnapshotOperationResult(
+          Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
+    }
   }
 
   /** Categorizes snapshots by type and applies them to the metadata builder. */
@@ -814,29 +819,53 @@ private SnapshotOperationResult categorizeAndApplySnapshots(
   }
 
   /**
-   * Updates branch references for fast-forward cherry-pick or rollback operations. Returns list of
-   * cherry-picked snapshot IDs.
+   * Updates branch references to point to specific snapshots.
+   *
+   * <p>This handles two scenarios:
+   *
+   * <ul>
+   *   <li>Standalone ref operations: Moving branches to existing snapshots (fast-forward/rollback)
+   *   <li>Guided snapshot assignment: Using refs to guide where new snapshots should be assigned
+   * </ul>
+   *
+   * @param recordAsCherryPicks whether to record ref updates as cherry-pick operations
+   * @return list of snapshot IDs that were cherry-picked (only when recordAsCherryPicks is true)
    */
   private List<String> updateBranchReferences(
       TableMetadata metadata,
       Map<String, SnapshotRef> snapshotRefs,
-      TableMetadata.Builder metadataBuilder) {
+      TableMetadata.Builder metadataBuilder,
+      boolean recordAsCherryPicks) {
 
     List<String> cherryPickedSnapshots = new ArrayList<>();
 
     for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
       String branchName = entry.getKey();
-      long newSnapshotId = entry.getValue().snapshotId();
+      long targetSnapshotId = entry.getValue().snapshotId();
+
+      if (needsBranchUpdate(metadata, branchName, targetSnapshotId)) {
+        metadataBuilder.setBranchSnapshot(targetSnapshotId, branchName);
 
-      if (needsBranchUpdate(metadata, branchName, newSnapshotId)) {
-        metadataBuilder.setBranchSnapshot(newSnapshotId, branchName);
-        cherryPickedSnapshots.add(String.valueOf(newSnapshotId));
+        if (recordAsCherryPicks) {
+          cherryPickedSnapshots.add(String.valueOf(targetSnapshotId));
+        }
       }
     }
 
     return cherryPickedSnapshots;
   }
 
+  /**
+   * Combines cherry-picked snapshot IDs from both snapshot processing and standalone ref
+   * operations.
+   */
+  private List<String> combineCherryPickedSnapshots(
+      List<String> fromSnapshotProcessing, List<String> fromStandaloneRefUpdates) {
+    List<String> allCherryPicks = new ArrayList<>(fromSnapshotProcessing);
+    allCherryPicks.addAll(fromStandaloneRefUpdates);
+    return allCherryPicks;
+  }
+
   /** Checks if a branch needs to be updated based on current refs and new snapshot ID. */
   private boolean needsBranchUpdate(TableMetadata metadata, String branchName, long newSnapshotId) {
     if (MapUtils.isEmpty(metadata.refs())) {
@@ -893,43 +922,49 @@ public TableMetadata applySnapshotOperations(
     TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(metadata);
 
     /**
-     * Apply snapshots to current TableMetadata. The following cases are handled:
+     * Process snapshots and branch reference updates. Two main operation types:
      *
-     * <p>[1] A regular (non-wap) snapshot is being added to any branch.
+     * <p><b>Snapshot Processing:</b> When snapshots list is non-empty:
      *
-     * <p>[2] A staged (wap) snapshot is being created on top of current snapshot as its base.
-     * Recognized by STAGED_WAP_ID_PROP. These are stage-only and not committed to any branch.
+     * <ul>
+     *   <li>[1] Regular snapshots - committed to branches (if snapshotRefs provided) or staged
+     *   <li>[2] WAP staged snapshots (STAGED_WAP_ID_PROP) - staged but not committed to branches
+     *   <li>[3] Cherry-picked snapshots (SOURCE_SNAPSHOT_ID_PROP) - committed to target branches
+     * </ul>
      *
-     * <p>[3] A staged (wap) snapshot is being cherry picked to any branch wherein current snapshot
-     * in the target branch is not the same as the base snapshot the staged (wap) snapshot was
-     * created on. Recognized by SOURCE_SNAPSHOT_ID_PROP. This case is called non-fast forward
-     * cherry pick.
+     * <p><b>Branch Reference Updates:</b> When snapshotRefs is non-empty:
      *
-     * <p>Additionally, branch ref updates can occur independently for fast-forward cherry-pick or
-     * rollback operations where existing snapshots are assigned to branches.
+     * <ul>
+     *   <li>If snapshots are also provided: snapshotRefs guides branch assignment during processing
+     *   <li>If only snapshotRefs provided: standalone fast-forward/rollback operations on existing
+     *       snapshots
+     * </ul>
      */
-    SnapshotOperationResult snapshotResult =
+    SnapshotOperationResult snapshotProcessingResults =
         CollectionUtils.isNotEmpty(snapshots)
             ? categorizeAndApplySnapshots(snapshots, snapshotRefs, metadataBuilder)
-            : new SnapshotOperationResult(
-                Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
+            : SnapshotOperationResult.empty();
 
-    // Handle ref updates (this can happen independently of snapshot processing operations)
-    List<String> refUpdateResults =
-        MapUtils.isNotEmpty(snapshotRefs)
-            ? updateBranchReferences(metadata, snapshotRefs, metadataBuilder)
-            : Collections.emptyList();
+    // Update branch references (for standalone fast-forward/rollback operations)
+    List<String> standaloneRefCherryPicks = Collections.emptyList();
+    if (MapUtils.isNotEmpty(snapshotRefs)) {
+      boolean recordRefUpdatesAsCherryPicks = CollectionUtils.isEmpty(snapshots);
+      standaloneRefCherryPicks =
+          updateBranchReferences(
+              metadata, snapshotRefs, metadataBuilder, recordRefUpdatesAsCherryPicks);
+    }
 
     if (recordAction) {
-      // Combine cherry-picked snapshots from both operations
-      List<String> allCherryPickedSnapshots = new ArrayList<>(snapshotResult.cherryPickedSnapshots);
-      allCherryPickedSnapshots.addAll(refUpdateResults);
+      // Combine cherry-picked snapshots from both snapshot processing and standalone ref updates
+      List<String> allCherryPickedSnapshots =
+          combineCherryPickedSnapshots(
+              snapshotProcessingResults.cherryPickedSnapshots, standaloneRefCherryPicks);
 
       recordSnapshotActions(
           metadata,
           metadataBuilder,
-          snapshotResult.appendedSnapshots,
-          snapshotResult.stagedSnapshots,
+          snapshotProcessingResults.appendedSnapshots,
+          snapshotProcessingResults.stagedSnapshots,
           allCherryPickedSnapshots);
     }
     return metadataBuilder.build();

From 4fc379215db713fc370865c4c9d6bee6cc2e71f1 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Thu, 2 Oct 2025 12:49:52 -0700
Subject: [PATCH 08/35] refactoring to make more simple

---
 .../OpenHouseInternalTableOperations.java     | 571 +++++++-----------
 1 file changed, 229 insertions(+), 342 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
index 4f4f6871c..19dee8435 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
@@ -558,301 +558,22 @@ public TableMetadata maybeDeleteSnapshots(
     return result;
   }
 
-  /**
-   * Determines the target branch for a snapshot commit based on the provided snapshotRefs.
-   *
-   * @param snapshotRefs map of branch names to snapshot references
-   * @param defaultBranch default branch to use if no specific branch can be determined
-   * @return target branch name for the snapshot commit
-   */
-  private String determineTargetBranch(
-      Map<String, SnapshotRef> snapshotRefs, String defaultBranch) {
-    return determineTargetBranch(snapshotRefs, Collections.emptyList(), defaultBranch);
-  }
-
-  /**
-   * Returns the single target branch when only one branch is explicitly specified. This is the most
-   * common case - client explicitly specified which branch to commit to.
-   */
-  private String getSingleTargetBranch(Map<String, SnapshotRef> snapshotRefs) {
-    String targetBranch = snapshotRefs.keySet().iterator().next();
-    log.debug("Using explicit target branch from commit context: {}", targetBranch);
-    return targetBranch;
-  }
-
-  /**
-   * Finds branches that exactly match the given snapshot ID. Returns the single matching branch, or
-   * null if there are zero or multiple matches.
-   */
-  private String findExactSnapshotMatch(Map<String, SnapshotRef> snapshotRefs, long snapshotId) {
-    List<String> exactMatches = new ArrayList<>();
-    for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
-      String branchName = entry.getKey();
-      long branchSnapshotId = entry.getValue().snapshotId();
-
-      if (branchSnapshotId == snapshotId) {
-        exactMatches.add(branchName);
-      }
-    }
-
-    if (exactMatches.size() == 1) {
-      String targetBranch = exactMatches.get(0);
-      log.info(
-          "Determined target branch '{}' by exact snapshot ID match within commit context: {}",
-          targetBranch,
-          snapshotId);
-      return targetBranch;
-    } else if (exactMatches.size() > 1) {
-      log.error("Multiple branches point to same snapshot {}: {}", snapshotId, exactMatches);
-      throw new IllegalStateException(
-          String.format(
-              "Multiple explicitly targeted branches point to the same snapshot %s: %s. "
-                  + "This indicates an invalid commit state.",
-              snapshotId, exactMatches));
-    }
-
-    // No exact match or zero matches
-    return null;
-  }
-
-  /**
-   * Finds branches that match parent-child relationship with the given snapshot. Returns the single
-   * matching branch, or null if there are zero or multiple matches.
-   */
-  private String findParentChildMatch(
-      Map<String, SnapshotRef> snapshotRefs, long parentSnapshotId, long childSnapshotId) {
-    List<String> parentMatches = new ArrayList<>();
-    for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
-      String branchName = entry.getKey();
-      long branchSnapshotId = entry.getValue().snapshotId();
-
-      if (branchSnapshotId == parentSnapshotId) {
-        parentMatches.add(branchName);
-        log.info("Branch '{}' matches parent snapshot {}", branchName, parentSnapshotId);
-      }
-    }
-
-    if (parentMatches.size() == 1) {
-      String targetBranch = parentMatches.get(0);
-      log.info(
-          "Determined target branch '{}' by parent-child relationship within commit context: new snapshot {} is child of branch snapshot {}",
-          targetBranch,
-          childSnapshotId,
-          parentSnapshotId);
-      return targetBranch;
-    } else if (parentMatches.size() > 1) {
-      log.error(
-          "Multiple branches point to parent snapshot {}: {}", parentSnapshotId, parentMatches);
-      throw new IllegalStateException(
-          String.format(
-              "Multiple explicitly targeted branches point to parent snapshot %s: %s. "
-                  + "Cannot determine which branch should receive child snapshot %s. "
-                  + "This indicates ambiguous commit targeting - the client should specify a single target branch.",
-              parentSnapshotId, parentMatches, childSnapshotId));
-    }
+  /** Represents the semantic difference between current server state and client-desired state. */
+  private static class StateDiff {
+    final List<Snapshot> newSnapshots;
+    final Map<String, Long> branchUpdates; // branch -> snapshotId
+    final Map<String, Snapshot> snapshotLookup; // snapshotId -> Snapshot for efficiency
 
-    // No parent match or zero matches - could happen in cherry-pick or other non-linear operations
-    return null;
-  }
-
-  /**
-   * Determines target branch when multiple branches are specified by analyzing snapshot
-   * relationships.
-   */
-  private String determineTargetFromMultipleBranches(
-      Map<String, SnapshotRef> snapshotRefs, List<Snapshot> newSnapshots) {
-
-    log.info(
-        "Multiple branches in snapshotRefs ({}), analyzing snapshot relationships",
-        snapshotRefs.size());
-
-    if (!newSnapshots.isEmpty()) {
-      Snapshot latestSnapshot = newSnapshots.get(newSnapshots.size() - 1);
-      long latestSnapshotId = latestSnapshot.snapshotId();
-      log.info("Latest snapshot ID: {}", latestSnapshotId);
-
-      // First try: exact snapshot ID match
-      String exactMatch = findExactSnapshotMatch(snapshotRefs, latestSnapshotId);
-      if (exactMatch != null) {
-        return exactMatch;
-      }
-
-      // Second try: parent-child relationship match
-      Long parentSnapshotId = latestSnapshot.parentId();
-      log.info("Parent snapshot ID: {}", parentSnapshotId);
-      if (parentSnapshotId != null) {
-        String parentMatch = findParentChildMatch(snapshotRefs, parentSnapshotId, latestSnapshotId);
-        if (parentMatch != null) {
-          return parentMatch;
-        }
-      }
+    StateDiff(
+        List<Snapshot> newSnapshots,
+        Map<String, Long> branchUpdates,
+        List<Snapshot> allClientSnapshots) {
+      this.newSnapshots = List.copyOf(newSnapshots);
+      this.branchUpdates = Map.copyOf(branchUpdates);
+      this.snapshotLookup =
+          allClientSnapshots.stream()
+              .collect(Collectors.toMap(s -> String.valueOf(s.snapshotId()), s -> s));
     }
-
-    // If we reach here, we have multiple explicitly targeted branches but couldn't determine
-    // the target based on snapshot relationships. This suggests the commit operation itself
-    // is ambiguous or invalid.
-    log.error(
-        "Cannot determine target branch from explicitly targeted branches: {}",
-        snapshotRefs.keySet());
-    throw new IllegalStateException(
-        String.format(
-            "Cannot determine target branch from explicitly targeted branches: %s. "
-                + "The commit specifies multiple target branches but snapshot relationships "
-                + "don't clearly indicate which branch should receive the new snapshots. "
-                + "This suggests an invalid or ambiguous commit operation.",
-            snapshotRefs.keySet()));
-  }
-
-  /**
-   * Determines the target branch for snapshot commits using explicit branch targeting information.
-   * The snapshotRefs parameter contains the explicit branch targeting from the client commit
-   * operation.
-   */
-  private String determineTargetBranch(
-      Map<String, SnapshotRef> snapshotRefs, List<Snapshot> newSnapshots, String defaultBranch) {
-
-    // Handle simple case: no explicit branch targeting
-    if (MapUtils.isEmpty(snapshotRefs)) {
-      return defaultBranch;
-    }
-
-    // Handle simple case: single branch explicitly specified
-    if (snapshotRefs.size() == 1) {
-      return getSingleTargetBranch(snapshotRefs);
-    }
-
-    // Handle complex case: multiple branches with snapshot relationship analysis
-    return determineTargetFromMultipleBranches(snapshotRefs, newSnapshots);
-  }
-
-  /**
-   * Applies a regular (non-WAP, non-cherry-picked) snapshot by assigning it to a branch or staging
-   * it.
-   */
-  private void applyRegularSnapshot(
-      Snapshot snapshot,
-      Map<String, SnapshotRef> snapshotRefs,
-      TableMetadata.Builder metadataBuilder) {
-
-    if (MapUtils.isNotEmpty(snapshotRefs)) {
-      // We have explicit branch information, use it to assign snapshot
-      String targetBranch =
-          determineTargetBranch(
-              snapshotRefs, Collections.singletonList(snapshot), SnapshotRef.MAIN_BRANCH);
-      metadataBuilder.setBranchSnapshot(snapshot, targetBranch);
-    } else {
-      // No branch information provided - add snapshot without assigning to any branch
-      // The snapshot will exist in metadata but won't be the HEAD of any branch
-      // Branch refs can be updated later via separate calls to applySnapshotOperations with
-      // snapshotRefs
-      metadataBuilder.addSnapshot(snapshot);
-    }
-  }
-
-  /** Applies a WAP staged snapshot - not committed to any branch. */
-  private void applyStagedSnapshot(Snapshot snapshot, TableMetadata.Builder metadataBuilder) {
-    metadataBuilder.addSnapshot(snapshot);
-  }
-
-  /** Applies a cherry-picked snapshot - non fast-forward cherry pick. */
-  private void applyCherryPickedSnapshot(
-      Snapshot snapshot,
-      Map<String, SnapshotRef> snapshotRefs,
-      TableMetadata.Builder metadataBuilder) {
-    String targetBranch =
-        determineTargetBranch(
-            snapshotRefs, Collections.singletonList(snapshot), SnapshotRef.MAIN_BRANCH);
-    metadataBuilder.setBranchSnapshot(snapshot, targetBranch);
-  }
-
-  /** Result of categorizing and applying snapshots. */
-  private static class SnapshotOperationResult {
-    final List<String> appendedSnapshots;
-    final List<String> stagedSnapshots;
-    final List<String> cherryPickedSnapshots;
-
-    SnapshotOperationResult(
-        List<String> appendedSnapshots,
-        List<String> stagedSnapshots,
-        List<String> cherryPickedSnapshots) {
-      this.appendedSnapshots = new ArrayList<>(appendedSnapshots);
-      this.stagedSnapshots = new ArrayList<>(stagedSnapshots);
-      this.cherryPickedSnapshots = new ArrayList<>(cherryPickedSnapshots);
-    }
-
-    static SnapshotOperationResult empty() {
-      return new SnapshotOperationResult(
-          Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
-    }
-  }
-
-  /** Categorizes snapshots by type and applies them to the metadata builder. */
-  private SnapshotOperationResult categorizeAndApplySnapshots(
-      List<Snapshot> snapshots,
-      Map<String, SnapshotRef> snapshotRefs,
-      TableMetadata.Builder metadataBuilder) {
-
-    List<String> appendedSnapshots = new ArrayList<>();
-    List<String> stagedSnapshots = new ArrayList<>();
-    List<String> cherryPickedSnapshots = new ArrayList<>();
-
-    for (Snapshot snapshot : snapshots) {
-      snapshotInspector.validateSnapshot(snapshot);
-
-      if (snapshot.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP)) {
-        applyStagedSnapshot(snapshot, metadataBuilder);
-        stagedSnapshots.add(String.valueOf(snapshot.snapshotId()));
-
-      } else if (snapshot.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)) {
-        applyCherryPickedSnapshot(snapshot, snapshotRefs, metadataBuilder);
-        appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
-        cherryPickedSnapshots.add(
-            String.valueOf(snapshot.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)));
-
-      } else {
-        applyRegularSnapshot(snapshot, snapshotRefs, metadataBuilder);
-        appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
-      }
-    }
-
-    return new SnapshotOperationResult(appendedSnapshots, stagedSnapshots, cherryPickedSnapshots);
-  }
-
-  /**
-   * Updates branch references to point to specific snapshots.
-   *
-   * <p>This handles two scenarios:
-   *
-   * <ul>
-   *   <li>Standalone ref operations: Moving branches to existing snapshots (fast-forward/rollback)
-   *   <li>Guided snapshot assignment: Using refs to guide where new snapshots should be assigned
-   * </ul>
-   *
-   * @param recordAsCherryPicks whether to record ref updates as cherry-pick operations
-   * @return list of snapshot IDs that were cherry-picked (only when recordAsCherryPicks is true)
-   */
-  private List<String> updateBranchReferences(
-      TableMetadata metadata,
-      Map<String, SnapshotRef> snapshotRefs,
-      TableMetadata.Builder metadataBuilder,
-      boolean recordAsCherryPicks) {
-
-    List<String> cherryPickedSnapshots = new ArrayList<>();
-
-    for (Map.Entry<String, SnapshotRef> entry : snapshotRefs.entrySet()) {
-      String branchName = entry.getKey();
-      long targetSnapshotId = entry.getValue().snapshotId();
-
-      if (needsBranchUpdate(metadata, branchName, targetSnapshotId)) {
-        metadataBuilder.setBranchSnapshot(targetSnapshotId, branchName);
-
-        if (recordAsCherryPicks) {
-          cherryPickedSnapshots.add(String.valueOf(targetSnapshotId));
-        }
-      }
-    }
-
-    return cherryPickedSnapshots;
   }
 
   /**
@@ -914,60 +635,226 @@ private void recordSnapshotActions(
     metadataBuilder.setProperties(updatedProperties);
   }
 
+  /**
+   * Applies client-requested changes to server state using functional approach.
+   *
+   * <p>Contract: currentState + clientDesiredState -> newState + metrics
+   *
+   * <p>Client sends desired final state, server computes semantic diff and applies it.
+   */
   public TableMetadata applySnapshotOperations(
-      TableMetadata metadata,
-      List<Snapshot> snapshots,
-      Map<String, SnapshotRef> snapshotRefs,
+      TableMetadata currentMetadata,
+      List<Snapshot> clientSnapshots,
+      Map<String, SnapshotRef> clientRefs,
       boolean recordAction) {
-    TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(metadata);
 
-    /**
-     * Process snapshots and branch reference updates. Two main operation types:
-     *
-     * <p><b>Snapshot Processing:</b> When snapshots list is non-empty:
-     *
-     * <ul>
-     *   <li>[1] Regular snapshots - committed to branches (if snapshotRefs provided) or staged
-     *   <li>[2] WAP staged snapshots (STAGED_WAP_ID_PROP) - staged but not committed to branches
-     *   <li>[3] Cherry-picked snapshots (SOURCE_SNAPSHOT_ID_PROP) - committed to target branches
-     * </ul>
-     *
-     * <p><b>Branch Reference Updates:</b> When snapshotRefs is non-empty:
-     *
-     * <ul>
-     *   <li>If snapshots are also provided: snapshotRefs guides branch assignment during processing
-     *   <li>If only snapshotRefs provided: standalone fast-forward/rollback operations on existing
-     *       snapshots
-     * </ul>
-     */
-    SnapshotOperationResult snapshotProcessingResults =
-        CollectionUtils.isNotEmpty(snapshots)
-            ? categorizeAndApplySnapshots(snapshots, snapshotRefs, metadataBuilder)
-            : SnapshotOperationResult.empty();
-
-    // Update branch references (for standalone fast-forward/rollback operations)
-    List<String> standaloneRefCherryPicks = Collections.emptyList();
-    if (MapUtils.isNotEmpty(snapshotRefs)) {
-      boolean recordRefUpdatesAsCherryPicks = CollectionUtils.isEmpty(snapshots);
-      standaloneRefCherryPicks =
-          updateBranchReferences(
-              metadata, snapshotRefs, metadataBuilder, recordRefUpdatesAsCherryPicks);
-    }
+    return computeStateDiff(currentMetadata, clientSnapshots, clientRefs)
+        .map(
+            diff -> {
+              TableMetadata newMetadata = applyStateDiff(currentMetadata, diff);
+              return recordAction
+                  ? recordTransition(currentMetadata, newMetadata, diff)
+                  : newMetadata;
+            })
+        .orElse(currentMetadata);
+  }
+
+  /** Computes semantic difference between current server state and client-desired state. */
+  private Optional<StateDiff> computeStateDiff(
+      TableMetadata currentMetadata,
+      List<Snapshot> clientSnapshots,
+      Map<String, SnapshotRef> clientRefs) {
+
+    if (CollectionUtils.isEmpty(clientSnapshots) && MapUtils.isEmpty(clientRefs)) {
+      return Optional.empty(); // No changes requested
+    }
+
+    Set<Long> currentSnapshotIds =
+        currentMetadata.snapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+
+    // Find truly new snapshots (not in current metadata)
+    List<Snapshot> newSnapshots =
+        Optional.ofNullable(clientSnapshots).orElse(Collections.emptyList()).stream()
+            .filter(s -> !currentSnapshotIds.contains(s.snapshotId()))
+            .collect(Collectors.toList());
+
+    // Find branch updates needed
+    Map<String, Long> branchUpdates =
+        Optional.ofNullable(clientRefs).orElse(Collections.emptyMap()).entrySet().stream()
+            .filter(
+                entry ->
+                    needsBranchUpdate(
+                        currentMetadata, entry.getKey(), entry.getValue().snapshotId()))
+            .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().snapshotId()));
+
+    return Optional.of(
+        new StateDiff(
+            newSnapshots,
+            branchUpdates,
+            Optional.ofNullable(clientSnapshots).orElse(Collections.emptyList())));
+  }
+
+  /** Applies the computed state diff to create new metadata. */
+  private TableMetadata applyStateDiff(TableMetadata currentMetadata, StateDiff diff) {
+    TableMetadata.Builder builder = TableMetadata.buildFrom(currentMetadata);
+
+    // Add new snapshots (respecting Iceberg semantics)
+    diff.newSnapshots.forEach(
+        snapshot -> {
+          snapshotInspector.validateSnapshot(snapshot);
+
+          if (isWapStaged(snapshot)) {
+            // WAP snapshots are always staged (never assigned to branches initially)
+            builder.addSnapshot(snapshot);
+          } else {
+            // All other snapshots: assign to branch if specified, otherwise stage
+            findTargetBranchForSnapshot(snapshot, diff.branchUpdates)
+                .ifPresentOrElse(
+                    targetBranch -> builder.setBranchSnapshot(snapshot, targetBranch),
+                    () -> builder.addSnapshot(snapshot));
+          }
+        });
+
+    // Update branch pointers to existing snapshots
+    diff.branchUpdates.entrySet().stream()
+        .filter(entry -> !isNewSnapshot(entry.getValue(), diff.newSnapshots))
+        .forEach(entry -> builder.setBranchSnapshot(entry.getValue(), entry.getKey()));
+
+    return builder.build();
+  }
+
+  /** Checks if snapshot is WAP staged (should not be assigned to any branch). */
+  private boolean isWapStaged(Snapshot snapshot) {
+    return snapshot.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP);
+  }
+
+  /** Checks if snapshot is cherry-picked (should go directly to target branch). */
+  private boolean isCherryPicked(Snapshot snapshot) {
+    return snapshot.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP);
+  }
 
-    if (recordAction) {
-      // Combine cherry-picked snapshots from both snapshot processing and standalone ref updates
-      List<String> allCherryPickedSnapshots =
-          combineCherryPickedSnapshots(
-              snapshotProcessingResults.cherryPickedSnapshots, standaloneRefCherryPicks);
-
-      recordSnapshotActions(
-          metadata,
-          metadataBuilder,
-          snapshotProcessingResults.appendedSnapshots,
-          snapshotProcessingResults.stagedSnapshots,
-          allCherryPickedSnapshots);
+  /**
+   * Finds which branch this snapshot should be assigned to based on branch updates. Fails fast if
+   * multiple branches want the same snapshot (ambiguous commit).
+   */
+  private Optional<String> findTargetBranchForSnapshot(
+      Snapshot snapshot, Map<String, Long> branchUpdates) {
+    List<String> matchingBranches =
+        branchUpdates.entrySet().stream()
+            .filter(entry -> entry.getValue() == snapshot.snapshotId())
+            .map(Map.Entry::getKey)
+            .toList();
+
+    if (matchingBranches.size() > 1) {
+      throw new IllegalStateException(
+          "Multiple branches (%s) specify the same target snapshot %d. "
+              + "This indicates an ambiguous commit operation - each snapshot can only be assigned to one branch."
+                  .formatted(matchingBranches, snapshot.snapshotId()));
+    }
+
+    return matchingBranches.stream().findFirst();
+  }
+
+  /** Checks if this snapshot ID is in the list of new snapshots being added. */
+  private boolean isNewSnapshot(Long snapshotId, List<Snapshot> newSnapshots) {
+    return newSnapshots.stream().anyMatch(s -> s.snapshotId() == snapshotId);
+  }
+
+  /** Records metrics and properties about the state transition that occurred. */
+  private TableMetadata recordTransition(
+      TableMetadata originalMetadata, TableMetadata newMetadata, StateDiff diff) {
+
+    Map<String, String> properties = new HashMap<>(newMetadata.properties());
+
+    // Categorize new snapshots by their semantic type for metrics
+    Map<String, List<String>> snapshotsByType =
+        diff.newSnapshots.stream()
+            .collect(
+                Collectors.groupingBy(
+                    this::getSnapshotCategory,
+                    Collectors.mapping(s -> String.valueOf(s.snapshotId()), Collectors.toList())));
+
+    // Record snapshot metrics by type
+    recordIfPresent(
+        properties,
+        snapshotsByType,
+        "appended",
+        CatalogConstants.APPENDED_SNAPSHOTS,
+        InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR);
+    recordIfPresent(
+        properties,
+        snapshotsByType,
+        "staged",
+        CatalogConstants.STAGED_SNAPSHOTS,
+        InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR);
+
+    // For cherry-picked snapshots, record the SOURCE snapshot IDs that were cherry-picked
+    List<String> cherryPickSourceIds =
+        diff.newSnapshots.stream()
+            .filter(this::isCherryPicked)
+            .map(this::getCherryPickSourceId)
+            .filter(Optional::isPresent)
+            .map(Optional::get)
+            .collect(Collectors.toList());
+
+    if (!cherryPickSourceIds.isEmpty()) {
+      properties.put(
+          getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
+          String.join(",", cherryPickSourceIds));
+      metricsReporter.count(
+          InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR, cherryPickSourceIds.size());
+    }
+
+    // Record branch updates that don't involve new snapshots (pure ref moves)
+    List<String> refOnlyCherryPicks =
+        diff.branchUpdates.entrySet().stream()
+            .filter(entry -> !isNewSnapshot(entry.getValue(), diff.newSnapshots))
+            .map(entry -> String.valueOf(entry.getValue()))
+            .collect(Collectors.toList());
+
+    if (!refOnlyCherryPicks.isEmpty()) {
+      String existing =
+          properties.get(getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS));
+      String combined =
+          existing != null
+              ? existing + "," + String.join(",", refOnlyCherryPicks)
+              : String.join(",", refOnlyCherryPicks);
+      properties.put(getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS), combined);
+      metricsReporter.count(
+          InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR, refOnlyCherryPicks.size());
     }
-    return metadataBuilder.build();
+
+    return TableMetadata.buildFrom(newMetadata).setProperties(properties).build();
+  }
+
+  /** Categorizes snapshot for metrics based on its semantic type. */
+  private String getSnapshotCategory(Snapshot snapshot) {
+    if (isWapStaged(snapshot)) return "staged";
+    if (isCherryPicked(snapshot))
+      return "appended"; // Cherry-picked snapshots are NEW, so they're "appended"
+    return "appended";
+  }
+
+  /** Extracts the source snapshot ID for cherry-picked snapshots. */
+  private Optional<String> getCherryPickSourceId(Snapshot snapshot) {
+    return Optional.ofNullable(snapshot.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP));
+  }
+
+  /** Records snapshot category in properties if snapshots exist. */
+  private void recordIfPresent(
+      Map<String, String> properties,
+      Map<String, List<String>> categorized,
+      String category,
+      String propertyKey,
+      String metricKey) {
+
+    Optional.ofNullable(categorized.get(category))
+        .filter(CollectionUtils::isNotEmpty)
+        .ifPresent(
+            snapshots -> {
+              properties.put(getCanonicalFieldName(propertyKey), String.join(",", snapshots));
+              metricsReporter.count(metricKey, snapshots.size());
+            });
   }
 
   /** Helper function to dump contents for map in debugging mode. */

From 9d6aec0278527bc3d62546b4819a67c563c1f49f Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Thu, 2 Oct 2025 14:40:32 -0700
Subject: [PATCH 09/35] removing unused function

---
 .../catalog/OpenHouseInternalTableOperations.java    | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
index 19dee8435..16577bdbd 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
@@ -23,7 +23,6 @@
 import java.io.IOException;
 import java.time.Clock;
 import java.time.Instant;
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -576,17 +575,6 @@ private static class StateDiff {
     }
   }
 
-  /**
-   * Combines cherry-picked snapshot IDs from both snapshot processing and standalone ref
-   * operations.
-   */
-  private List<String> combineCherryPickedSnapshots(
-      List<String> fromSnapshotProcessing, List<String> fromStandaloneRefUpdates) {
-    List<String> allCherryPicks = new ArrayList<>(fromSnapshotProcessing);
-    allCherryPicks.addAll(fromStandaloneRefUpdates);
-    return allCherryPicks;
-  }
-
   /** Checks if a branch needs to be updated based on current refs and new snapshot ID. */
   private boolean needsBranchUpdate(TableMetadata metadata, String branchName, long newSnapshotId) {
     if (MapUtils.isEmpty(metadata.refs())) {

From bf5a474725ad6639d42126af44ccaba1eb11984f Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Mon, 6 Oct 2025 18:01:55 -0700
Subject: [PATCH 10/35] workign tests for ambiguous commits

---
 .../OpenHouseInternalTableOperations.java     |  62 +-
 .../OpenHouseInternalTableOperationsTest.java | 567 +++++++++++++++++-
 2 files changed, 622 insertions(+), 7 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
index 16577bdbd..dc9ab7ead 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
@@ -23,6 +23,7 @@
 import java.io.IOException;
 import java.time.Clock;
 import java.time.Instant;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -538,6 +539,9 @@ public TableMetadata maybeDeleteSnapshots(
       TableMetadata metadata, List<Snapshot> snapshotsToDelete) {
     TableMetadata result = metadata;
     if (CollectionUtils.isNotEmpty(snapshotsToDelete)) {
+      // Validate that snapshots to delete are not referenced by any branches or tags
+      validateSnapshotsNotReferenced(metadata, snapshotsToDelete);
+
       Set<Long> snapshotIds =
           snapshotsToDelete.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
       Map<String, String> updatedProperties = new HashMap<>(result.properties());
@@ -552,7 +556,7 @@ public TableMetadata maybeDeleteSnapshots(
               .build()
               .removeSnapshotsIf(s -> snapshotIds.contains(s.snapshotId()));
       metricsReporter.count(
-          InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, snapshotsToDelete.size());
+          InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, (double) snapshotsToDelete.size());
     }
     return result;
   }
@@ -586,6 +590,59 @@ private boolean needsBranchUpdate(TableMetadata metadata, String branchName, lon
     return currentRef == null || currentRef.snapshotId() != newSnapshotId;
   }
 
+  /** Validates that no two branches are trying to point to the same snapshot (ambiguous commit). */
+  private void validateNoBranchConflicts(Map<String, Long> branchUpdates) {
+    // Group branches by target snapshot ID
+    Map<Long, List<String>> snapshotToBranches = new HashMap<>();
+    for (Map.Entry<String, Long> entry : branchUpdates.entrySet()) {
+      snapshotToBranches
+          .computeIfAbsent(entry.getValue(), k -> new ArrayList<>())
+          .add(entry.getKey());
+    }
+
+    // Check for conflicts (multiple branches pointing to same snapshot)
+    for (Map.Entry<Long, List<String>> entry : snapshotToBranches.entrySet()) {
+      List<String> branches = entry.getValue();
+      if (branches.size() > 1) {
+        throw new IllegalStateException(
+            String.format(
+                "Multiple branches (%s) specify the same target snapshot %d. "
+                    + "This indicates an ambiguous commit operation - each snapshot can only be assigned to one branch.",
+                branches, entry.getKey()));
+      }
+    }
+  }
+
+  /** Validates that snapshots to be deleted are not referenced by any branches or tags. */
+  private void validateSnapshotsNotReferenced(
+      TableMetadata metadata, List<Snapshot> snapshotsToDelete) {
+    if (MapUtils.isEmpty(metadata.refs()) || CollectionUtils.isEmpty(snapshotsToDelete)) {
+      return; // No refs to check or no snapshots to delete
+    }
+
+    Set<Long> snapshotIdsToDelete =
+        snapshotsToDelete.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+
+    // Check if any snapshot to delete is referenced by branches or tags
+    for (Map.Entry<String, SnapshotRef> refEntry : metadata.refs().entrySet()) {
+      String refName = refEntry.getKey();
+      SnapshotRef ref = refEntry.getValue();
+
+      if (snapshotIdsToDelete.contains(ref.snapshotId())) {
+        List<String> referencingRefs =
+            metadata.refs().entrySet().stream()
+                .filter(entry -> snapshotIdsToDelete.contains(entry.getValue().snapshotId()))
+                .map(Map.Entry::getKey)
+                .collect(Collectors.toList());
+
+        throw new IllegalArgumentException(
+            String.format(
+                "Cannot expire %d. Still referenced by refs: %s",
+                ref.snapshotId(), referencingRefs));
+      }
+    }
+  }
+
   /** Records snapshot actions in table properties and reports metrics. */
   private void recordSnapshotActions(
       TableMetadata metadata,
@@ -675,6 +732,9 @@ private Optional<StateDiff> computeStateDiff(
                         currentMetadata, entry.getKey(), entry.getValue().snapshotId()))
             .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().snapshotId()));
 
+    // Check for ambiguous commits: multiple branches trying to point to the same snapshot
+    validateNoBranchConflicts(branchUpdates);
+
     return Optional.of(
         new StateDiff(
             newSnapshots,
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index 125966bf5..e09e45d0d 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -29,6 +29,7 @@
 import java.util.UUID;
 import java.util.function.Consumer;
 import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 import lombok.SneakyThrows;
 import org.apache.commons.compress.utils.Lists;
 import org.apache.hadoop.conf.Configuration;
@@ -480,35 +481,95 @@ void testDoCommitExceptionHandling() {
   }
 
   @Test
-  void testDoCommitSnapshotsValidationExceptionHandling() throws IOException {
+  void testDoCommitWithValidSnapshotDeletion() throws IOException {
     TableMetadata metadata =
         BASE_TABLE_METADATA.replaceProperties(ImmutableMap.of("random", "value"));
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
     Map<String, String> properties = new HashMap<>(metadata.properties());
+
+    // The key insight: SNAPSHOTS_JSON_KEY determines what snapshots SHOULD exist after commit
+    // Only include snapshot 2 - this means snapshots 0 and 1 should be deleted
     properties.put(
         CatalogConstants.SNAPSHOTS_JSON_KEY,
-        SnapshotsUtil.serializedSnapshots(testSnapshots.subList(1, 3)));
+        SnapshotsUtil.serializedSnapshots(testSnapshots.subList(2, 3))); // Only snapshot 2
     properties.put(
         CatalogConstants.SNAPSHOTS_REFS_KEY,
         SnapshotsUtil.serializeMap(
             IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
-                testSnapshots.get(testSnapshots.size() - 1))));
+                testSnapshots.get(2)))); // snapshot 2 -> main
     properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
     metadata = metadata.replaceProperties(properties);
+
+    // Create initial metadata with snapshots 0, 1, 2 where only snapshot 2 is referenced
     TableMetadata metadataWithSnapshots =
         TableMetadata.buildFrom(metadata)
-            .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
-            .setBranchSnapshot(testSnapshots.get(1), SnapshotRef.MAIN_BRANCH)
+            .addSnapshot(testSnapshots.get(0)) // Unreferenced - will be deleted
+            .addSnapshot(testSnapshots.get(1)) // Unreferenced - will be deleted
+            .setBranchSnapshot(
+                testSnapshots.get(2), SnapshotRef.MAIN_BRANCH) // Referenced - will be kept
             .build();
+
+    // Target metadata: same branch setup but snapshots 0,1 removed via SNAPSHOTS_JSON_KEY
     TableMetadata metadataWithSnapshotsDeleted =
         TableMetadata.buildFrom(metadata)
-            .setBranchSnapshot(testSnapshots.get(3), SnapshotRef.MAIN_BRANCH)
+            .setBranchSnapshot(
+                testSnapshots.get(2), SnapshotRef.MAIN_BRANCH) // Only snapshot 2 remains
             .build();
 
+    // This should succeed because snapshots 0 and 1 are unreferenced and can be safely deleted
     Assertions.assertDoesNotThrow(
         () ->
             openHouseInternalTableOperations.doCommit(
                 metadataWithSnapshots, metadataWithSnapshotsDeleted));
+
+    // ideally we also verify that snapshots 0 and 1 are deleted, but doCommit doesn't return the
+    // metadata with the deleted snapshots
+  }
+
+  @Test
+  void testDoCommitSnapshotsValidationThrowsException() throws IOException {
+    TableMetadata metadata =
+        BASE_TABLE_METADATA.replaceProperties(ImmutableMap.of("random", "value"));
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+    Map<String, String> properties = new HashMap<>(metadata.properties());
+
+    // The key issue: SNAPSHOTS_JSON_KEY says to keep only snapshot 2, but snapshot 1 is referenced
+    // by main
+    // This creates a conflict - we're trying to delete snapshot 1 but it's still referenced
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY,
+        SnapshotsUtil.serializedSnapshots(
+            testSnapshots.subList(2, 3))); // Only snapshot 2 should remain
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+                testSnapshots.get(1)))); // But main refs snapshot 1
+    properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
+    metadata = metadata.replaceProperties(properties);
+
+    // Create initial metadata with snapshots 1 and 2, where snapshot 1 is referenced by main
+    TableMetadata metadataWithSnapshots =
+        TableMetadata.buildFrom(metadata)
+            .setBranchSnapshot(testSnapshots.get(1), SnapshotRef.MAIN_BRANCH) // snapshot 1 -> main
+            .addSnapshot(testSnapshots.get(2)) // snapshot 2 exists but unreferenced initially
+            .build();
+
+    // Target metadata tries to delete snapshot 1 (not in SNAPSHOTS_JSON_KEY) but main still refs it
+    TableMetadata metadataWithSnapshotsDeleted =
+        TableMetadata.buildFrom(metadata)
+            .setBranchSnapshot(
+                testSnapshots.get(1), SnapshotRef.MAIN_BRANCH) // main still points to snapshot 1
+            .build();
+
+    // This should throw exception because snapshot 1 is marked for deletion but still referenced by
+    // main
+    Assertions.assertThrows(
+        CommitStateUnknownException.class,
+        () ->
+            openHouseInternalTableOperations.doCommit(
+                metadataWithSnapshots, metadataWithSnapshotsDeleted),
+        "Should throw exception when trying to delete referenced snapshots");
   }
 
   @Test
@@ -1202,4 +1263,498 @@ private void verifyMetricHistogramBuckets(
     Assertions.assertFalse(Double.isNaN(totalTime), "Timer total time should not be NaN");
     Assertions.assertFalse(Double.isNaN(maxTime), "Timer max time should not be NaN");
   }
+
+  // ===== SNAPSHOT DELETION SAFETY TESTS =====
+
+  @Test
+  void testDeleteSnapshotWithMainReference() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata with multiple snapshots
+    TableMetadata baseMetadata =
+        TableMetadata.buildFrom(BASE_TABLE_METADATA)
+            .addSnapshot(testSnapshots.get(0)) // Unreferenced - can be deleted
+            .addSnapshot(testSnapshots.get(1)) // Unreferenced - can be deleted
+            .addSnapshot(testSnapshots.get(2)) // Unreferenced - can be deleted
+            .setBranchSnapshot(
+                testSnapshots.get(3), SnapshotRef.MAIN_BRANCH) // Referenced - cannot be deleted
+            .build();
+
+    // Get the current head snapshot that is referenced by main branch
+    Snapshot referencedSnapshot = testSnapshots.get(testSnapshots.size() - 1);
+
+    // Attempt to delete a snapshot that is currently referenced by a branch
+    List<Snapshot> snapshotsToDelete = List.of(referencedSnapshot);
+
+    // Capture final variables for lambda
+    final TableMetadata finalBase = baseMetadata;
+    final List<Snapshot> finalSnapshotsToDelete = snapshotsToDelete;
+
+    // This MUST throw IllegalArgumentException for referenced snapshots
+    IllegalArgumentException exception =
+        Assertions.assertThrows(
+            IllegalArgumentException.class,
+            () ->
+                openHouseInternalTableOperations.maybeDeleteSnapshots(
+                    finalBase, finalSnapshotsToDelete),
+            "Should throw IllegalArgumentException when trying to delete referenced snapshot");
+
+    // Verify error message mentions the reference
+    String expectedMessage =
+        "Cannot expire " + referencedSnapshot.snapshotId() + ". Still referenced by refs:";
+    Assertions.assertTrue(
+        exception.getMessage().contains(expectedMessage)
+            || exception.getMessage().contains("Still referenced by")
+            || exception.getMessage().contains("referenced"),
+        "Error message should indicate snapshot is still referenced: " + exception.getMessage());
+  }
+
+  @Test
+  void testDeleteSnapshotWithNoReference() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata with multiple snapshots
+    TableMetadata base =
+        TableMetadata.buildFrom(BASE_TABLE_METADATA)
+            .addSnapshot(testSnapshots.get(0)) // Unreferenced - can be deleted
+            .addSnapshot(testSnapshots.get(1)) // Unreferenced - can be deleted
+            .addSnapshot(testSnapshots.get(2)) // Unreferenced - can be deleted
+            .setBranchSnapshot(
+                testSnapshots.get(3), SnapshotRef.MAIN_BRANCH) // Referenced - cannot be deleted
+            .build();
+
+    // Delete unreferenced snapshots (first two snapshots)
+    List<Snapshot> unreferencedSnapshots = testSnapshots.subList(0, 2);
+
+    TableMetadata result =
+        openHouseInternalTableOperations.maybeDeleteSnapshots(base, unreferencedSnapshots);
+
+    // Verify unreferenced snapshots were removed
+    for (Snapshot unreferenced : unreferencedSnapshots) {
+      boolean snapshotExists =
+          result.snapshots().stream().anyMatch(s -> s.snapshotId() == unreferenced.snapshotId());
+      Assertions.assertFalse(
+          snapshotExists,
+          "Unreferenced snapshot " + unreferenced.snapshotId() + " should be deleted");
+    }
+
+    // Verify referenced snapshot still exists
+    Snapshot referencedSnapshot = testSnapshots.get(3);
+    boolean referencedExists =
+        result.snapshots().stream()
+            .anyMatch(s -> s.snapshotId() == referencedSnapshot.snapshotId());
+    Assertions.assertTrue(referencedExists, "Referenced snapshot should still exist");
+
+    // Verify deletion tracking
+    Map<String, String> properties = result.properties();
+    String deletedSnapshots =
+        properties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
+    Assertions.assertNotNull(deletedSnapshots);
+
+    for (Snapshot unreferenced : unreferencedSnapshots) {
+      Assertions.assertTrue(
+          deletedSnapshots.contains(Long.toString(unreferenced.snapshotId())),
+          "Unreferenced snapshot should be tracked as deleted");
+    }
+  }
+
+  @Test
+  void testDeleteSnapshotWithMultipleReference() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create metadata with snapshot referenced by multiple branches
+    // Reference the same snapshot from multiple branches
+    Snapshot sharedSnapshot = testSnapshots.get(1);
+    TableMetadata baseMetadata =
+        TableMetadata.buildFrom(BASE_TABLE_METADATA)
+            .addSnapshot(sharedSnapshot) // Add snapshot first
+            .setRef(
+                SnapshotRef.MAIN_BRANCH,
+                SnapshotRef.branchBuilder(sharedSnapshot.snapshotId()).build())
+            .setRef(
+                "feature_branch", SnapshotRef.branchBuilder(sharedSnapshot.snapshotId()).build())
+            .build();
+    // Add other snapshots to the metadata (skip index 1 - shared snapshot already added)
+    List<Snapshot> snapshotsToAdd =
+        IntStream.range(0, testSnapshots.size())
+            .filter(i -> i != 1)
+            .mapToObj(testSnapshots::get)
+            .collect(Collectors.toList());
+
+    for (Snapshot snapshot : snapshotsToAdd) {
+      baseMetadata = TableMetadata.buildFrom(baseMetadata).addSnapshot(snapshot).build();
+    }
+
+    // Attempt to delete the shared snapshot
+    List<Snapshot> snapshotsToDelete = List.of(sharedSnapshot);
+
+    // Capture final variables for lambda
+    final TableMetadata finalBase = baseMetadata;
+    final List<Snapshot> finalSnapshotsToDelete = snapshotsToDelete;
+
+    // This MUST throw IllegalArgumentException for snapshots referenced by multiple branches
+    IllegalArgumentException exception =
+        Assertions.assertThrows(
+            IllegalArgumentException.class,
+            () ->
+                openHouseInternalTableOperations.maybeDeleteSnapshots(
+                    finalBase, finalSnapshotsToDelete),
+            "Should throw IllegalArgumentException when trying to delete snapshot referenced by multiple branches");
+
+    // Verify error message mentions multiple references
+    String exceptionMessage = exception.getMessage();
+    Assertions.assertTrue(
+        exceptionMessage.contains("Still referenced by refs"),
+        "Error message should indicate snapshot is still referenced by branches: "
+            + exceptionMessage);
+  }
+
+  @Test
+  void testDeleteSnapshotWithBranchReference() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata with snapshots - add the tagged snapshot first
+    Snapshot taggedSnapshot = testSnapshots.get(0);
+    TableMetadata baseMetadata =
+        TableMetadata.buildFrom(BASE_TABLE_METADATA)
+            .addSnapshot(taggedSnapshot) // Add the snapshot first so it exists
+            .setBranchSnapshot(testSnapshots.get(testSnapshots.size() - 1), SnapshotRef.MAIN_BRANCH)
+            .setRef(
+                "feature_branch",
+                SnapshotRef.tagBuilder(taggedSnapshot.snapshotId()).build()) // Now create the tag
+            .build();
+    // Add remaining snapshots
+    for (int i = 1; i < testSnapshots.size() - 1; i++) {
+      baseMetadata =
+          TableMetadata.buildFrom(baseMetadata).addSnapshot(testSnapshots.get(i)).build();
+    }
+
+    // Attempt to delete snapshot that has a tag reference
+    List<Snapshot> snapshotsToDelete = List.of(taggedSnapshot);
+
+    // Capture final variables for lambda
+    final TableMetadata finalBase = baseMetadata;
+    final List<Snapshot> finalSnapshotsToDelete = snapshotsToDelete;
+
+    // This MUST throw IllegalArgumentException for snapshots referenced by tags
+    IllegalArgumentException exception =
+        Assertions.assertThrows(
+            IllegalArgumentException.class,
+            () ->
+                openHouseInternalTableOperations.maybeDeleteSnapshots(
+                    finalBase, finalSnapshotsToDelete),
+            "Should throw IllegalArgumentException when trying to delete snapshot referenced by tag");
+
+    // Verify error message mentions tag reference
+    String exceptionMessage = exception.getMessage();
+    Assertions.assertTrue(
+        exceptionMessage.contains("Still referenced by refs"),
+        "Error message should indicate snapshot is still referenced by branches: "
+            + exceptionMessage);
+  }
+
+  @Test
+  void testDeleteEmptySnapshotList() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata
+    TableMetadata base = BASE_TABLE_METADATA;
+    for (Snapshot snapshot : testSnapshots) {
+      base =
+          TableMetadata.buildFrom(base)
+              .setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH)
+              .build();
+    }
+
+    // Delete empty list
+    List<Snapshot> emptyList = List.of();
+
+    TableMetadata result = openHouseInternalTableOperations.maybeDeleteSnapshots(base, emptyList);
+
+    // Verify no changes were made
+    Assertions.assertEquals(
+        base.snapshots().size(),
+        result.snapshots().size(),
+        "No snapshots should be deleted when list is empty");
+
+    // Verify no deletion tracking properties were added
+    Map<String, String> properties = result.properties();
+    String deletedSnapshots =
+        properties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
+    Assertions.assertNull(deletedSnapshots, "No deleted snapshots property should be set");
+  }
+
+  @Test
+  void testDeleteNullSnapshotList() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata
+    TableMetadata base = BASE_TABLE_METADATA;
+    for (Snapshot snapshot : testSnapshots) {
+      base =
+          TableMetadata.buildFrom(base)
+              .setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH)
+              .build();
+    }
+
+    // Delete null list
+    TableMetadata result = openHouseInternalTableOperations.maybeDeleteSnapshots(base, null);
+
+    // Verify no changes were made
+    Assertions.assertEquals(
+        base.snapshots().size(),
+        result.snapshots().size(),
+        "No snapshots should be deleted when list is null");
+
+    // Verify no deletion tracking properties were added
+    Map<String, String> properties = result.properties();
+    String deletedSnapshots =
+        properties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
+    Assertions.assertNull(deletedSnapshots, "No deleted snapshots property should be set");
+  }
+
+  @Test
+  void testDeleteNonExistentSnapshot() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata
+    TableMetadata base = BASE_TABLE_METADATA;
+    for (Snapshot snapshot : testSnapshots) {
+      base =
+          TableMetadata.buildFrom(base)
+              .setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH)
+              .build();
+    }
+
+    // Create a snapshot that doesn't exist in the metadata
+    List<Snapshot> extraSnapshots = IcebergTestUtil.getExtraSnapshots();
+    Snapshot nonExistentSnapshot = extraSnapshots.get(0);
+
+    List<Snapshot> snapshotsToDelete = List.of(nonExistentSnapshot);
+
+    TableMetadata result =
+        openHouseInternalTableOperations.maybeDeleteSnapshots(base, snapshotsToDelete);
+
+    // Verify original snapshots are unchanged
+    Assertions.assertEquals(
+        base.snapshots().size(),
+        result.snapshots().size(),
+        "Snapshot count should be unchanged when deleting non-existent snapshot");
+
+    // Verify deletion is still tracked (documenting current behavior)
+    Map<String, String> properties = result.properties();
+    String deletedSnapshots =
+        properties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
+    Assertions.assertNotNull(deletedSnapshots);
+    Assertions.assertTrue(
+        deletedSnapshots.contains(Long.toString(nonExistentSnapshot.snapshotId())),
+        "Non-existent snapshot should still be tracked as deleted");
+  }
+
+  @Test
+  void testDeleteSnapshotMetricsRecorded() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata
+    TableMetadata base = BASE_TABLE_METADATA;
+    for (Snapshot snapshot : testSnapshots) {
+      base = TableMetadata.buildFrom(base).addSnapshot(snapshot).build();
+    }
+
+    // Delete some snapshots
+    List<Snapshot> snapshotsToDelete = testSnapshots.subList(0, 2);
+
+    // Use the operations instance with mock metrics reporter
+    openHouseInternalTableOperationsWithMockMetrics.maybeDeleteSnapshots(base, snapshotsToDelete);
+
+    // Verify metrics were recorded
+    Mockito.verify(mockMetricsReporter)
+        .count(
+            eq(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR),
+            eq((double) snapshotsToDelete.size()));
+  }
+
+  @Test
+  void testDeleteSnapshotMetricsRecordedBranch() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata with snapshots that have branch references
+    TableMetadata base =
+        TableMetadata.buildFrom(BASE_TABLE_METADATA)
+            .addSnapshot(testSnapshots.get(0)) // Unreferenced - can be deleted
+            .addSnapshot(testSnapshots.get(1)) // Unreferenced - can be deleted
+            .setBranchSnapshot(
+                testSnapshots.get(2), SnapshotRef.MAIN_BRANCH) // Referenced - cannot be deleted
+            .build();
+
+    // Delete unreferenced snapshots (emits metrics for basic deletion)
+    List<Snapshot> snapshotsToDelete = testSnapshots.subList(0, 2);
+
+    // Use the operations instance with mock metrics reporter
+    openHouseInternalTableOperationsWithMockMetrics.maybeDeleteSnapshots(base, snapshotsToDelete);
+
+    // Verify metrics were recorded for the basic deletion
+    Mockito.verify(mockMetricsReporter)
+        .count(
+            eq(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR),
+            eq((double) snapshotsToDelete.size()));
+  }
+
+  @Test
+  void testDeleteSnapshotMetricsRecordedNonExistent() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata
+    TableMetadata base = BASE_TABLE_METADATA;
+    for (Snapshot snapshot : testSnapshots) {
+      base =
+          TableMetadata.buildFrom(base)
+              .setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH)
+              .build();
+    }
+
+    // Create a snapshot that doesn't exist in the metadata
+    List<Snapshot> extraSnapshots = IcebergTestUtil.getExtraSnapshots();
+    Snapshot nonExistentSnapshot = extraSnapshots.get(0);
+    List<Snapshot> snapshotsToDelete = List.of(nonExistentSnapshot);
+
+    // Use the operations instance with mock metrics reporter
+    openHouseInternalTableOperationsWithMockMetrics.maybeDeleteSnapshots(base, snapshotsToDelete);
+
+    // Verify metrics are still recorded even for non-existent snapshots
+    Mockito.verify(mockMetricsReporter)
+        .count(
+            eq(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR),
+            eq((double) snapshotsToDelete.size()));
+  }
+
+  @Test
+  void testDeleteAllSnapshotsFailsWhenMainBranchReferenced() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata with all snapshots, where the last one is referenced by main branch
+    TableMetadata base =
+        testSnapshots.subList(0, testSnapshots.size() - 1).stream()
+            .reduce(
+                BASE_TABLE_METADATA,
+                (metadata, snapshot) ->
+                    TableMetadata.buildFrom(metadata).addSnapshot(snapshot).build(),
+                (m1, m2) -> m2);
+    base =
+        TableMetadata.buildFrom(base)
+            .setBranchSnapshot(testSnapshots.get(testSnapshots.size() - 1), SnapshotRef.MAIN_BRANCH)
+            .build();
+
+    // Attempt to delete ALL snapshots (including the one referenced by main)
+    List<Snapshot> allSnapshots = new ArrayList<>(testSnapshots);
+
+    // This should fail because we cannot delete the snapshot referenced by main branch
+    IllegalArgumentException exception =
+        Assertions.assertThrows(
+            IllegalArgumentException.class,
+            () -> openHouseInternalTableOperations.maybeDeleteSnapshots(base, allSnapshots),
+            "Should throw IllegalArgumentException when trying to delete all snapshots including main branch reference");
+
+    // Verify error message indicates the snapshot is still referenced
+    String exceptionMessage = exception.getMessage();
+    Assertions.assertTrue(
+        exceptionMessage.contains("Still referenced by refs")
+            || exceptionMessage.contains("referenced")
+            || exceptionMessage.contains("Cannot expire"),
+        "Error message should indicate snapshot is still referenced: " + exceptionMessage);
+  }
+
+  @Test
+  void testDeleteAllUnreferencedSnapshotsSucceeds() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata with unreferenced snapshots only (no main branch or other refs)
+    TableMetadata base = BASE_TABLE_METADATA;
+    for (Snapshot snapshot : testSnapshots) {
+      base = TableMetadata.buildFrom(base).addSnapshot(snapshot).build();
+    }
+    // Note: No setBranchSnapshot or setRef calls - all snapshots are unreferenced
+
+    // Attempt to delete all unreferenced snapshots
+    List<Snapshot> allSnapshots = new ArrayList<>(testSnapshots);
+
+    // This should succeed since no snapshots are referenced by any branch/tag
+    TableMetadata result =
+        Assertions.assertDoesNotThrow(
+            () -> openHouseInternalTableOperations.maybeDeleteSnapshots(base, allSnapshots),
+            "Should succeed when deleting all unreferenced snapshots");
+
+    // Verify all snapshots were removed from the metadata
+    Assertions.assertEquals(
+        0,
+        result.snapshots().size(),
+        "All unreferenced snapshots should be deleted, resulting in empty snapshots list");
+
+    // Verify deletion tracking shows all snapshots were deleted
+    Map<String, String> properties = result.properties();
+    String deletedSnapshots =
+        properties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
+    Assertions.assertNotNull(deletedSnapshots, "Deleted snapshots should be tracked");
+
+    for (Snapshot snapshot : allSnapshots) {
+      Assertions.assertTrue(
+          deletedSnapshots.contains(Long.toString(snapshot.snapshotId())),
+          "Snapshot " + snapshot.snapshotId() + " should be tracked as deleted");
+    }
+  }
+
+  @Test
+  void testValidMultipleBranchesWithDifferentSnapshots() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base metadata
+    TableMetadata base =
+        TableMetadata.buildFrom(BASE_TABLE_METADATA)
+            .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
+            .build();
+
+    // Add multiple new snapshots
+    List<Snapshot> newSnapshots = testSnapshots.subList(1, 4); // snapshots 1, 2, 3
+
+    // Create snapshotRefs where each branch points to a DIFFERENT snapshot (valid scenario)
+    Map<String, SnapshotRef> validRefs = new HashMap<>();
+    validRefs.put("branch_a", SnapshotRef.branchBuilder(testSnapshots.get(1).snapshotId()).build());
+    validRefs.put("branch_b", SnapshotRef.branchBuilder(testSnapshots.get(2).snapshotId()).build());
+    validRefs.put("branch_c", SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId()).build());
+
+    // This should NOT throw an exception
+    Assertions.assertDoesNotThrow(
+        () ->
+            openHouseInternalTableOperations.applySnapshotOperations(
+                base, newSnapshots, validRefs, false),
+        "Should NOT throw exception when branches target different snapshots");
+  }
+
+  @Test
+  void testStandardWAPScenario() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+    List<Snapshot> wapSnapshots = IcebergTestUtil.getWapSnapshots();
+
+    // Create base with existing snapshots and a WAP snapshot
+    TableMetadata base =
+        TableMetadata.buildFrom(BASE_TABLE_METADATA)
+            .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
+            .addSnapshot(wapSnapshots.get(0)) // WAP snapshot (not referenced by any branch)
+            .build();
+
+    // Standard WAP scenario: pull the WAP snapshot into main branch
+    Snapshot wapSnapshot = wapSnapshots.get(0);
+    List<Snapshot> newSnapshots = List.of(); // No new snapshots, just referencing the existing WAP
+
+    // Create refs to pull WAP snapshot into main branch
+    Map<String, SnapshotRef> refs = new HashMap<>();
+    refs.put(SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(wapSnapshot.snapshotId()).build());
+
+    // Should succeed - standard WAP workflow where WAP snapshot becomes the new main
+    Assertions.assertDoesNotThrow(
+        () ->
+            openHouseInternalTableOperations.applySnapshotOperations(
+                base, newSnapshots, refs, false),
+        "Should successfully pull WAP snapshot into main branch");
+  }
 }

From 4d9dae02bc1fcd838e5c666be6238257b458fad0 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Tue, 7 Oct 2025 13:15:48 -0700
Subject: [PATCH 11/35] tests for the replication use case

---
 .../OpenHouseInternalTableOperationsTest.java | 345 +++++++++++++++++-
 1 file changed, 340 insertions(+), 5 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index e09e45d0d..7bb945c44 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -26,6 +26,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.Set;
 import java.util.UUID;
 import java.util.function.Consumer;
 import java.util.stream.Collectors;
@@ -42,6 +43,7 @@
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.SnapshotRef;
+import org.apache.iceberg.SnapshotRefParser;
 import org.apache.iceberg.SortDirection;
 import org.apache.iceberg.SortOrder;
 import org.apache.iceberg.TableMetadata;
@@ -1633,15 +1635,15 @@ void testDeleteAllSnapshotsFailsWhenMainBranchReferenced() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
     // Create base metadata with all snapshots, where the last one is referenced by main branch
-    TableMetadata base =
+    TableMetadata tempBase =
         testSnapshots.subList(0, testSnapshots.size() - 1).stream()
             .reduce(
                 BASE_TABLE_METADATA,
                 (metadata, snapshot) ->
                     TableMetadata.buildFrom(metadata).addSnapshot(snapshot).build(),
                 (m1, m2) -> m2);
-    base =
-        TableMetadata.buildFrom(base)
+    final TableMetadata base =
+        TableMetadata.buildFrom(tempBase)
             .setBranchSnapshot(testSnapshots.get(testSnapshots.size() - 1), SnapshotRef.MAIN_BRANCH)
             .build();
 
@@ -1669,10 +1671,11 @@ void testDeleteAllUnreferencedSnapshotsSucceeds() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
     // Create base metadata with unreferenced snapshots only (no main branch or other refs)
-    TableMetadata base = BASE_TABLE_METADATA;
+    TableMetadata tempBase = BASE_TABLE_METADATA;
     for (Snapshot snapshot : testSnapshots) {
-      base = TableMetadata.buildFrom(base).addSnapshot(snapshot).build();
+      tempBase = TableMetadata.buildFrom(tempBase).addSnapshot(snapshot).build();
     }
+    final TableMetadata base = tempBase;
     // Note: No setBranchSnapshot or setRef calls - all snapshots are unreferenced
 
     // Attempt to delete all unreferenced snapshots
@@ -1757,4 +1760,336 @@ void testStandardWAPScenario() throws IOException {
                 base, newSnapshots, refs, false),
         "Should successfully pull WAP snapshot into main branch");
   }
+
+  /**
+   * Integration test that verifies committing with base and metadata that are at least two commits
+   * divergent. This simulates scenarios where:
+   *
+   * <ul>
+   *   <li>Base metadata is at version N
+   *   <li>New metadata represents state at version N+2 or later (skipping intermediate versions)
+   *   <li>The commit should still succeed and write complete metadata
+   * </ul>
+   *
+   * <p>This test validates that Iceberg can handle "jump" commits where the metadata being
+   * committed has evolved significantly from the base.
+   */
+  @Test
+  void testMultipleDiffCommit() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
+        Mockito.mockStatic(TableMetadataParser.class)) {
+
+      // ========== Create base at N with 1 snapshot ==========
+      TableMetadata baseAtN =
+          TableMetadata.buildFrom(BASE_TABLE_METADATA)
+              .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
+              .build();
+
+      // ========== Create divergent metadata at N+3 with 4 snapshots ==========
+      // Simulate evolving through N+1 and N+2 without committing
+      TableMetadata intermediate1 =
+          TableMetadata.buildFrom(baseAtN)
+              .setBranchSnapshot(testSnapshots.get(1), SnapshotRef.MAIN_BRANCH)
+              .build();
+
+      TableMetadata intermediate2 =
+          TableMetadata.buildFrom(intermediate1)
+              .setBranchSnapshot(testSnapshots.get(2), SnapshotRef.MAIN_BRANCH)
+              .build();
+
+      TableMetadata metadataAtNPlus3 =
+          TableMetadata.buildFrom(intermediate2)
+              .setBranchSnapshot(testSnapshots.get(3), SnapshotRef.MAIN_BRANCH)
+              .build();
+
+      // Add custom properties for commit
+      Map<String, String> divergentProperties = new HashMap<>(metadataAtNPlus3.properties());
+      List<Snapshot> snapshots4 = testSnapshots.subList(0, 4);
+      divergentProperties.put(
+          CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots4));
+      divergentProperties.put(
+          CatalogConstants.SNAPSHOTS_REFS_KEY,
+          SnapshotsUtil.serializeMap(
+              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots4.get(3))));
+
+      TableMetadata finalDivergentMetadata =
+          metadataAtNPlus3.replaceProperties(divergentProperties);
+
+      // ========== COMMIT: Base at N, Metadata at N+3 (divergent by 3 commits) ==========
+      openHouseInternalTableOperations.doCommit(baseAtN, finalDivergentMetadata);
+      Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
+
+      TableMetadata capturedMetadata = tblMetadataCaptor.getValue();
+
+      // Verify the divergent commit contains all 4 snapshots
+      Assertions.assertEquals(
+          4,
+          capturedMetadata.snapshots().size(),
+          "Divergent commit should contain all 4 snapshots despite jumping from base with 1 snapshot");
+
+      Set<Long> expectedSnapshotIds =
+          snapshots4.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+      Set<Long> actualSnapshotIds =
+          capturedMetadata.snapshots().stream()
+              .map(Snapshot::snapshotId)
+              .collect(Collectors.toSet());
+      Assertions.assertEquals(
+          expectedSnapshotIds,
+          actualSnapshotIds,
+          "All snapshot IDs should be present after divergent commit");
+
+      // Verify main ref points to the expected snapshot (the 4th snapshot)
+      SnapshotRef mainRef = capturedMetadata.ref(SnapshotRef.MAIN_BRANCH);
+      Assertions.assertNotNull(mainRef, "Main branch ref should exist");
+      Assertions.assertEquals(
+          testSnapshots.get(3).snapshotId(),
+          mainRef.snapshotId(),
+          "Main branch should point to the 4th snapshot after divergent commit");
+    }
+  }
+
+  /**
+   * Test committing with divergent metadata and multiple valid branches. Base is at N with MAIN,
+   * metadata is at N+3 with both MAIN and feature_a branches pointing to different snapshots.
+   */
+  @Test
+  void testMultipleDiffCommitWithValidBranch() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
+        Mockito.mockStatic(TableMetadataParser.class)) {
+
+      // ========== Create base at N with 1 snapshot ==========
+      TableMetadata baseAtN =
+          TableMetadata.buildFrom(BASE_TABLE_METADATA)
+              .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
+              .build();
+
+      // ========== Create divergent metadata at N+3 with 4 snapshots and 2 branches ==========
+      TableMetadata intermediate1 =
+          TableMetadata.buildFrom(baseAtN)
+              .setBranchSnapshot(testSnapshots.get(1), SnapshotRef.MAIN_BRANCH)
+              .build();
+
+      TableMetadata intermediate2 =
+          TableMetadata.buildFrom(intermediate1)
+              .setBranchSnapshot(testSnapshots.get(2), SnapshotRef.MAIN_BRANCH)
+              .build();
+
+      TableMetadata metadataAtNPlus3 =
+          TableMetadata.buildFrom(intermediate2)
+              .setBranchSnapshot(testSnapshots.get(3), SnapshotRef.MAIN_BRANCH)
+              .build();
+
+      // Add custom properties for commit with multiple branches
+      Map<String, String> divergentProperties = new HashMap<>(metadataAtNPlus3.properties());
+      List<Snapshot> snapshots4 = testSnapshots.subList(0, 4);
+      divergentProperties.put(
+          CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots4));
+
+      // Create refs for both MAIN (pointing to snapshot 3) and feature_a (pointing to snapshot 2)
+      Map<String, String> multipleRefs = new HashMap<>();
+      multipleRefs.put(
+          SnapshotRef.MAIN_BRANCH,
+          SnapshotRefParser.toJson(
+              SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId()).build()));
+      multipleRefs.put(
+          "feature_a",
+          SnapshotRefParser.toJson(
+              SnapshotRef.branchBuilder(testSnapshots.get(2).snapshotId()).build()));
+
+      divergentProperties.put(
+          CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(multipleRefs));
+
+      TableMetadata finalDivergentMetadata =
+          metadataAtNPlus3.replaceProperties(divergentProperties);
+
+      // ========== COMMIT: Should succeed with multiple valid branches ==========
+      openHouseInternalTableOperations.doCommit(baseAtN, finalDivergentMetadata);
+      Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
+
+      TableMetadata capturedMetadata = tblMetadataCaptor.getValue();
+
+      // Verify all 4 snapshots are present
+      Assertions.assertEquals(
+          4,
+          capturedMetadata.snapshots().size(),
+          "Divergent commit with multiple branches should contain all 4 snapshots");
+
+      // Verify main ref points to the expected snapshot
+      SnapshotRef mainRef = capturedMetadata.ref(SnapshotRef.MAIN_BRANCH);
+      Assertions.assertNotNull(mainRef, "Main branch ref should exist");
+      Assertions.assertEquals(
+          testSnapshots.get(3).snapshotId(),
+          mainRef.snapshotId(),
+          "Main branch should point to the 4th snapshot");
+
+      // Verify feature_a ref points to the expected snapshot
+      SnapshotRef featureRef = capturedMetadata.ref("feature_a");
+      Assertions.assertNotNull(featureRef, "Feature_a branch ref should exist");
+      Assertions.assertEquals(
+          testSnapshots.get(2).snapshotId(),
+          featureRef.snapshotId(),
+          "Feature_a branch should point to the 3rd snapshot");
+    }
+  }
+
+  /**
+   * Test committing with divergent metadata where multiple branches point to the same snapshot.
+   * This is VALID when done through setBranchSnapshot() - the end state is allowed.
+   */
+  @Test
+  void testMultipleDiffCommitWithMultipleBranchesPointingToSameSnapshot() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
+        Mockito.mockStatic(TableMetadataParser.class)) {
+
+      // ========== Create base at N with 1 snapshot ==========
+      TableMetadata baseAtN =
+          TableMetadata.buildFrom(BASE_TABLE_METADATA)
+              .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
+              .build();
+
+      // ========== Create divergent metadata with MAIN and feature_a both pointing to snapshot 3
+      // ==========
+      TableMetadata.Builder builder = TableMetadata.buildFrom(baseAtN);
+      // Add snapshots 1, 2, 3 without assigning to branches
+      builder.addSnapshot(testSnapshots.get(1));
+      builder.addSnapshot(testSnapshots.get(2));
+      builder.addSnapshot(testSnapshots.get(3));
+      // Set BOTH branches to point to the same existing snapshot (using snapshot ID)
+      builder.setBranchSnapshot(testSnapshots.get(3).snapshotId(), SnapshotRef.MAIN_BRANCH);
+      builder.setBranchSnapshot(testSnapshots.get(3).snapshotId(), "feature_a");
+      TableMetadata metadataWithBothBranches = builder.build();
+
+      // Add custom properties with snapshots
+      Map<String, String> divergentProperties =
+          new HashMap<>(metadataWithBothBranches.properties());
+      List<Snapshot> snapshots4 = testSnapshots.subList(0, 4);
+      divergentProperties.put(
+          CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots4));
+
+      // Create refs matching the setBranchSnapshot calls - both pointing to snapshot 3
+      Map<String, String> sameSnapshotRefs = new HashMap<>();
+      sameSnapshotRefs.put(
+          SnapshotRef.MAIN_BRANCH,
+          SnapshotRefParser.toJson(
+              SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId()).build()));
+      sameSnapshotRefs.put(
+          "feature_a",
+          SnapshotRefParser.toJson(
+              SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId()).build()));
+
+      divergentProperties.put(
+          CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(sameSnapshotRefs));
+
+      TableMetadata finalDivergentMetadata =
+          metadataWithBothBranches.replaceProperties(divergentProperties);
+
+      // ========== COMMIT: Should SUCCEED - this is a valid end state ==========
+      openHouseInternalTableOperations.doCommit(baseAtN, finalDivergentMetadata);
+      Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
+
+      TableMetadata capturedMetadata = tblMetadataCaptor.getValue();
+
+      // Verify all 4 snapshots are present
+      Assertions.assertEquals(
+          4,
+          capturedMetadata.snapshots().size(),
+          "Commit with multiple branches pointing to same snapshot should contain all 4 snapshots");
+
+      // Verify BOTH refs point to the same snapshot
+      SnapshotRef mainRef = capturedMetadata.ref(SnapshotRef.MAIN_BRANCH);
+      Assertions.assertNotNull(mainRef, "Main branch ref should exist");
+      Assertions.assertEquals(
+          testSnapshots.get(3).snapshotId(),
+          mainRef.snapshotId(),
+          "Main branch should point to the 4th snapshot");
+
+      SnapshotRef featureRef = capturedMetadata.ref("feature_a");
+      Assertions.assertNotNull(featureRef, "Feature_a branch ref should exist");
+      Assertions.assertEquals(
+          testSnapshots.get(3).snapshotId(),
+          featureRef.snapshotId(),
+          "Feature_a branch should also point to the 4th snapshot (same as main)");
+
+      // Verify they point to the SAME snapshot
+      Assertions.assertEquals(
+          mainRef.snapshotId(),
+          featureRef.snapshotId(),
+          "Both branches should point to the same snapshot ID");
+    }
+  }
+
+  /**
+   * Test committing with divergent metadata where multiple branches try to point to the same
+   * snapshot (ambiguous commit). This should throw an IllegalStateException.
+   */
+  @Test
+  void testMultipleDiffCommitWithInvalidBranch() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+
+    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
+        Mockito.mockStatic(TableMetadataParser.class)) {
+
+      // ========== Create base at N with 1 snapshot ==========
+      TableMetadata baseAtN =
+          TableMetadata.buildFrom(BASE_TABLE_METADATA)
+              .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
+              .build();
+
+      // ========== Create metadata with 4 snapshots but only snapshot 0 in refs ==========
+      // Build metadata with all 4 snapshots added, but keep MAIN pointing to snapshot 0
+      TableMetadata.Builder builder = TableMetadata.buildFrom(baseAtN);
+      // Add snapshots 1, 2, 3 without assigning them to any branch
+      builder.addSnapshot(testSnapshots.get(1));
+      builder.addSnapshot(testSnapshots.get(2));
+      builder.addSnapshot(testSnapshots.get(3));
+      TableMetadata metadataWithAllSnapshots = builder.build();
+
+      // Add custom properties with AMBIGUOUS branch refs - both pointing to same snapshot
+      Map<String, String> divergentProperties =
+          new HashMap<>(metadataWithAllSnapshots.properties());
+      List<Snapshot> snapshots4 = testSnapshots.subList(0, 4);
+      divergentProperties.put(
+          CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots4));
+
+      // Create INVALID refs: both MAIN and feature_a pointing to the SAME snapshot (ambiguous!)
+      Map<String, String> ambiguousRefs = new HashMap<>();
+      ambiguousRefs.put(
+          SnapshotRef.MAIN_BRANCH,
+          SnapshotRefParser.toJson(
+              SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId()).build()));
+      ambiguousRefs.put(
+          "feature_a",
+          SnapshotRefParser.toJson(
+              SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId())
+                  .build())); // Same snapshot!
+
+      divergentProperties.put(
+          CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(ambiguousRefs));
+
+      TableMetadata finalDivergentMetadata =
+          metadataWithAllSnapshots.replaceProperties(divergentProperties);
+
+      // ========== COMMIT: Should throw CommitStateUnknownException due to ambiguous branches
+      // ==========
+      CommitStateUnknownException exception =
+          Assertions.assertThrows(
+              CommitStateUnknownException.class,
+              () -> openHouseInternalTableOperations.doCommit(baseAtN, finalDivergentMetadata),
+              "Should throw CommitStateUnknownException when multiple branches point to same snapshot");
+
+      // Verify error message indicates the ambiguous commit
+      String exceptionMessage = exception.getMessage();
+      Assertions.assertTrue(
+          exceptionMessage.contains("Multiple branches")
+              && exceptionMessage.contains("same target snapshot"),
+          "Error message should indicate multiple branches targeting same snapshot: "
+              + exceptionMessage);
+    }
+  }
 }

From abdf335f3d848a3148720b9932b43577f7ce8a8f Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Wed, 8 Oct 2025 10:55:50 -0700
Subject: [PATCH 12/35] refactoring pipeline

---
 .../OpenHouseInternalTableOperations.java     | 872 ++++++++++--------
 1 file changed, 511 insertions(+), 361 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
index dc9ab7ead..a89a5d570 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
@@ -23,7 +23,6 @@
 import java.io.IOException;
 import java.time.Clock;
 import java.time.Instant;
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -35,8 +34,6 @@
 import java.util.stream.Collectors;
 import lombok.AllArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.collections.CollectionUtils;
-import org.apache.commons.collections.MapUtils;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.iceberg.BaseMetastoreTableOperations;
 import org.apache.iceberg.PartitionField;
@@ -61,7 +58,6 @@
 import org.apache.iceberg.expressions.Term;
 import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.relocated.com.google.common.base.Objects;
-import org.springframework.data.util.Pair;
 
 @AllArgsConstructor
 @Slf4j
@@ -230,6 +226,8 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) {
       metadata = rebuildTblMetaWithSchema(metadata, CatalogConstants.EVOLVED_SCHEMA_KEY, true);
     }
 
+    metadata = applySnapshots(base, metadata);
+
     int version = currentVersion() + 1;
     CommitStatus commitStatus = CommitStatus.FAILURE;
 
@@ -261,8 +259,6 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) {
       if (properties.containsKey(CatalogConstants.EVOLVED_SCHEMA_KEY)) {
         properties.remove(CatalogConstants.EVOLVED_SCHEMA_KEY);
       }
-      String serializedSnapshotsToPut = properties.remove(CatalogConstants.SNAPSHOTS_JSON_KEY);
-      String serializedSnapshotRefs = properties.remove(CatalogConstants.SNAPSHOTS_REFS_KEY);
       boolean isStageCreate =
           Boolean.parseBoolean(properties.remove(CatalogConstants.IS_STAGE_CREATE_KEY));
       String sortOrderJson = properties.remove(CatalogConstants.SORT_ORDER_KEY);
@@ -275,27 +271,6 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) {
         updatedMetadata = updatedMetadata.replaceSortOrder(sortOrder);
       }
 
-      if (serializedSnapshotsToPut != null) {
-        List<Snapshot> snapshotsToPut =
-            SnapshotsUtil.parseSnapshots(fileIO, serializedSnapshotsToPut);
-        Pair<List<Snapshot>, List<Snapshot>> snapshotsDiff =
-            SnapshotsUtil.symmetricDifferenceSplit(snapshotsToPut, updatedMetadata.snapshots());
-        List<Snapshot> appendedSnapshots = snapshotsDiff.getFirst();
-        List<Snapshot> deletedSnapshots = snapshotsDiff.getSecond();
-        snapshotInspector.validateSnapshotsUpdate(
-            updatedMetadata, appendedSnapshots, deletedSnapshots);
-        Map<String, SnapshotRef> snapshotRefs =
-            serializedSnapshotRefs == null
-                ? new HashMap<>()
-                : SnapshotsUtil.parseSnapshotRefs(serializedSnapshotRefs);
-
-        // Multi-branch support is now enabled with snapshot ID matching
-
-        updatedMetadata =
-            applySnapshotOperations(updatedMetadata, appendedSnapshots, snapshotRefs, true);
-        updatedMetadata = maybeDeleteSnapshots(updatedMetadata, deletedSnapshots);
-      }
-
       final TableMetadata updatedMtDataRef = updatedMetadata;
       long metadataUpdateStartTime = System.currentTimeMillis();
       try {
@@ -506,403 +481,578 @@ static SortOrder rebuildSortOrder(SortOrder originalSortOrder, Schema newSchema)
     return builder.build();
   }
 
+  // ==================== Functional Snapshot Application Pipeline ====================
+
   /**
-   * If this commit comes from Iceberg built-in retry in
-   * org.apache.iceberg.PropertiesUpdate#commit() Then throw fatal {@link CommitFailedException} to
-   * inform users.
+   * Immutable state object representing the complete snapshot diff and categorization. All fields
+   * are final and collections are unmodifiable.
    */
-  private void failIfRetryUpdate(Map<String, String> properties) {
-    if (properties.containsKey(CatalogConstants.COMMIT_KEY)) {
-      String userProvidedTblVer = properties.get(CatalogConstants.COMMIT_KEY);
-
-      // If the commit is ever seen in the past, that indicates this commit is a retry and should
-      // abort
-      if (CACHE.getIfPresent(userProvidedTblVer) != null) {
-        throw new CommitFailedException(
-            String.format(
-                "The user provided table version [%s] for table [%s] is stale, please consider retry from application",
-                userProvidedTblVer, tableIdentifier));
-      } else {
-        CACHE.put(userProvidedTblVer, 1);
-      }
+  @lombok.Value
+  @lombok.Builder
+  private static class SnapshotState {
+    List<Snapshot> providedSnapshots;
+    Map<String, SnapshotRef> providedRefs;
+    List<Snapshot> existingSnapshots;
+    Map<String, SnapshotRef> existingRefs;
+
+    // Categorization
+    List<Snapshot> wapSnapshots;
+    List<Snapshot> cherryPickedSnapshots;
+    List<Snapshot> regularSnapshots;
+
+    // Diff results
+    List<Snapshot> newSnapshots;
+    List<Snapshot> existingRetainedSnapshots;
+    List<Snapshot> deletedSnapshots;
+
+    // Branch updates
+    Map<String, SnapshotRef> branchUpdates;
+
+    // Metrics for recording
+    int appendedCount;
+    int stagedCount;
+    int cherryPickedCount;
+    int deletedCount;
+  }
 
-      properties.remove(CatalogConstants.COMMIT_KEY);
-    } else {
-      // This should never occur except table-creation. However, when table-creation hits
-      // concurrency issue
-      // it throw AlreadyExistsException and will not trigger retry.
-      metricsReporter.count(InternalCatalogMetricsConstant.MISSING_COMMIT_KEY);
+  /**
+   * Applies snapshot updates from metadata properties using a functional pipeline. This method
+   * follows principles: immutability, pure functions, and composition.
+   *
+   * <p>Pipeline stages: 1. Extract snapshots from properties 2. Parse snapshots from JSON 3. Parse
+   * references from JSON 4. Compute complete state diff (categorize, identify changes) 5. Validate
+   * entire operation 6. Apply state changes 7. Record metrics/properties
+   *
+   * @param base The base table metadata (may be null for table creation)
+   * @param metadata The new metadata with properties containing snapshot updates
+   * @return Updated metadata with snapshots applied
+   */
+  TableMetadata applySnapshots(TableMetadata base, TableMetadata metadata) {
+    // Check if snapshots update is requested
+    if (!metadata.properties().containsKey(CatalogConstants.SNAPSHOTS_JSON_KEY)) {
+      // No snapshot updates requested, return unchanged
+      return metadata;
     }
+
+    return Optional.ofNullable(metadata.properties().get(CatalogConstants.SNAPSHOTS_JSON_KEY))
+        .map(
+            snapshotsJson -> {
+              // Stage 1-3: Extract and parse
+              SnapshotState.SnapshotStateBuilder stateBuilder = SnapshotState.builder();
+
+              // Extract and parse snapshots (Stage 1-2)
+              List<Snapshot> providedSnapshots = parseSnapshotsFromJson(snapshotsJson);
+              stateBuilder.providedSnapshots(Collections.unmodifiableList(providedSnapshots));
+
+              // Extract and parse references (Stage 3)
+              Map<String, SnapshotRef> providedRefs =
+                  Optional.ofNullable(
+                          metadata.properties().get(CatalogConstants.SNAPSHOTS_REFS_KEY))
+                      .map(this::parseReferencesFromJson)
+                      .orElse(Collections.emptyMap());
+              stateBuilder.providedRefs(Collections.unmodifiableMap(providedRefs));
+
+              // Get existing state from base
+              List<Snapshot> existingSnapshots =
+                  Optional.ofNullable(base)
+                      .map(TableMetadata::snapshots)
+                      .orElse(Collections.emptyList());
+              stateBuilder.existingSnapshots(Collections.unmodifiableList(existingSnapshots));
+
+              Map<String, SnapshotRef> existingRefs =
+                  Optional.ofNullable(base).map(TableMetadata::refs).orElse(Collections.emptyMap());
+              stateBuilder.existingRefs(Collections.unmodifiableMap(existingRefs));
+
+              // Stage 4: Compute complete state diff
+              SnapshotState state = computeStateDiff(stateBuilder);
+
+              // Stage 5: Validate entire operation
+              validateOperation(state, base);
+
+              // Stage 6: Apply state changes
+              TableMetadata updated = applyStateChanges(metadata, state);
+
+              // Stage 7: Record metrics/properties
+              return recordMetrics(updated, state);
+            })
+        .orElse(metadata); // No snapshot updates if key not present
   }
 
-  public TableMetadata maybeDeleteSnapshots(
-      TableMetadata metadata, List<Snapshot> snapshotsToDelete) {
-    TableMetadata result = metadata;
-    if (CollectionUtils.isNotEmpty(snapshotsToDelete)) {
-      // Validate that snapshots to delete are not referenced by any branches or tags
-      validateSnapshotsNotReferenced(metadata, snapshotsToDelete);
+  /** Stage 2: Parse snapshots from JSON string. Pure function - no side effects. */
+  private List<Snapshot> parseSnapshotsFromJson(String snapshotsJson) {
+    return SnapshotsUtil.parseSnapshots(fileIO, snapshotsJson);
+  }
 
-      Set<Long> snapshotIds =
-          snapshotsToDelete.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-      Map<String, String> updatedProperties = new HashMap<>(result.properties());
-      updatedProperties.put(
-          getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
-          snapshotsToDelete.stream()
-              .map(s -> Long.toString(s.snapshotId()))
-              .collect(Collectors.joining(",")));
-      result =
-          TableMetadata.buildFrom(result)
-              .setProperties(updatedProperties)
-              .build()
-              .removeSnapshotsIf(s -> snapshotIds.contains(s.snapshotId()));
-      metricsReporter.count(
-          InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, (double) snapshotsToDelete.size());
-    }
-    return result;
-  }
-
-  /** Represents the semantic difference between current server state and client-desired state. */
-  private static class StateDiff {
-    final List<Snapshot> newSnapshots;
-    final Map<String, Long> branchUpdates; // branch -> snapshotId
-    final Map<String, Snapshot> snapshotLookup; // snapshotId -> Snapshot for efficiency
-
-    StateDiff(
-        List<Snapshot> newSnapshots,
-        Map<String, Long> branchUpdates,
-        List<Snapshot> allClientSnapshots) {
-      this.newSnapshots = List.copyOf(newSnapshots);
-      this.branchUpdates = Map.copyOf(branchUpdates);
-      this.snapshotLookup =
-          allClientSnapshots.stream()
-              .collect(Collectors.toMap(s -> String.valueOf(s.snapshotId()), s -> s));
-    }
+  /** Stage 3: Parse references from JSON string. Pure function - no side effects. */
+  private Map<String, SnapshotRef> parseReferencesFromJson(String refsJson) {
+    return SnapshotsUtil.parseSnapshotRefs(refsJson);
   }
 
-  /** Checks if a branch needs to be updated based on current refs and new snapshot ID. */
-  private boolean needsBranchUpdate(TableMetadata metadata, String branchName, long newSnapshotId) {
-    if (MapUtils.isEmpty(metadata.refs())) {
-      // No refs exist yet, this is a new branch
-      return true;
-    }
+  /**
+   * Stage 4: Compute complete state diff. Pure function that categorizes snapshots and identifies
+   * changes.
+   */
+  private SnapshotState computeStateDiff(SnapshotState.SnapshotStateBuilder builder) {
+    SnapshotState partial = builder.build();
+
+    Map<Long, Snapshot> providedById =
+        partial.getProvidedSnapshots().stream()
+            .collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
+    Map<Long, Snapshot> existingById =
+        partial.getExistingSnapshots().stream()
+            .collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
+
+    // Categorize all snapshots by type
+    SnapshotCategories categories =
+        categorizeAllSnapshots(partial.getProvidedSnapshots(), existingById);
+
+    // Identify snapshot changes (new, retained, deleted)
+    SnapshotChanges changes =
+        identifySnapshotChanges(
+            partial.getProvidedSnapshots(),
+            partial.getExistingSnapshots(),
+            providedById,
+            existingById);
+
+    // Identify branch updates
+    Map<String, SnapshotRef> branchUpdates =
+        computeBranchUpdates(partial.getProvidedRefs(), partial.getExistingRefs());
+
+    // Compute metrics
+    SnapshotMetrics metrics = computeSnapshotMetrics(categories, changes, existingById);
+
+    // Build complete state
+    return builder
+        .wapSnapshots(Collections.unmodifiableList(categories.wapSnapshots))
+        .cherryPickedSnapshots(Collections.unmodifiableList(categories.cherryPickedSnapshots))
+        .regularSnapshots(Collections.unmodifiableList(categories.regularSnapshots))
+        .newSnapshots(Collections.unmodifiableList(changes.newSnapshots))
+        .existingRetainedSnapshots(Collections.unmodifiableList(changes.existingRetainedSnapshots))
+        .deletedSnapshots(Collections.unmodifiableList(changes.deletedSnapshots))
+        .branchUpdates(Collections.unmodifiableMap(branchUpdates))
+        .appendedCount(metrics.appendedCount)
+        .stagedCount(metrics.stagedCount)
+        .cherryPickedCount(metrics.cherryPickedCount)
+        .deletedCount(metrics.deletedCount)
+        .build();
+  }
 
-    SnapshotRef currentRef = metadata.refs().get(branchName);
-    return currentRef == null || currentRef.snapshotId() != newSnapshotId;
+  /** Container for categorized snapshots. */
+  @lombok.Value
+  private static class SnapshotCategories {
+    List<Snapshot> wapSnapshots;
+    List<Snapshot> cherryPickedSnapshots;
+    List<Snapshot> regularSnapshots;
   }
 
-  /** Validates that no two branches are trying to point to the same snapshot (ambiguous commit). */
-  private void validateNoBranchConflicts(Map<String, Long> branchUpdates) {
-    // Group branches by target snapshot ID
-    Map<Long, List<String>> snapshotToBranches = new HashMap<>();
-    for (Map.Entry<String, Long> entry : branchUpdates.entrySet()) {
-      snapshotToBranches
-          .computeIfAbsent(entry.getValue(), k -> new ArrayList<>())
-          .add(entry.getKey());
-    }
+  /** Categorize all snapshots into WAP, cherry-picked, and regular. */
+  private SnapshotCategories categorizeAllSnapshots(
+      List<Snapshot> providedSnapshots, Map<Long, Snapshot> existingById) {
+    List<Snapshot> wapSnapshots = categorizeWapSnapshots(providedSnapshots);
+    List<Snapshot> cherryPickedSnapshots =
+        categorizeCherryPickedSnapshots(providedSnapshots, existingById);
+    List<Snapshot> regularSnapshots =
+        categorizeRegularSnapshots(providedSnapshots, wapSnapshots, cherryPickedSnapshots);
 
-    // Check for conflicts (multiple branches pointing to same snapshot)
-    for (Map.Entry<Long, List<String>> entry : snapshotToBranches.entrySet()) {
-      List<String> branches = entry.getValue();
-      if (branches.size() > 1) {
-        throw new IllegalStateException(
-            String.format(
-                "Multiple branches (%s) specify the same target snapshot %d. "
-                    + "This indicates an ambiguous commit operation - each snapshot can only be assigned to one branch.",
-                branches, entry.getKey()));
-      }
-    }
+    return new SnapshotCategories(wapSnapshots, cherryPickedSnapshots, regularSnapshots);
   }
 
-  /** Validates that snapshots to be deleted are not referenced by any branches or tags. */
-  private void validateSnapshotsNotReferenced(
-      TableMetadata metadata, List<Snapshot> snapshotsToDelete) {
-    if (MapUtils.isEmpty(metadata.refs()) || CollectionUtils.isEmpty(snapshotsToDelete)) {
-      return; // No refs to check or no snapshots to delete
-    }
-
-    Set<Long> snapshotIdsToDelete =
-        snapshotsToDelete.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+  /** Container for snapshot changes. */
+  @lombok.Value
+  private static class SnapshotChanges {
+    List<Snapshot> newSnapshots;
+    List<Snapshot> existingRetainedSnapshots;
+    List<Snapshot> deletedSnapshots;
+  }
 
-    // Check if any snapshot to delete is referenced by branches or tags
-    for (Map.Entry<String, SnapshotRef> refEntry : metadata.refs().entrySet()) {
-      String refName = refEntry.getKey();
-      SnapshotRef ref = refEntry.getValue();
+  /** Identify which snapshots are new, retained, or deleted. */
+  private SnapshotChanges identifySnapshotChanges(
+      List<Snapshot> providedSnapshots,
+      List<Snapshot> existingSnapshots,
+      Map<Long, Snapshot> providedById,
+      Map<Long, Snapshot> existingById) {
 
-      if (snapshotIdsToDelete.contains(ref.snapshotId())) {
-        List<String> referencingRefs =
-            metadata.refs().entrySet().stream()
-                .filter(entry -> snapshotIdsToDelete.contains(entry.getValue().snapshotId()))
-                .map(Map.Entry::getKey)
-                .collect(Collectors.toList());
+    List<Snapshot> newSnapshots =
+        providedSnapshots.stream()
+            .filter(s -> !existingById.containsKey(s.snapshotId()))
+            .collect(Collectors.toList());
 
-        throw new IllegalArgumentException(
-            String.format(
-                "Cannot expire %d. Still referenced by refs: %s",
-                ref.snapshotId(), referencingRefs));
-      }
-    }
-  }
+    List<Snapshot> existingRetainedSnapshots =
+        providedSnapshots.stream()
+            .filter(s -> existingById.containsKey(s.snapshotId()))
+            .collect(Collectors.toList());
 
-  /** Records snapshot actions in table properties and reports metrics. */
-  private void recordSnapshotActions(
-      TableMetadata metadata,
-      TableMetadata.Builder metadataBuilder,
-      List<String> appendedSnapshots,
-      List<String> stagedSnapshots,
-      List<String> cherryPickedSnapshots) {
+    List<Snapshot> deletedSnapshots =
+        existingSnapshots.stream()
+            .filter(s -> !providedById.containsKey(s.snapshotId()))
+            .collect(Collectors.toList());
 
-    Map<String, String> updatedProperties = new HashMap<>(metadata.properties());
+    return new SnapshotChanges(newSnapshots, existingRetainedSnapshots, deletedSnapshots);
+  }
 
-    if (CollectionUtils.isNotEmpty(appendedSnapshots)) {
-      updatedProperties.put(
-          getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
-          appendedSnapshots.stream().collect(Collectors.joining(",")));
-      metricsReporter.count(
-          InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedSnapshots.size());
-    }
+  /** Container for snapshot metrics. */
+  @lombok.Value
+  private static class SnapshotMetrics {
+    int appendedCount;
+    int stagedCount;
+    int cherryPickedCount;
+    int deletedCount;
+  }
 
-    if (CollectionUtils.isNotEmpty(stagedSnapshots)) {
-      updatedProperties.put(
-          getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
-          stagedSnapshots.stream().collect(Collectors.joining(",")));
-      metricsReporter.count(
-          InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, stagedSnapshots.size());
-    }
+  /** Compute metrics based on categorized snapshots and changes. */
+  private SnapshotMetrics computeSnapshotMetrics(
+      SnapshotCategories categories, SnapshotChanges changes, Map<Long, Snapshot> existingById) {
 
-    if (CollectionUtils.isNotEmpty(cherryPickedSnapshots)) {
-      updatedProperties.put(
-          getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
-          cherryPickedSnapshots.stream().collect(Collectors.joining(",")));
-      metricsReporter.count(
-          InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR, cherryPickedSnapshots.size());
-    }
+    int appendedCount =
+        (int)
+            categories.regularSnapshots.stream()
+                .filter(s -> !existingById.containsKey(s.snapshotId()))
+                .count();
+    int stagedCount = categories.wapSnapshots.size();
+    int cherryPickedCount = categories.cherryPickedSnapshots.size();
+    int deletedCount = changes.deletedSnapshots.size();
 
-    metadataBuilder.setProperties(updatedProperties);
+    return new SnapshotMetrics(appendedCount, stagedCount, cherryPickedCount, deletedCount);
   }
 
   /**
-   * Applies client-requested changes to server state using functional approach.
-   *
-   * <p>Contract: currentState + clientDesiredState -> newState + metrics
-   *
-   * <p>Client sends desired final state, server computes semantic diff and applies it.
+   * Categorize WAP (Write-Audit-Publish) snapshots. A snapshot is WAP if it has the WAP ID in its
+   * summary.
    */
-  public TableMetadata applySnapshotOperations(
-      TableMetadata currentMetadata,
-      List<Snapshot> clientSnapshots,
-      Map<String, SnapshotRef> clientRefs,
-      boolean recordAction) {
+  private List<Snapshot> categorizeWapSnapshots(List<Snapshot> snapshots) {
+    return snapshots.stream()
+        .filter(
+            s -> s.summary() != null && s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
+        .collect(Collectors.toList());
+  }
 
-    return computeStateDiff(currentMetadata, clientSnapshots, clientRefs)
-        .map(
-            diff -> {
-              TableMetadata newMetadata = applyStateDiff(currentMetadata, diff);
-              return recordAction
-                  ? recordTransition(currentMetadata, newMetadata, diff)
-                  : newMetadata;
+  /**
+   * Categorize cherry-picked snapshots. A snapshot is cherry-picked if it exists in the current
+   * metadata but has a different parent than in the provided snapshots (indicating it was moved to
+   * a different branch).
+   */
+  private List<Snapshot> categorizeCherryPickedSnapshots(
+      List<Snapshot> providedSnapshots, Map<Long, Snapshot> existingById) {
+
+    return providedSnapshots.stream()
+        .filter(
+            provided -> {
+              Snapshot existing = existingById.get(provided.snapshotId());
+              if (existing == null) {
+                return false; // New snapshot, not cherry-picked
+              }
+              // Check if parent changed (indicating cherry-pick to different branch)
+              Long providedParent = provided.parentId();
+              Long existingParent = existing.parentId();
+              return !Objects.equal(providedParent, existingParent);
             })
-        .orElse(currentMetadata);
+        .collect(Collectors.toList());
   }
 
-  /** Computes semantic difference between current server state and client-desired state. */
-  private Optional<StateDiff> computeStateDiff(
-      TableMetadata currentMetadata,
-      List<Snapshot> clientSnapshots,
-      Map<String, SnapshotRef> clientRefs) {
+  /**
+   * Categorize regular (appended) snapshots. Regular snapshots are those that are not WAP or
+   * cherry-picked.
+   */
+  private List<Snapshot> categorizeRegularSnapshots(
+      List<Snapshot> allSnapshots,
+      List<Snapshot> wapSnapshots,
+      List<Snapshot> cherryPickedSnapshots) {
+
+    Set<Long> wapIds = wapSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+    Set<Long> cherryPickedIds =
+        cherryPickedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+
+    return allSnapshots.stream()
+        .filter(s -> !wapIds.contains(s.snapshotId()) && !cherryPickedIds.contains(s.snapshotId()))
+        .collect(Collectors.toList());
+  }
 
-    if (CollectionUtils.isEmpty(clientSnapshots) && MapUtils.isEmpty(clientRefs)) {
-      return Optional.empty(); // No changes requested
-    }
+  /** Compute branch updates by comparing provided and existing refs. */
+  private Map<String, SnapshotRef> computeBranchUpdates(
+      Map<String, SnapshotRef> providedRefs, Map<String, SnapshotRef> existingRefs) {
 
-    Set<Long> currentSnapshotIds =
-        currentMetadata.snapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+    return providedRefs.entrySet().stream()
+        .filter(
+            entry -> {
+              SnapshotRef existing = existingRefs.get(entry.getKey());
+              return existing == null || existing.snapshotId() != entry.getValue().snapshotId();
+            })
+        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+  }
 
-    // Find truly new snapshots (not in current metadata)
-    List<Snapshot> newSnapshots =
-        Optional.ofNullable(clientSnapshots).orElse(Collections.emptyList()).stream()
-            .filter(s -> !currentSnapshotIds.contains(s.snapshotId()))
-            .collect(Collectors.toList());
+  /** Stage 5: Validate entire operation. Throws exceptions for invalid operations. */
+  private void validateOperation(SnapshotState state, TableMetadata base) {
+    // Validation 1: Current snapshot not deleted without replacements
+    validateCurrentSnapshotNotDeleted(state, base);
 
-    // Find branch updates needed
-    Map<String, Long> branchUpdates =
-        Optional.ofNullable(clientRefs).orElse(Collections.emptyMap()).entrySet().stream()
-            .filter(
-                entry ->
-                    needsBranchUpdate(
-                        currentMetadata, entry.getKey(), entry.getValue().snapshotId()))
-            .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().snapshotId()));
-
-    // Check for ambiguous commits: multiple branches trying to point to the same snapshot
-    validateNoBranchConflicts(branchUpdates);
-
-    return Optional.of(
-        new StateDiff(
-            newSnapshots,
-            branchUpdates,
-            Optional.ofNullable(clientSnapshots).orElse(Collections.emptyList())));
-  }
-
-  /** Applies the computed state diff to create new metadata. */
-  private TableMetadata applyStateDiff(TableMetadata currentMetadata, StateDiff diff) {
-    TableMetadata.Builder builder = TableMetadata.buildFrom(currentMetadata);
-
-    // Add new snapshots (respecting Iceberg semantics)
-    diff.newSnapshots.forEach(
-        snapshot -> {
-          snapshotInspector.validateSnapshot(snapshot);
-
-          if (isWapStaged(snapshot)) {
-            // WAP snapshots are always staged (never assigned to branches initially)
-            builder.addSnapshot(snapshot);
-          } else {
-            // All other snapshots: assign to branch if specified, otherwise stage
-            findTargetBranchForSnapshot(snapshot, diff.branchUpdates)
-                .ifPresentOrElse(
-                    targetBranch -> builder.setBranchSnapshot(snapshot, targetBranch),
-                    () -> builder.addSnapshot(snapshot));
-          }
-        });
+    // Validation 2: No ambiguous commits (multiple branches → same snapshot)
+    validateNoAmbiguousCommits(state);
 
-    // Update branch pointers to existing snapshots
-    diff.branchUpdates.entrySet().stream()
-        .filter(entry -> !isNewSnapshot(entry.getValue(), diff.newSnapshots))
-        .forEach(entry -> builder.setBranchSnapshot(entry.getValue(), entry.getKey()));
+    // Validation 3: Deleted snapshots not referenced by branches/tags
+    validateDeletedSnapshotsNotReferenced(state);
 
-    return builder.build();
+    // Validation 4: Individual snapshot validation using SnapshotInspector
+    validateIndividualSnapshots(state);
   }
 
-  /** Checks if snapshot is WAP staged (should not be assigned to any branch). */
-  private boolean isWapStaged(Snapshot snapshot) {
-    return snapshot.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP);
+  /**
+   * Validate that current snapshot is not deleted without replacements. Package-private for
+   * testing.
+   */
+  void validateCurrentSnapshotNotDeleted(SnapshotState state, TableMetadata base) {
+    if (base == null || base.currentSnapshot() == null) {
+      return; // No current snapshot to validate
+    }
+
+    long currentSnapshotId = base.currentSnapshot().snapshotId();
+    boolean currentDeleted =
+        state.getDeletedSnapshots().stream().anyMatch(s -> s.snapshotId() == currentSnapshotId);
+
+    if (currentDeleted && state.getNewSnapshots().isEmpty()) {
+      throw new InvalidIcebergSnapshotException(
+          String.format(
+              "Cannot delete the current snapshot %s without adding replacement snapshots. "
+                  + "Deleted: [%s], New: [%s]",
+              currentSnapshotId,
+              state.getDeletedSnapshots().stream()
+                  .map(s -> Long.toString(s.snapshotId()))
+                  .collect(Collectors.joining(", ")),
+              state.getNewSnapshots().stream()
+                  .map(s -> Long.toString(s.snapshotId()))
+                  .collect(Collectors.joining(", "))));
+    }
   }
 
-  /** Checks if snapshot is cherry-picked (should go directly to target branch). */
-  private boolean isCherryPicked(Snapshot snapshot) {
-    return snapshot.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP);
+  /**
+   * Validate no ambiguous commits (multiple branches pointing to same snapshot in one commit).
+   * Package-private for testing.
+   */
+  void validateNoAmbiguousCommits(SnapshotState state) {
+    Map<Long, List<String>> snapshotToBranches =
+        state.getBranchUpdates().entrySet().stream()
+            .collect(
+                Collectors.groupingBy(
+                    e -> e.getValue().snapshotId(),
+                    Collectors.mapping(Map.Entry::getKey, Collectors.toList())));
+
+    snapshotToBranches.forEach(
+        (snapshotId, branches) -> {
+          if (branches.size() > 1) {
+            throw new InvalidIcebergSnapshotException(
+                String.format(
+                    "Ambiguous commit: snapshot %s is referenced by multiple branches [%s] in a single commit. "
+                        + "Each snapshot can only be referenced by one branch per commit.",
+                    snapshotId, String.join(", ", branches)));
+          }
+        });
   }
 
   /**
-   * Finds which branch this snapshot should be assigned to based on branch updates. Fails fast if
-   * multiple branches want the same snapshot (ambiguous commit).
+   * Validate that deleted snapshots are not referenced by any branches or tags. Package-private for
+   * testing.
    */
-  private Optional<String> findTargetBranchForSnapshot(
-      Snapshot snapshot, Map<String, Long> branchUpdates) {
-    List<String> matchingBranches =
-        branchUpdates.entrySet().stream()
-            .filter(entry -> entry.getValue() == snapshot.snapshotId())
-            .map(Map.Entry::getKey)
-            .toList();
-
-    if (matchingBranches.size() > 1) {
-      throw new IllegalStateException(
-          "Multiple branches (%s) specify the same target snapshot %d. "
-              + "This indicates an ambiguous commit operation - each snapshot can only be assigned to one branch."
-                  .formatted(matchingBranches, snapshot.snapshotId()));
-    }
+  void validateDeletedSnapshotsNotReferenced(SnapshotState state) {
+    Set<Long> deletedIds =
+        state.getDeletedSnapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
 
-    return matchingBranches.stream().findFirst();
+    Map<Long, List<String>> referencedIdsToRefs =
+        state.getProvidedRefs().entrySet().stream()
+            .collect(
+                Collectors.groupingBy(
+                    e -> e.getValue().snapshotId(),
+                    Collectors.mapping(Map.Entry::getKey, Collectors.toList())));
+
+    Map<Long, List<String>> invalidDeletes =
+        deletedIds.stream()
+            .filter(referencedIdsToRefs::containsKey)
+            .collect(Collectors.toMap(id -> id, referencedIdsToRefs::get));
+
+    if (!invalidDeletes.isEmpty()) {
+      String details =
+          invalidDeletes.entrySet().stream()
+              .map(
+                  e ->
+                      String.format(
+                          "snapshot %s (referenced by: %s)",
+                          e.getKey(), String.join(", ", e.getValue())))
+              .collect(Collectors.joining("; "));
+      throw new InvalidIcebergSnapshotException(
+          String.format(
+              "Cannot delete snapshots that are still referenced by branches/tags: %s", details));
+    }
   }
 
-  /** Checks if this snapshot ID is in the list of new snapshots being added. */
-  private boolean isNewSnapshot(Long snapshotId, List<Snapshot> newSnapshots) {
-    return newSnapshots.stream().anyMatch(s -> s.snapshotId() == snapshotId);
+  /**
+   * Validate individual snapshots using existing SnapshotInspector. Package-private for testing.
+   */
+  void validateIndividualSnapshots(SnapshotState state) {
+    state
+        .getNewSnapshots()
+        .forEach(
+            snapshot -> {
+              if (snapshotInspector != null) {
+                snapshotInspector.validateSnapshot(snapshot);
+              }
+            });
   }
 
-  /** Records metrics and properties about the state transition that occurred. */
-  private TableMetadata recordTransition(
-      TableMetadata originalMetadata, TableMetadata newMetadata, StateDiff diff) {
+  /**
+   * Stage 6: Apply state changes to create new TableMetadata. Pure function - creates new metadata
+   * without mutating existing.
+   *
+   * <p>This method uses Iceberg's proper APIs: - removeSnapshots() to delete snapshots -
+   * addSnapshot() to add new snapshots - setBranchSnapshot() to set branch references
+   *
+   * <p>The order of operations matters: 1. Start with base metadata (buildFrom copies all existing
+   * state) 2. Remove deleted snapshots first (using proper removeSnapshots API) 3. Remove stale
+   * branch references 4. Add new snapshots and set branch pointers
+   */
+  private TableMetadata applyStateChanges(TableMetadata metadata, SnapshotState state) {
+    TableMetadata.Builder builder = TableMetadata.buildFrom(metadata);
+
+    // Step 1: Remove deleted snapshots using proper Iceberg API
+    if (!state.getDeletedSnapshots().isEmpty()) {
+      Set<Long> deletedIds =
+          state.getDeletedSnapshots().stream()
+              .map(Snapshot::snapshotId)
+              .collect(Collectors.toSet());
+      builder.removeSnapshots(deletedIds);
+    }
 
-    Map<String, String> properties = new HashMap<>(newMetadata.properties());
+    // Step 2: Remove stale branch references (branches that are no longer in provided refs)
+    Set<String> providedRefNames = state.getProvidedRefs().keySet();
+    metadata.refs().keySet().stream()
+        .filter(refName -> !providedRefNames.contains(refName))
+        .forEach(builder::removeRef);
+
+    // Step 3: Identify existing snapshots (after deletions)
+    Set<Long> existingSnapshotIds =
+        metadata.snapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+    Set<Long> deletedIds =
+        state.getDeletedSnapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+    existingSnapshotIds.removeAll(deletedIds);
+
+    // Step 4: Identify snapshots referenced by branches
+    Set<Long> referencedByBranches =
+        state.getProvidedRefs().values().stream()
+            .map(SnapshotRef::snapshotId)
+            .collect(Collectors.toSet());
+
+    // Step 5: Add unreferenced new snapshots (referenced ones are added via setBranchSnapshot)
+    state.getProvidedSnapshots().stream()
+        .filter(s -> !existingSnapshotIds.contains(s.snapshotId()))
+        .filter(s -> !referencedByBranches.contains(s.snapshotId()))
+        .forEach(builder::addSnapshot);
+
+    // Step 6: Set branch pointers for all provided refs
+    state
+        .getProvidedRefs()
+        .forEach(
+            (branchName, ref) -> {
+              Snapshot snapshot =
+                  state.getProvidedSnapshots().stream()
+                      .filter(s -> s.snapshotId() == ref.snapshotId())
+                      .findFirst()
+                      .orElseThrow(
+                          () ->
+                              new InvalidIcebergSnapshotException(
+                                  String.format(
+                                      "Branch %s references non-existent snapshot %s",
+                                      branchName, ref.snapshotId())));
+
+              if (existingSnapshotIds.contains(snapshot.snapshotId())) {
+                // Snapshot already exists - just update the branch pointer if needed
+                SnapshotRef existingRef = metadata.refs().get(branchName);
+                if (existingRef == null || existingRef.snapshotId() != ref.snapshotId()) {
+                  builder.setRef(branchName, ref);
+                }
+              } else {
+                // Snapshot is new - setBranchSnapshot will add it and set the branch pointer
+                builder.setBranchSnapshot(snapshot, branchName);
+              }
+            });
 
-    // Categorize new snapshots by their semantic type for metrics
-    Map<String, List<String>> snapshotsByType =
-        diff.newSnapshots.stream()
-            .collect(
-                Collectors.groupingBy(
-                    this::getSnapshotCategory,
-                    Collectors.mapping(s -> String.valueOf(s.snapshotId()), Collectors.toList())));
-
-    // Record snapshot metrics by type
-    recordIfPresent(
-        properties,
-        snapshotsByType,
-        "appended",
-        CatalogConstants.APPENDED_SNAPSHOTS,
-        InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR);
-    recordIfPresent(
-        properties,
-        snapshotsByType,
-        "staged",
-        CatalogConstants.STAGED_SNAPSHOTS,
-        InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR);
-
-    // For cherry-picked snapshots, record the SOURCE snapshot IDs that were cherry-picked
-    List<String> cherryPickSourceIds =
-        diff.newSnapshots.stream()
-            .filter(this::isCherryPicked)
-            .map(this::getCherryPickSourceId)
-            .filter(Optional::isPresent)
-            .map(Optional::get)
-            .collect(Collectors.toList());
+    return builder.build();
+  }
 
-    if (!cherryPickSourceIds.isEmpty()) {
-      properties.put(
+  /**
+   * Stage 7: Record metrics and add properties to metadata. Returns new metadata with updated
+   * properties.
+   */
+  private TableMetadata recordMetrics(TableMetadata metadata, SnapshotState state) {
+    Map<String, String> newProperties = new HashMap<>(metadata.properties());
+
+    // Helper to format snapshot IDs as comma-separated string
+    java.util.function.Function<List<Snapshot>, String> formatIds =
+        snapshots ->
+            snapshots.stream()
+                .map(s -> Long.toString(s.snapshotId()))
+                .collect(Collectors.joining(","));
+
+    // Record categorization metrics as comma-separated snapshot IDs
+    if (!state.getRegularSnapshots().isEmpty()) {
+      List<Snapshot> newRegularSnapshots =
+          state.getRegularSnapshots().stream()
+              .filter(s -> state.getNewSnapshots().contains(s))
+              .collect(Collectors.toList());
+      if (!newRegularSnapshots.isEmpty()) {
+        newProperties.put(
+            getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
+            formatIds.apply(newRegularSnapshots));
+      }
+    }
+    if (!state.getWapSnapshots().isEmpty()) {
+      newProperties.put(
+          getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
+          formatIds.apply(state.getWapSnapshots()));
+    }
+    if (!state.getCherryPickedSnapshots().isEmpty()) {
+      newProperties.put(
           getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
-          String.join(",", cherryPickSourceIds));
-      metricsReporter.count(
-          InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR, cherryPickSourceIds.size());
+          formatIds.apply(state.getCherryPickedSnapshots()));
     }
-
-    // Record branch updates that don't involve new snapshots (pure ref moves)
-    List<String> refOnlyCherryPicks =
-        diff.branchUpdates.entrySet().stream()
-            .filter(entry -> !isNewSnapshot(entry.getValue(), diff.newSnapshots))
-            .map(entry -> String.valueOf(entry.getValue()))
-            .collect(Collectors.toList());
-
-    if (!refOnlyCherryPicks.isEmpty()) {
-      String existing =
-          properties.get(getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS));
-      String combined =
-          existing != null
-              ? existing + "," + String.join(",", refOnlyCherryPicks)
-              : String.join(",", refOnlyCherryPicks);
-      properties.put(getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS), combined);
-      metricsReporter.count(
-          InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR, refOnlyCherryPicks.size());
+    if (!state.getDeletedSnapshots().isEmpty()) {
+      newProperties.put(
+          getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
+          formatIds.apply(state.getDeletedSnapshots()));
     }
 
-    return TableMetadata.buildFrom(newMetadata).setProperties(properties).build();
-  }
+    // Remove the transient snapshot keys from properties
+    newProperties.remove(CatalogConstants.SNAPSHOTS_JSON_KEY);
+    newProperties.remove(CatalogConstants.SNAPSHOTS_REFS_KEY);
 
-  /** Categorizes snapshot for metrics based on its semantic type. */
-  private String getSnapshotCategory(Snapshot snapshot) {
-    if (isWapStaged(snapshot)) return "staged";
-    if (isCherryPicked(snapshot))
-      return "appended"; // Cherry-picked snapshots are NEW, so they're "appended"
-    return "appended";
+    return metadata.replaceProperties(newProperties);
   }
 
-  /** Extracts the source snapshot ID for cherry-picked snapshots. */
-  private Optional<String> getCherryPickSourceId(Snapshot snapshot) {
-    return Optional.ofNullable(snapshot.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP));
-  }
+  // ==================== End Functional Snapshot Application Pipeline ====================
 
-  /** Records snapshot category in properties if snapshots exist. */
-  private void recordIfPresent(
-      Map<String, String> properties,
-      Map<String, List<String>> categorized,
-      String category,
-      String propertyKey,
-      String metricKey) {
+  /**
+   * If this commit comes from Iceberg built-in retry in
+   * org.apache.iceberg.PropertiesUpdate#commit() Then throw fatal {@link CommitFailedException} to
+   * inform users.
+   */
+  private void failIfRetryUpdate(Map<String, String> properties) {
+    if (properties.containsKey(CatalogConstants.COMMIT_KEY)) {
+      String userProvidedTblVer = properties.get(CatalogConstants.COMMIT_KEY);
 
-    Optional.ofNullable(categorized.get(category))
-        .filter(CollectionUtils::isNotEmpty)
-        .ifPresent(
-            snapshots -> {
-              properties.put(getCanonicalFieldName(propertyKey), String.join(",", snapshots));
-              metricsReporter.count(metricKey, snapshots.size());
-            });
+      // If the commit is ever seen in the past, that indicates this commit is a retry and should
+      // abort
+      if (CACHE.getIfPresent(userProvidedTblVer) != null) {
+        throw new CommitFailedException(
+            String.format(
+                "The user provided table version [%s] for table [%s] is stale, please consider retry from application",
+                userProvidedTblVer, tableIdentifier));
+      } else {
+        CACHE.put(userProvidedTblVer, 1);
+      }
+
+      properties.remove(CatalogConstants.COMMIT_KEY);
+    } else {
+      // This should never occur except table-creation. However, when table-creation hits
+      // concurrency issue
+      // it throw AlreadyExistsException and will not trigger retry.
+      metricsReporter.count(InternalCatalogMetricsConstant.MISSING_COMMIT_KEY);
+    }
   }
 
   /** Helper function to dump contents for map in debugging mode. */

From 4087462e1462d11d5c21016bcc9fe4e2eb324f92 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Thu, 9 Oct 2025 11:10:16 -0700
Subject: [PATCH 13/35] working tests and restructured code

---
 .../OpenHouseInternalTableOperations.java     | 178 +++-
 .../OpenHouseInternalTableOperationsTest.java | 820 +++++++++++-------
 2 files changed, 630 insertions(+), 368 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
index a89a5d570..6a2c43305 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
@@ -4,6 +4,7 @@
 
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
+import com.google.common.collect.Sets;
 import com.google.gson.Gson;
 import com.linkedin.openhouse.cluster.metrics.micrometer.MetricsReporter;
 import com.linkedin.openhouse.cluster.storage.Storage;
@@ -521,7 +522,8 @@ private static class SnapshotState {
    *
    * <p>Pipeline stages: 1. Extract snapshots from properties 2. Parse snapshots from JSON 3. Parse
    * references from JSON 4. Compute complete state diff (categorize, identify changes) 5. Validate
-   * entire operation 6. Apply state changes 7. Record metrics/properties
+   * entire operation 6. Apply state changes (returns builder) 7. Add metric properties to builder
+   * 8. Build once at top level to preserve lastUpdatedMillis from snapshot operations
    *
    * @param base The base table metadata (may be null for table creation)
    * @param metadata The new metadata with properties containing snapshot updates
@@ -569,11 +571,14 @@ TableMetadata applySnapshots(TableMetadata base, TableMetadata metadata) {
               // Stage 5: Validate entire operation
               validateOperation(state, base);
 
-              // Stage 6: Apply state changes
-              TableMetadata updated = applyStateChanges(metadata, state);
+              // Stage 6: Apply state changes - returns builder
+              TableMetadata.Builder builder = applyStateChanges(metadata, state);
 
-              // Stage 7: Record metrics/properties
-              return recordMetrics(updated, state);
+              // Stage 7: Record metrics and add metric properties to builder
+              builder = recordMetrics(builder, state);
+
+              // Build once at the end to preserve lastUpdatedMillis from snapshot operations
+              return builder.build();
             })
         .orElse(metadata); // No snapshot updates if key not present
   }
@@ -604,7 +609,11 @@ private SnapshotState computeStateDiff(SnapshotState.SnapshotStateBuilder builde
 
     // Categorize all snapshots by type
     SnapshotCategories categories =
-        categorizeAllSnapshots(partial.getProvidedSnapshots(), existingById);
+        categorizeAllSnapshots(
+            partial.getProvidedSnapshots(),
+            existingById,
+            partial.getExistingRefs(),
+            partial.getProvidedRefs());
 
     // Identify snapshot changes (new, retained, deleted)
     SnapshotChanges changes =
@@ -647,10 +656,24 @@ private static class SnapshotCategories {
 
   /** Categorize all snapshots into WAP, cherry-picked, and regular. */
   private SnapshotCategories categorizeAllSnapshots(
-      List<Snapshot> providedSnapshots, Map<Long, Snapshot> existingById) {
-    List<Snapshot> wapSnapshots = categorizeWapSnapshots(providedSnapshots);
+      List<Snapshot> providedSnapshots,
+      Map<Long, Snapshot> existingById,
+      Map<String, SnapshotRef> existingRefs,
+      Map<String, SnapshotRef> providedRefs) {
+    List<Snapshot> wapSnapshots =
+        categorizeWapSnapshots(providedSnapshots, existingRefs, providedRefs);
     List<Snapshot> cherryPickedSnapshots =
-        categorizeCherryPickedSnapshots(providedSnapshots, existingById);
+        categorizeCherryPickedSnapshots(
+            providedSnapshots, existingById, existingRefs, providedRefs);
+
+    // Cherry-picked snapshots should not be considered WAP/staged anymore
+    Set<Long> cherryPickedIds =
+        cherryPickedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+    wapSnapshots =
+        wapSnapshots.stream()
+            .filter(s -> !cherryPickedIds.contains(s.snapshotId()))
+            .collect(Collectors.toList());
+
     List<Snapshot> regularSnapshots =
         categorizeRegularSnapshots(providedSnapshots, wapSnapshots, cherryPickedSnapshots);
 
@@ -716,23 +739,60 @@ private SnapshotMetrics computeSnapshotMetrics(
   }
 
   /**
-   * Categorize WAP (Write-Audit-Publish) snapshots. A snapshot is WAP if it has the WAP ID in its
-   * summary.
+   * Categorize WAP (Write-Audit-Publish) snapshots. A snapshot is considered WAP/staged if it has
+   * the wap.id property AND is not on any branch in either the existing or provided metadata. This
+   * correctly handles: 1. Snapshots that were on branches in base - not WAP even if unreferenced in
+   * new metadata 2. Snapshots being published (staged -> branch) - not WAP as they're now on a
+   * branch
+   *
+   * @param snapshots List of provided snapshots
+   * @param existingRefs Existing snapshot refs from base metadata
+   * @param providedRefs Provided snapshot refs from new metadata
+   * @return List of WAP snapshots
    */
-  private List<Snapshot> categorizeWapSnapshots(List<Snapshot> snapshots) {
+  private List<Snapshot> categorizeWapSnapshots(
+      List<Snapshot> snapshots,
+      Map<String, SnapshotRef> existingRefs,
+      Map<String, SnapshotRef> providedRefs) {
+    // Get set of snapshot IDs that are/were on branches
+    Set<Long> branchSnapshotIds = new java.util.HashSet<>();
+    branchSnapshotIds.addAll(
+        existingRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet()));
+    branchSnapshotIds.addAll(
+        providedRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet()));
+
     return snapshots.stream()
         .filter(
             s -> s.summary() != null && s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
+        .filter(s -> !branchSnapshotIds.contains(s.snapshotId()))
         .collect(Collectors.toList());
   }
 
   /**
-   * Categorize cherry-picked snapshots. A snapshot is cherry-picked if it exists in the current
+   * Categorize cherry-picked snapshots. A snapshot is cherry-picked if: 1. It exists in the current
    * metadata but has a different parent than in the provided snapshots (indicating it was moved to
-   * a different branch).
+   * a different branch), OR 2. It is referenced as the source of a cherry-pick by another
+   * snapshot's "source-snapshot-id", OR 3. It has wap.id AND was staged (not on a branch) in
+   * existing refs AND is now on a branch in provided refs (indicating it's being published)
    */
   private List<Snapshot> categorizeCherryPickedSnapshots(
-      List<Snapshot> providedSnapshots, Map<Long, Snapshot> existingById) {
+      List<Snapshot> providedSnapshots,
+      Map<Long, Snapshot> existingById,
+      Map<String, SnapshotRef> existingRefs,
+      Map<String, SnapshotRef> providedRefs) {
+
+    // Find snapshots that are sources of cherry-picks
+    Set<Long> cherryPickSourceIds =
+        providedSnapshots.stream()
+            .filter(s -> s.summary() != null && s.summary().containsKey("source-snapshot-id"))
+            .map(s -> Long.parseLong(s.summary().get("source-snapshot-id")))
+            .collect(Collectors.toSet());
+
+    // Get snapshot IDs on branches
+    Set<Long> existingBranchSnapshotIds =
+        existingRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
+    Set<Long> providedBranchSnapshotIds =
+        providedRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
 
     return providedSnapshots.stream()
         .filter(
@@ -744,7 +804,20 @@ private List<Snapshot> categorizeCherryPickedSnapshots(
               // Check if parent changed (indicating cherry-pick to different branch)
               Long providedParent = provided.parentId();
               Long existingParent = existing.parentId();
-              return !Objects.equal(providedParent, existingParent);
+              boolean parentChanged = !Objects.equal(providedParent, existingParent);
+
+              // Check if this snapshot is the source of a cherry-pick
+              boolean isCherryPickSource = cherryPickSourceIds.contains(provided.snapshotId());
+
+              // Check if this is a WAP snapshot being published (staged -> branch)
+              boolean hasWapId =
+                  provided.summary() != null
+                      && provided.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP);
+              boolean wasStaged = !existingBranchSnapshotIds.contains(provided.snapshotId());
+              boolean isNowOnBranch = providedBranchSnapshotIds.contains(provided.snapshotId());
+              boolean isBeingPublished = hasWapId && wasStaged && isNowOnBranch;
+
+              return parentChanged || isCherryPickSource || isBeingPublished;
             })
         .collect(Collectors.toList());
   }
@@ -897,8 +970,8 @@ void validateIndividualSnapshots(SnapshotState state) {
   }
 
   /**
-   * Stage 6: Apply state changes to create new TableMetadata. Pure function - creates new metadata
-   * without mutating existing.
+   * Stage 6: Apply state changes to create TableMetadata builder. Returns builder (not built) to
+   * allow metric properties to be added before the final build, preserving lastUpdatedMillis.
    *
    * <p>This method uses Iceberg's proper APIs: - removeSnapshots() to delete snapshots -
    * addSnapshot() to add new snapshots - setBranchSnapshot() to set branch references
@@ -906,8 +979,10 @@ void validateIndividualSnapshots(SnapshotState state) {
    * <p>The order of operations matters: 1. Start with base metadata (buildFrom copies all existing
    * state) 2. Remove deleted snapshots first (using proper removeSnapshots API) 3. Remove stale
    * branch references 4. Add new snapshots and set branch pointers
+   *
+   * @return Builder with all snapshot changes applied but not yet built
    */
-  private TableMetadata applyStateChanges(TableMetadata metadata, SnapshotState state) {
+  private TableMetadata.Builder applyStateChanges(TableMetadata metadata, SnapshotState state) {
     TableMetadata.Builder builder = TableMetadata.buildFrom(metadata);
 
     // Step 1: Remove deleted snapshots using proper Iceberg API
@@ -972,15 +1047,36 @@ private TableMetadata applyStateChanges(TableMetadata metadata, SnapshotState st
               }
             });
 
-    return builder.build();
+    return builder;
   }
 
   /**
-   * Stage 7: Record metrics and add properties to metadata. Returns new metadata with updated
-   * properties.
+   * Stage 7: Add metric properties to builder. Returns the builder for final build in
+   * applySnapshots. This allows the single build to preserve lastUpdatedMillis from snapshot
+   * operations.
+   *
+   * @param builder Builder with snapshot changes already applied
+   * @param state Snapshot state containing metrics to record
+   * @return Builder with metric properties added, ready to be built
    */
-  private TableMetadata recordMetrics(TableMetadata metadata, SnapshotState state) {
-    Map<String, String> newProperties = new HashMap<>(metadata.properties());
+  private TableMetadata.Builder recordMetrics(TableMetadata.Builder builder, SnapshotState state) {
+    // Emit metrics to reporter
+    if (state.getAppendedCount() > 0) {
+      metricsReporter.count(
+          InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, state.getAppendedCount());
+    }
+    if (state.getStagedCount() > 0) {
+      metricsReporter.count(
+          InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, state.getStagedCount());
+    }
+    if (state.getCherryPickedCount() > 0) {
+      metricsReporter.count(
+          InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR, state.getCherryPickedCount());
+    }
+    if (state.getDeletedCount() > 0) {
+      metricsReporter.count(
+          InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, state.getDeletedCount());
+    }
 
     // Helper to format snapshot IDs as comma-separated string
     java.util.function.Function<List<Snapshot>, String> formatIds =
@@ -996,32 +1092,36 @@ private TableMetadata recordMetrics(TableMetadata metadata, SnapshotState state)
               .filter(s -> state.getNewSnapshots().contains(s))
               .collect(Collectors.toList());
       if (!newRegularSnapshots.isEmpty()) {
-        newProperties.put(
-            getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
-            formatIds.apply(newRegularSnapshots));
+        builder.setProperties(
+            Collections.singletonMap(
+                getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
+                formatIds.apply(newRegularSnapshots)));
       }
     }
     if (!state.getWapSnapshots().isEmpty()) {
-      newProperties.put(
-          getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
-          formatIds.apply(state.getWapSnapshots()));
+      builder.setProperties(
+          Collections.singletonMap(
+              getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
+              formatIds.apply(state.getWapSnapshots())));
     }
     if (!state.getCherryPickedSnapshots().isEmpty()) {
-      newProperties.put(
-          getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
-          formatIds.apply(state.getCherryPickedSnapshots()));
+      builder.setProperties(
+          Collections.singletonMap(
+              getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
+              formatIds.apply(state.getCherryPickedSnapshots())));
     }
     if (!state.getDeletedSnapshots().isEmpty()) {
-      newProperties.put(
-          getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
-          formatIds.apply(state.getDeletedSnapshots()));
+      builder.setProperties(
+          Collections.singletonMap(
+              getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
+              formatIds.apply(state.getDeletedSnapshots())));
     }
 
     // Remove the transient snapshot keys from properties
-    newProperties.remove(CatalogConstants.SNAPSHOTS_JSON_KEY);
-    newProperties.remove(CatalogConstants.SNAPSHOTS_REFS_KEY);
+    builder.removeProperties(
+        Sets.newHashSet(CatalogConstants.SNAPSHOTS_JSON_KEY, CatalogConstants.SNAPSHOTS_REFS_KEY));
 
-    return metadata.replaceProperties(newProperties);
+    return builder;
   }
 
   // ==================== End Functional Snapshot Application Pipeline ====================
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index 7bb945c44..2ff4d1e21 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -8,6 +8,7 @@
 import com.linkedin.openhouse.cluster.storage.StorageType;
 import com.linkedin.openhouse.cluster.storage.local.LocalStorage;
 import com.linkedin.openhouse.cluster.storage.local.LocalStorageClient;
+import com.linkedin.openhouse.internal.catalog.exception.InvalidIcebergSnapshotException;
 import com.linkedin.openhouse.internal.catalog.fileio.FileIOManager;
 import com.linkedin.openhouse.internal.catalog.mapper.HouseTableMapper;
 import com.linkedin.openhouse.internal.catalog.model.HouseTable;
@@ -30,7 +31,6 @@
 import java.util.UUID;
 import java.util.function.Consumer;
 import java.util.stream.Collectors;
-import java.util.stream.IntStream;
 import lombok.SneakyThrows;
 import org.apache.commons.compress.utils.Lists;
 import org.apache.hadoop.conf.Configuration;
@@ -482,52 +482,6 @@ void testDoCommitExceptionHandling() {
         () -> openHouseInternalTableOperations.doCommit(base, metadata));
   }
 
-  @Test
-  void testDoCommitWithValidSnapshotDeletion() throws IOException {
-    TableMetadata metadata =
-        BASE_TABLE_METADATA.replaceProperties(ImmutableMap.of("random", "value"));
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-    Map<String, String> properties = new HashMap<>(metadata.properties());
-
-    // The key insight: SNAPSHOTS_JSON_KEY determines what snapshots SHOULD exist after commit
-    // Only include snapshot 2 - this means snapshots 0 and 1 should be deleted
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY,
-        SnapshotsUtil.serializedSnapshots(testSnapshots.subList(2, 3))); // Only snapshot 2
-    properties.put(
-        CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
-                testSnapshots.get(2)))); // snapshot 2 -> main
-    properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
-    metadata = metadata.replaceProperties(properties);
-
-    // Create initial metadata with snapshots 0, 1, 2 where only snapshot 2 is referenced
-    TableMetadata metadataWithSnapshots =
-        TableMetadata.buildFrom(metadata)
-            .addSnapshot(testSnapshots.get(0)) // Unreferenced - will be deleted
-            .addSnapshot(testSnapshots.get(1)) // Unreferenced - will be deleted
-            .setBranchSnapshot(
-                testSnapshots.get(2), SnapshotRef.MAIN_BRANCH) // Referenced - will be kept
-            .build();
-
-    // Target metadata: same branch setup but snapshots 0,1 removed via SNAPSHOTS_JSON_KEY
-    TableMetadata metadataWithSnapshotsDeleted =
-        TableMetadata.buildFrom(metadata)
-            .setBranchSnapshot(
-                testSnapshots.get(2), SnapshotRef.MAIN_BRANCH) // Only snapshot 2 remains
-            .build();
-
-    // This should succeed because snapshots 0 and 1 are unreferenced and can be safely deleted
-    Assertions.assertDoesNotThrow(
-        () ->
-            openHouseInternalTableOperations.doCommit(
-                metadataWithSnapshots, metadataWithSnapshotsDeleted));
-
-    // ideally we also verify that snapshots 0 and 1 are deleted, but doCommit doesn't return the
-    // metadata with the deleted snapshots
-  }
-
   @Test
   void testDoCommitSnapshotsValidationThrowsException() throws IOException {
     TableMetadata metadata =
@@ -567,7 +521,7 @@ void testDoCommitSnapshotsValidationThrowsException() throws IOException {
     // This should throw exception because snapshot 1 is marked for deletion but still referenced by
     // main
     Assertions.assertThrows(
-        CommitStateUnknownException.class,
+        InvalidIcebergSnapshotException.class,
         () ->
             openHouseInternalTableOperations.doCommit(
                 metadataWithSnapshots, metadataWithSnapshotsDeleted),
@@ -595,12 +549,10 @@ void testDoCommitAppendStageOnlySnapshotsInitialVersion() throws IOException {
               .map(s -> Long.toString(s.snapshotId()))
               .collect(Collectors.joining(",")),
           updatedProperties.get(getCanonicalFieldName("staged_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("appended_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("cherry_picked_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("appended_snapshots")));
+      Assertions.assertNull(
+          updatedProperties.get(getCanonicalFieldName("cherry_picked_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
       Mockito.verify(mockHouseTableRepository, Mockito.times(1)).save(Mockito.eq(mockHouseTable));
     }
   }
@@ -640,35 +592,52 @@ void testDoCommitAppendStageOnlySnapshotsExistingVersion() throws IOException {
               .map(s -> Long.toString(s.snapshotId()))
               .collect(Collectors.joining(",")),
           updatedProperties.get(getCanonicalFieldName("staged_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("appended_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("cherry_picked_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("appended_snapshots")));
+      Assertions.assertNull(
+          updatedProperties.get(getCanonicalFieldName("cherry_picked_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
       Mockito.verify(mockHouseTableRepository, Mockito.times(1)).save(Mockito.eq(mockHouseTable));
     }
   }
 
   @Test
   void testAppendSnapshotsWithOldSnapshots() throws IOException {
-    TableMetadata metadata =
+    // Create base metadata (existing table state)
+    TableMetadata baseMetadata =
         TableMetadata.buildFrom(BASE_TABLE_METADATA)
-            .setPreviousFileLocation("tmp_location")
+            .setPreviousFileLocation("tmp_location") // this is key
             .setLocation(BASE_TABLE_METADATA.metadataFileLocation())
             .build();
+
     // all snapshots are from the past and snapshots add should fail the validation
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots.get(snapshots.size() - 1))));
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
+
     Assertions.assertThrows(
         IllegalArgumentException.class,
-        () ->
-            openHouseInternalTableOperations.applySnapshotOperations(
-                metadata, snapshots, ImmutableMap.of(), false));
+        () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata));
+
     // the latest snapshots have larger timestamp than the previous metadata timestamp, so it should
     // pass the validation
     snapshots.addAll(IcebergTestUtil.getFutureSnapshots());
-    openHouseInternalTableOperations.applySnapshotOperations(
-        metadata, snapshots, ImmutableMap.of(), false);
+    Map<String, String> propertiesWithFuture = new HashMap<>(baseMetadata.properties());
+    propertiesWithFuture.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
+    propertiesWithFuture.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots.get(snapshots.size() - 1))));
+
+    TableMetadata newMetadataWithFuture = baseMetadata.replaceProperties(propertiesWithFuture);
+    openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadataWithFuture);
   }
 
   @Test
@@ -702,15 +671,12 @@ void testDoCommitCherryPickSnapshotBaseUnchanged() throws IOException {
       Map<String, String> updatedProperties = tblMetadataCaptor.getValue().properties();
 
       // verify the staged snapshot is cherry picked by use the existing one
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("staged_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("appended_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("staged_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("appended_snapshots")));
       Assertions.assertEquals(
           Long.toString(testWapSnapshots.get(0).snapshotId()),
           updatedProperties.get(getCanonicalFieldName("cherry_picked_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
       Mockito.verify(mockHouseTableRepository, Mockito.times(1)).save(Mockito.eq(mockHouseTable));
     }
   }
@@ -727,7 +693,7 @@ void testDoCommitCherryPickSnapshotBaseChanged() throws IOException {
     Map<String, String> properties = new HashMap<>(base.properties());
     try (MockedStatic<TableMetadataParser> ignoreWriteMock =
         Mockito.mockStatic(TableMetadataParser.class)) {
-      // cherry pick the staged snapshot whose base has changed
+      // cherry-pick the staged snapshot whose base has changed
       properties.put(
           CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(testWapSnapshots));
       properties.put(
@@ -742,17 +708,15 @@ void testDoCommitCherryPickSnapshotBaseChanged() throws IOException {
       Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
       Map<String, String> updatedProperties = tblMetadataCaptor.getValue().properties();
 
-      // verify the staged snapshot is cherry picked by creating a new snapshot and append it
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("staged_snapshots")));
+      // verify the staged snapshot is cherry-picked by creating a new snapshot and append it
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("staged_snapshots")));
       Assertions.assertEquals(
           Long.toString(testWapSnapshots.get(2).snapshotId()),
           updatedProperties.get(getCanonicalFieldName("appended_snapshots")));
       Assertions.assertEquals(
           Long.toString(testWapSnapshots.get(1).snapshotId()),
           updatedProperties.get(getCanonicalFieldName("cherry_picked_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
       Mockito.verify(mockHouseTableRepository, Mockito.times(1)).save(Mockito.eq(mockHouseTable));
     }
   }
@@ -781,15 +745,12 @@ void testDoCommitCherryPickFirstSnapshot() throws IOException {
       Map<String, String> updatedProperties = tblMetadataCaptor.getValue().properties();
 
       // verify the staged snapshot is cherry picked by using the existing one
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("staged_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("appended_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("staged_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("appended_snapshots")));
       Assertions.assertEquals(
           Long.toString(testWapSnapshots.get(0).snapshotId()),
           updatedProperties.get(getCanonicalFieldName("cherry_picked_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
       Mockito.verify(mockHouseTableRepository, Mockito.times(1)).save(Mockito.eq(mockHouseTable));
     }
   }
@@ -812,14 +773,11 @@ void testDoCommitDeleteLastStagedSnapshotWhenNoRefs() throws IOException {
       Map<String, String> updatedProperties = tblMetadataCaptor.getValue().properties();
 
       // verify nothing happens
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("staged_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("appended_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("cherry_picked_snapshots")));
-      Assertions.assertEquals(
-          null, updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("staged_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("appended_snapshots")));
+      Assertions.assertNull(
+          updatedProperties.get(getCanonicalFieldName("cherry_picked_snapshots")));
+      Assertions.assertNull(updatedProperties.get(getCanonicalFieldName("deleted_snapshots")));
       Mockito.verify(mockHouseTableRepository, Mockito.times(1)).save(Mockito.eq(mockHouseTable));
     }
   }
@@ -1283,31 +1241,38 @@ void testDeleteSnapshotWithMainReference() throws IOException {
             .build();
 
     // Get the current head snapshot that is referenced by main branch
-    Snapshot referencedSnapshot = testSnapshots.get(testSnapshots.size() - 1);
+    Snapshot referencedSnapshot = testSnapshots.get(3);
 
-    // Attempt to delete a snapshot that is currently referenced by a branch
-    List<Snapshot> snapshotsToDelete = List.of(referencedSnapshot);
+    // Create new metadata that attempts to delete the referenced snapshot
+    // The SNAPSHOTS_JSON_KEY will only include first 3 snapshots (excluding the referenced one)
+    // But SNAPSHOTS_REFS_KEY will still reference snapshot 3, causing a conflict
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY,
+        SnapshotsUtil.serializedSnapshots(
+            testSnapshots.subList(0, 3))); // Only snapshots 0-2, excluding referenced snapshot 3
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+                referencedSnapshot))); // Still references snapshot 3
 
-    // Capture final variables for lambda
-    final TableMetadata finalBase = baseMetadata;
-    final List<Snapshot> finalSnapshotsToDelete = snapshotsToDelete;
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
     // This MUST throw IllegalArgumentException for referenced snapshots
-    IllegalArgumentException exception =
+    InvalidIcebergSnapshotException exception =
         Assertions.assertThrows(
-            IllegalArgumentException.class,
-            () ->
-                openHouseInternalTableOperations.maybeDeleteSnapshots(
-                    finalBase, finalSnapshotsToDelete),
-            "Should throw IllegalArgumentException when trying to delete referenced snapshot");
+            InvalidIcebergSnapshotException.class,
+            () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata),
+            "Should throw InvalidIcebergSnapshotException when trying to delete referenced snapshot");
 
     // Verify error message mentions the reference
     String expectedMessage =
-        "Cannot expire " + referencedSnapshot.snapshotId() + ". Still referenced by refs:";
+        "Cannot delete the current snapshot "
+            + referencedSnapshot.snapshotId()
+            + " without adding replacement snapshots";
     Assertions.assertTrue(
-        exception.getMessage().contains(expectedMessage)
-            || exception.getMessage().contains("Still referenced by")
-            || exception.getMessage().contains("referenced"),
+        exception.getMessage().contains(expectedMessage),
         "Error message should indicate snapshot is still referenced: " + exception.getMessage());
   }
 
@@ -1316,7 +1281,7 @@ void testDeleteSnapshotWithNoReference() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
     // Create base metadata with multiple snapshots
-    TableMetadata base =
+    TableMetadata baseMetadata =
         TableMetadata.buildFrom(BASE_TABLE_METADATA)
             .addSnapshot(testSnapshots.get(0)) // Unreferenced - can be deleted
             .addSnapshot(testSnapshots.get(1)) // Unreferenced - can be deleted
@@ -1326,12 +1291,25 @@ void testDeleteSnapshotWithNoReference() throws IOException {
             .build();
 
     // Delete unreferenced snapshots (first two snapshots)
-    List<Snapshot> unreferencedSnapshots = testSnapshots.subList(0, 2);
+    // New metadata keeps snapshots 2 and 3
+    Snapshot referencedSnapshot = testSnapshots.get(3);
+    List<Snapshot> remainingSnapshots = testSnapshots.subList(2, 4);
+
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(remainingSnapshots));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(referencedSnapshot)));
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
     TableMetadata result =
-        openHouseInternalTableOperations.maybeDeleteSnapshots(base, unreferencedSnapshots);
+        openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata);
 
     // Verify unreferenced snapshots were removed
+    List<Snapshot> unreferencedSnapshots = testSnapshots.subList(0, 2);
     for (Snapshot unreferenced : unreferencedSnapshots) {
       boolean snapshotExists =
           result.snapshots().stream().anyMatch(s -> s.snapshotId() == unreferenced.snapshotId());
@@ -1341,21 +1319,20 @@ void testDeleteSnapshotWithNoReference() throws IOException {
     }
 
     // Verify referenced snapshot still exists
-    Snapshot referencedSnapshot = testSnapshots.get(3);
     boolean referencedExists =
         result.snapshots().stream()
             .anyMatch(s -> s.snapshotId() == referencedSnapshot.snapshotId());
     Assertions.assertTrue(referencedExists, "Referenced snapshot should still exist");
 
     // Verify deletion tracking
-    Map<String, String> properties = result.properties();
-    String deletedSnapshots =
-        properties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
-    Assertions.assertNotNull(deletedSnapshots);
+    Map<String, String> resultProperties = result.properties();
+    String deletedSnapshotsStr =
+        resultProperties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
+    Assertions.assertNotNull(deletedSnapshotsStr);
 
     for (Snapshot unreferenced : unreferencedSnapshots) {
       Assertions.assertTrue(
-          deletedSnapshots.contains(Long.toString(unreferenced.snapshotId())),
+          deletedSnapshotsStr.contains(Long.toString(unreferenced.snapshotId())),
           "Unreferenced snapshot should be tracked as deleted");
     }
   }
@@ -1364,49 +1341,55 @@ void testDeleteSnapshotWithNoReference() throws IOException {
   void testDeleteSnapshotWithMultipleReference() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
-    // Create metadata with snapshot referenced by multiple branches
-    // Reference the same snapshot from multiple branches
-    Snapshot sharedSnapshot = testSnapshots.get(1);
+    // Create metadata with 2 snapshots: one referenced by multiple branches, one unreferenced
+    Snapshot sharedSnapshot = testSnapshots.get(0); // This will be referenced by both branches
+    Snapshot mainSnapshot = testSnapshots.get(1); // This one stays but is not referenced
+
     TableMetadata baseMetadata =
         TableMetadata.buildFrom(BASE_TABLE_METADATA)
-            .addSnapshot(sharedSnapshot) // Add snapshot first
+            .addSnapshot(sharedSnapshot)
+            .addSnapshot(mainSnapshot)
             .setRef(
                 SnapshotRef.MAIN_BRANCH,
-                SnapshotRef.branchBuilder(sharedSnapshot.snapshotId()).build())
+                SnapshotRef.branchBuilder(mainSnapshot.snapshotId()).build())
             .setRef(
                 "feature_branch", SnapshotRef.branchBuilder(sharedSnapshot.snapshotId()).build())
+            .setRef(
+                "feature_branch1", SnapshotRef.branchBuilder(sharedSnapshot.snapshotId()).build())
             .build();
-    // Add other snapshots to the metadata (skip index 1 - shared snapshot already added)
-    List<Snapshot> snapshotsToAdd =
-        IntStream.range(0, testSnapshots.size())
-            .filter(i -> i != 1)
-            .mapToObj(testSnapshots::get)
-            .collect(Collectors.toList());
 
-    for (Snapshot snapshot : snapshotsToAdd) {
-      baseMetadata = TableMetadata.buildFrom(baseMetadata).addSnapshot(snapshot).build();
-    }
+    // Attempt to delete the shared snapshot by creating new metadata without it
+    // Keep the unreferenced snapshot so we're not deleting everything
+    List<Snapshot> remainingSnapshots = List.of(mainSnapshot);
+
+    // Keep refs pointing to the shared snapshot (causing conflict)
+    Map<String, SnapshotRef> refs = baseMetadata.refs();
+    Map<String, String> serializedRefs =
+        refs.entrySet().stream()
+            .collect(
+                Collectors.toMap(
+                    Map.Entry::getKey,
+                    e -> org.apache.iceberg.SnapshotRefParser.toJson(e.getValue())));
 
-    // Attempt to delete the shared snapshot
-    List<Snapshot> snapshotsToDelete = List.of(sharedSnapshot);
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(remainingSnapshots));
+    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(serializedRefs));
 
-    // Capture final variables for lambda
-    final TableMetadata finalBase = baseMetadata;
-    final List<Snapshot> finalSnapshotsToDelete = snapshotsToDelete;
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
-    // This MUST throw IllegalArgumentException for snapshots referenced by multiple branches
-    IllegalArgumentException exception =
+    // This MUST throw InvalidIcebergSnapshotException for snapshots referenced by multiple branches
+    InvalidIcebergSnapshotException exception =
         Assertions.assertThrows(
-            IllegalArgumentException.class,
-            () ->
-                openHouseInternalTableOperations.maybeDeleteSnapshots(
-                    finalBase, finalSnapshotsToDelete),
-            "Should throw IllegalArgumentException when trying to delete snapshot referenced by multiple branches");
+            InvalidIcebergSnapshotException.class,
+            () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata),
+            "Should throw InvalidIcebergSnapshotException when trying to delete snapshot referenced by multiple branches");
 
-    // Verify error message mentions multiple references
+    // Verify error message mentions the snapshot is still referenced
     String exceptionMessage = exception.getMessage();
     Assertions.assertTrue(
-        exceptionMessage.contains("Still referenced by refs"),
+        exceptionMessage.contains("Still referenced by refs")
+            || exceptionMessage.contains("still referenced"),
         "Error message should indicate snapshot is still referenced by branches: "
             + exceptionMessage);
   }
@@ -1431,26 +1414,44 @@ void testDeleteSnapshotWithBranchReference() throws IOException {
           TableMetadata.buildFrom(baseMetadata).addSnapshot(testSnapshots.get(i)).build();
     }
 
-    // Attempt to delete snapshot that has a tag reference
-    List<Snapshot> snapshotsToDelete = List.of(taggedSnapshot);
+    // Make baseMetadata effectively final for lambda usage
+    final TableMetadata finalBaseMetadata = baseMetadata;
 
-    // Capture final variables for lambda
-    final TableMetadata finalBase = baseMetadata;
-    final List<Snapshot> finalSnapshotsToDelete = snapshotsToDelete;
+    // Attempt to delete snapshot that has a tag reference by creating new metadata without it
+    List<Snapshot> remainingSnapshots =
+        finalBaseMetadata.snapshots().stream()
+            .filter(s -> s.snapshotId() != taggedSnapshot.snapshotId())
+            .collect(Collectors.toList());
 
-    // This MUST throw IllegalArgumentException for snapshots referenced by tags
-    IllegalArgumentException exception =
+    Map<String, String> properties = new HashMap<>(finalBaseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(remainingSnapshots));
+    // Keep refs pointing to the tagged snapshot (causing conflict)
+    Map<String, String> serializedRefs =
+        finalBaseMetadata.refs().entrySet().stream()
+            .collect(
+                Collectors.toMap(
+                    Map.Entry::getKey,
+                    e -> org.apache.iceberg.SnapshotRefParser.toJson(e.getValue())));
+    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(serializedRefs));
+
+    TableMetadata newMetadata = finalBaseMetadata.replaceProperties(properties);
+
+    // This MUST throw InvalidIcebergSnapshotException for snapshots referenced by tags
+    InvalidIcebergSnapshotException exception =
         Assertions.assertThrows(
-            IllegalArgumentException.class,
-            () ->
-                openHouseInternalTableOperations.maybeDeleteSnapshots(
-                    finalBase, finalSnapshotsToDelete),
-            "Should throw IllegalArgumentException when trying to delete snapshot referenced by tag");
+            InvalidIcebergSnapshotException.class,
+            () -> openHouseInternalTableOperations.applySnapshots(finalBaseMetadata, newMetadata),
+            "Should throw InvalidIcebergSnapshotException when trying to delete snapshot referenced by tag");
 
     // Verify error message mentions tag reference
     String exceptionMessage = exception.getMessage();
+    String expectedMessage =
+        "Cannot delete snapshots that are still referenced by branches/tags: snapshot "
+            + taggedSnapshot.snapshotId()
+            + " (referenced by: feature_branch)";
     Assertions.assertTrue(
-        exceptionMessage.contains("Still referenced by refs"),
+        exceptionMessage.contains(expectedMessage),
         "Error message should indicate snapshot is still referenced by branches: "
             + exceptionMessage);
   }
@@ -1460,29 +1461,39 @@ void testDeleteEmptySnapshotList() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
     // Create base metadata
-    TableMetadata base = BASE_TABLE_METADATA;
+    TableMetadata baseMetadata = BASE_TABLE_METADATA;
     for (Snapshot snapshot : testSnapshots) {
-      base =
-          TableMetadata.buildFrom(base)
+      baseMetadata =
+          TableMetadata.buildFrom(baseMetadata)
               .setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH)
               .build();
     }
 
-    // Delete empty list
-    List<Snapshot> emptyList = List.of();
+    // Delete empty list - new metadata is same as base (no snapshots deleted)
+    Snapshot lastSnapshot = testSnapshots.get(testSnapshots.size() - 1);
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY,
+        SnapshotsUtil.serializedSnapshots(baseMetadata.snapshots()));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(IcebergTestUtil.obtainSnapshotRefsFromSnapshot(lastSnapshot)));
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
-    TableMetadata result = openHouseInternalTableOperations.maybeDeleteSnapshots(base, emptyList);
+    TableMetadata result =
+        openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata);
 
     // Verify no changes were made
     Assertions.assertEquals(
-        base.snapshots().size(),
+        baseMetadata.snapshots().size(),
         result.snapshots().size(),
         "No snapshots should be deleted when list is empty");
 
     // Verify no deletion tracking properties were added
-    Map<String, String> properties = result.properties();
+    Map<String, String> resultProperties = result.properties();
     String deletedSnapshots =
-        properties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
+        resultProperties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
     Assertions.assertNull(deletedSnapshots, "No deleted snapshots property should be set");
   }
 
@@ -1491,27 +1502,39 @@ void testDeleteNullSnapshotList() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
     // Create base metadata
-    TableMetadata base = BASE_TABLE_METADATA;
+    TableMetadata baseMetadata = BASE_TABLE_METADATA;
     for (Snapshot snapshot : testSnapshots) {
-      base =
-          TableMetadata.buildFrom(base)
+      baseMetadata =
+          TableMetadata.buildFrom(baseMetadata)
               .setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH)
               .build();
     }
 
-    // Delete null list
-    TableMetadata result = openHouseInternalTableOperations.maybeDeleteSnapshots(base, null);
+    // Delete null list - new metadata is same as base (no snapshots deleted)
+    Snapshot lastSnapshot = testSnapshots.get(testSnapshots.size() - 1);
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY,
+        SnapshotsUtil.serializedSnapshots(baseMetadata.snapshots()));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(IcebergTestUtil.obtainSnapshotRefsFromSnapshot(lastSnapshot)));
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
+
+    TableMetadata result =
+        openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata);
 
     // Verify no changes were made
     Assertions.assertEquals(
-        base.snapshots().size(),
+        baseMetadata.snapshots().size(),
         result.snapshots().size(),
         "No snapshots should be deleted when list is null");
 
     // Verify no deletion tracking properties were added
-    Map<String, String> properties = result.properties();
+    Map<String, String> resultProperties = result.properties();
     String deletedSnapshots =
-        properties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
+        resultProperties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
     Assertions.assertNull(deletedSnapshots, "No deleted snapshots property should be set");
   }
 
@@ -1520,10 +1543,10 @@ void testDeleteNonExistentSnapshot() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
     // Create base metadata
-    TableMetadata base = BASE_TABLE_METADATA;
+    TableMetadata baseMetadata = BASE_TABLE_METADATA;
     for (Snapshot snapshot : testSnapshots) {
-      base =
-          TableMetadata.buildFrom(base)
+      baseMetadata =
+          TableMetadata.buildFrom(baseMetadata)
               .setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH)
               .build();
     }
@@ -1532,25 +1555,32 @@ void testDeleteNonExistentSnapshot() throws IOException {
     List<Snapshot> extraSnapshots = IcebergTestUtil.getExtraSnapshots();
     Snapshot nonExistentSnapshot = extraSnapshots.get(0);
 
-    List<Snapshot> snapshotsToDelete = List.of(nonExistentSnapshot);
+    // New metadata is same as base (non-existent snapshot can't be removed)
+    Snapshot lastSnapshot = testSnapshots.get(testSnapshots.size() - 1);
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY,
+        SnapshotsUtil.serializedSnapshots(baseMetadata.snapshots()));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(IcebergTestUtil.obtainSnapshotRefsFromSnapshot(lastSnapshot)));
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
     TableMetadata result =
-        openHouseInternalTableOperations.maybeDeleteSnapshots(base, snapshotsToDelete);
+        openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata);
 
     // Verify original snapshots are unchanged
     Assertions.assertEquals(
-        base.snapshots().size(),
+        baseMetadata.snapshots().size(),
         result.snapshots().size(),
         "Snapshot count should be unchanged when deleting non-existent snapshot");
 
-    // Verify deletion is still tracked (documenting current behavior)
-    Map<String, String> properties = result.properties();
+    // Verify deletion is not tracked (since no actual deletion occurred)
+    Map<String, String> resultProperties = result.properties();
     String deletedSnapshots =
-        properties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
-    Assertions.assertNotNull(deletedSnapshots);
-    Assertions.assertTrue(
-        deletedSnapshots.contains(Long.toString(nonExistentSnapshot.snapshotId())),
-        "Non-existent snapshot should still be tracked as deleted");
+        resultProperties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
+    Assertions.assertNull(deletedSnapshots, "No deleted snapshots should be tracked");
   }
 
   @Test
@@ -1558,22 +1588,34 @@ void testDeleteSnapshotMetricsRecorded() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
     // Create base metadata
-    TableMetadata base = BASE_TABLE_METADATA;
+    TableMetadata baseMetadata = BASE_TABLE_METADATA;
     for (Snapshot snapshot : testSnapshots) {
-      base = TableMetadata.buildFrom(base).addSnapshot(snapshot).build();
+      baseMetadata = TableMetadata.buildFrom(baseMetadata).addSnapshot(snapshot).build();
     }
 
-    // Delete some snapshots
-    List<Snapshot> snapshotsToDelete = testSnapshots.subList(0, 2);
+    // Make baseMetadata effectively final for lambda usage
+    final TableMetadata finalBaseMetadata = baseMetadata;
+
+    // Delete some snapshots (first two snapshots)
+    List<Snapshot> remainingSnapshots = testSnapshots.subList(2, testSnapshots.size());
+
+    Map<String, String> properties = new HashMap<>(finalBaseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(remainingSnapshots));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(new HashMap<>())); // No refs since all are unreferenced
+
+    TableMetadata newMetadata = finalBaseMetadata.replaceProperties(properties);
 
     // Use the operations instance with mock metrics reporter
-    openHouseInternalTableOperationsWithMockMetrics.maybeDeleteSnapshots(base, snapshotsToDelete);
+    openHouseInternalTableOperationsWithMockMetrics.applySnapshots(finalBaseMetadata, newMetadata);
 
     // Verify metrics were recorded
     Mockito.verify(mockMetricsReporter)
         .count(
             eq(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR),
-            eq((double) snapshotsToDelete.size()));
+            eq((double) 2)); // 2 snapshots deleted
   }
 
   @Test
@@ -1581,7 +1623,7 @@ void testDeleteSnapshotMetricsRecordedBranch() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
     // Create base metadata with snapshots that have branch references
-    TableMetadata base =
+    TableMetadata baseMetadata =
         TableMetadata.buildFrom(BASE_TABLE_METADATA)
             .addSnapshot(testSnapshots.get(0)) // Unreferenced - can be deleted
             .addSnapshot(testSnapshots.get(1)) // Unreferenced - can be deleted
@@ -1589,17 +1631,28 @@ void testDeleteSnapshotMetricsRecordedBranch() throws IOException {
                 testSnapshots.get(2), SnapshotRef.MAIN_BRANCH) // Referenced - cannot be deleted
             .build();
 
-    // Delete unreferenced snapshots (emits metrics for basic deletion)
-    List<Snapshot> snapshotsToDelete = testSnapshots.subList(0, 2);
+    // Delete unreferenced snapshots (first two snapshots)
+    Snapshot referencedSnapshot = testSnapshots.get(2);
+    List<Snapshot> remainingSnapshots = List.of(referencedSnapshot);
+
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(remainingSnapshots));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(referencedSnapshot)));
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
     // Use the operations instance with mock metrics reporter
-    openHouseInternalTableOperationsWithMockMetrics.maybeDeleteSnapshots(base, snapshotsToDelete);
+    openHouseInternalTableOperationsWithMockMetrics.applySnapshots(baseMetadata, newMetadata);
 
     // Verify metrics were recorded for the basic deletion
     Mockito.verify(mockMetricsReporter)
         .count(
             eq(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR),
-            eq((double) snapshotsToDelete.size()));
+            eq((double) 2)); // 2 snapshots deleted
   }
 
   @Test
@@ -1607,63 +1660,94 @@ void testDeleteSnapshotMetricsRecordedNonExistent() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
     // Create base metadata
-    TableMetadata base = BASE_TABLE_METADATA;
+    TableMetadata baseMetadata = BASE_TABLE_METADATA;
     for (Snapshot snapshot : testSnapshots) {
-      base =
-          TableMetadata.buildFrom(base)
+      baseMetadata =
+          TableMetadata.buildFrom(baseMetadata)
               .setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH)
               .build();
     }
 
+    // Make baseMetadata effectively final for lambda usage
+    final TableMetadata finalBaseMetadata = baseMetadata;
+
     // Create a snapshot that doesn't exist in the metadata
     List<Snapshot> extraSnapshots = IcebergTestUtil.getExtraSnapshots();
     Snapshot nonExistentSnapshot = extraSnapshots.get(0);
-    List<Snapshot> snapshotsToDelete = List.of(nonExistentSnapshot);
+
+    // New metadata is same as base (non-existent snapshot can't be removed)
+    Snapshot lastSnapshot = testSnapshots.get(testSnapshots.size() - 1);
+    Map<String, String> properties = new HashMap<>(finalBaseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY,
+        SnapshotsUtil.serializedSnapshots(finalBaseMetadata.snapshots()));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(IcebergTestUtil.obtainSnapshotRefsFromSnapshot(lastSnapshot)));
+
+    TableMetadata newMetadata = finalBaseMetadata.replaceProperties(properties);
 
     // Use the operations instance with mock metrics reporter
-    openHouseInternalTableOperationsWithMockMetrics.maybeDeleteSnapshots(base, snapshotsToDelete);
+    openHouseInternalTableOperationsWithMockMetrics.applySnapshots(finalBaseMetadata, newMetadata);
 
-    // Verify metrics are still recorded even for non-existent snapshots
-    Mockito.verify(mockMetricsReporter)
-        .count(
-            eq(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR),
-            eq((double) snapshotsToDelete.size()));
+    // Verify metrics are not recorded for non-existent snapshots (no actual deletion)
+    Mockito.verify(mockMetricsReporter, Mockito.never())
+        .count(eq(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR), Mockito.anyDouble());
   }
 
   @Test
   void testDeleteAllSnapshotsFailsWhenMainBranchReferenced() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
-    // Create base metadata with all snapshots, where the last one is referenced by main branch
-    TableMetadata tempBase =
-        testSnapshots.subList(0, testSnapshots.size() - 1).stream()
-            .reduce(
-                BASE_TABLE_METADATA,
-                (metadata, snapshot) ->
-                    TableMetadata.buildFrom(metadata).addSnapshot(snapshot).build(),
-                (m1, m2) -> m2);
-    final TableMetadata base =
-        TableMetadata.buildFrom(tempBase)
-            .setBranchSnapshot(testSnapshots.get(testSnapshots.size() - 1), SnapshotRef.MAIN_BRANCH)
+    // Create metadata with 2 snapshots: one referenced by multiple branches, one unreferenced
+    Snapshot unreferencedSnapshot =
+        testSnapshots.get(0); // This will be referenced by both branches
+    Snapshot mainSnapshot = testSnapshots.get(1); // This one stays but is not referenced
+
+    TableMetadata baseMetadata =
+        TableMetadata.buildFrom(BASE_TABLE_METADATA)
+            .addSnapshot(unreferencedSnapshot)
+            .addSnapshot(mainSnapshot)
+            .setRef(
+                SnapshotRef.MAIN_BRANCH,
+                SnapshotRef.branchBuilder(mainSnapshot.snapshotId()).build())
             .build();
 
-    // Attempt to delete ALL snapshots (including the one referenced by main)
-    List<Snapshot> allSnapshots = new ArrayList<>(testSnapshots);
+    // Attempt to delete the shared snapshot by creating new metadata without it
+    // Keep the unreferenced snapshot so we're not deleting everything
+    List<Snapshot> remainingSnapshots = List.of(mainSnapshot);
 
-    // This should fail because we cannot delete the snapshot referenced by main branch
-    IllegalArgumentException exception =
+    // Keep refs pointing to the shared snapshot (causing conflict)
+    Map<String, SnapshotRef> refs = baseMetadata.refs();
+    Map<String, String> serializedRefs =
+        refs.entrySet().stream()
+            .collect(
+                Collectors.toMap(
+                    Map.Entry::getKey,
+                    e -> org.apache.iceberg.SnapshotRefParser.toJson(e.getValue())));
+
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(List.of()));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(IcebergTestUtil.obtainSnapshotRefsFromSnapshot(mainSnapshot)));
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
+
+    // This MUST throw InvalidIcebergSnapshotException for snapshots referenced by multiple branches
+    InvalidIcebergSnapshotException exception =
         Assertions.assertThrows(
-            IllegalArgumentException.class,
-            () -> openHouseInternalTableOperations.maybeDeleteSnapshots(base, allSnapshots),
-            "Should throw IllegalArgumentException when trying to delete all snapshots including main branch reference");
+            InvalidIcebergSnapshotException.class,
+            () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata),
+            "Should throw InvalidIcebergSnapshotException when trying to delete snapshot referenced by multiple branches");
 
-    // Verify error message indicates the snapshot is still referenced
+    // Verify error message mentions the snapshot is still referenced
     String exceptionMessage = exception.getMessage();
-    Assertions.assertTrue(
-        exceptionMessage.contains("Still referenced by refs")
-            || exceptionMessage.contains("referenced")
-            || exceptionMessage.contains("Cannot expire"),
-        "Error message should indicate snapshot is still referenced: " + exceptionMessage);
+    String expectedMessage =
+        "Cannot delete the current snapshot "
+            + mainSnapshot.snapshotId()
+            + " without adding replacement snapshots.";
+    Assertions.assertTrue(exceptionMessage.contains(expectedMessage));
   }
 
   @Test
@@ -1671,20 +1755,30 @@ void testDeleteAllUnreferencedSnapshotsSucceeds() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
     // Create base metadata with unreferenced snapshots only (no main branch or other refs)
-    TableMetadata tempBase = BASE_TABLE_METADATA;
+    TableMetadata baseMetadata = BASE_TABLE_METADATA;
     for (Snapshot snapshot : testSnapshots) {
-      tempBase = TableMetadata.buildFrom(tempBase).addSnapshot(snapshot).build();
+      baseMetadata = TableMetadata.buildFrom(baseMetadata).addSnapshot(snapshot).build();
     }
-    final TableMetadata base = tempBase;
     // Note: No setBranchSnapshot or setRef calls - all snapshots are unreferenced
 
+    // Make baseMetadata effectively final for lambda usage
+    final TableMetadata finalBaseMetadata = baseMetadata;
+
     // Attempt to delete all unreferenced snapshots
-    List<Snapshot> allSnapshots = new ArrayList<>(testSnapshots);
+    Map<String, String> properties = new HashMap<>(finalBaseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY,
+        SnapshotsUtil.serializedSnapshots(List.of())); // Empty - all snapshots deleted
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(new HashMap<>())); // No refs
+
+    TableMetadata newMetadata = finalBaseMetadata.replaceProperties(properties);
 
     // This should succeed since no snapshots are referenced by any branch/tag
     TableMetadata result =
         Assertions.assertDoesNotThrow(
-            () -> openHouseInternalTableOperations.maybeDeleteSnapshots(base, allSnapshots),
+            () -> openHouseInternalTableOperations.applySnapshots(finalBaseMetadata, newMetadata),
             "Should succeed when deleting all unreferenced snapshots");
 
     // Verify all snapshots were removed from the metadata
@@ -1694,12 +1788,12 @@ void testDeleteAllUnreferencedSnapshotsSucceeds() throws IOException {
         "All unreferenced snapshots should be deleted, resulting in empty snapshots list");
 
     // Verify deletion tracking shows all snapshots were deleted
-    Map<String, String> properties = result.properties();
+    Map<String, String> resultProperties = result.properties();
     String deletedSnapshots =
-        properties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
+        resultProperties.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS));
     Assertions.assertNotNull(deletedSnapshots, "Deleted snapshots should be tracked");
 
-    for (Snapshot snapshot : allSnapshots) {
+    for (Snapshot snapshot : testSnapshots) {
       Assertions.assertTrue(
           deletedSnapshots.contains(Long.toString(snapshot.snapshotId())),
           "Snapshot " + snapshot.snapshotId() + " should be tracked as deleted");
@@ -1711,13 +1805,13 @@ void testValidMultipleBranchesWithDifferentSnapshots() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
 
     // Create base metadata
-    TableMetadata base =
+    TableMetadata baseMetadata =
         TableMetadata.buildFrom(BASE_TABLE_METADATA)
             .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
             .build();
 
-    // Add multiple new snapshots
-    List<Snapshot> newSnapshots = testSnapshots.subList(1, 4); // snapshots 1, 2, 3
+    // New metadata includes all snapshots (base + new ones)
+    List<Snapshot> allSnapshots = testSnapshots.subList(0, 4); // snapshots 0, 1, 2, 3
 
     // Create snapshotRefs where each branch points to a DIFFERENT snapshot (valid scenario)
     Map<String, SnapshotRef> validRefs = new HashMap<>();
@@ -1725,11 +1819,24 @@ void testValidMultipleBranchesWithDifferentSnapshots() throws IOException {
     validRefs.put("branch_b", SnapshotRef.branchBuilder(testSnapshots.get(2).snapshotId()).build());
     validRefs.put("branch_c", SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId()).build());
 
+    // Serialize the refs
+    Map<String, String> serializedRefs =
+        validRefs.entrySet().stream()
+            .collect(
+                Collectors.toMap(
+                    Map.Entry::getKey,
+                    e -> org.apache.iceberg.SnapshotRefParser.toJson(e.getValue())));
+
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
+    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(serializedRefs));
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
+
     // This should NOT throw an exception
     Assertions.assertDoesNotThrow(
-        () ->
-            openHouseInternalTableOperations.applySnapshotOperations(
-                base, newSnapshots, validRefs, false),
+        () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata),
         "Should NOT throw exception when branches target different snapshots");
   }
 
@@ -1739,7 +1846,7 @@ void testStandardWAPScenario() throws IOException {
     List<Snapshot> wapSnapshots = IcebergTestUtil.getWapSnapshots();
 
     // Create base with existing snapshots and a WAP snapshot
-    TableMetadata base =
+    TableMetadata baseMetadata =
         TableMetadata.buildFrom(BASE_TABLE_METADATA)
             .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
             .addSnapshot(wapSnapshots.get(0)) // WAP snapshot (not referenced by any branch)
@@ -1747,17 +1854,33 @@ void testStandardWAPScenario() throws IOException {
 
     // Standard WAP scenario: pull the WAP snapshot into main branch
     Snapshot wapSnapshot = wapSnapshots.get(0);
-    List<Snapshot> newSnapshots = List.of(); // No new snapshots, just referencing the existing WAP
+
+    // New metadata keeps the same snapshots but changes the main branch ref to point to WAP
+    // snapshot
+    List<Snapshot> allSnapshots = List.of(testSnapshots.get(0), wapSnapshot);
 
     // Create refs to pull WAP snapshot into main branch
     Map<String, SnapshotRef> refs = new HashMap<>();
     refs.put(SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(wapSnapshot.snapshotId()).build());
 
+    // Serialize the refs
+    Map<String, String> serializedRefs =
+        refs.entrySet().stream()
+            .collect(
+                Collectors.toMap(
+                    Map.Entry::getKey,
+                    e -> org.apache.iceberg.SnapshotRefParser.toJson(e.getValue())));
+
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
+    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(serializedRefs));
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
+
     // Should succeed - standard WAP workflow where WAP snapshot becomes the new main
     Assertions.assertDoesNotThrow(
-        () ->
-            openHouseInternalTableOperations.applySnapshotOperations(
-                base, newSnapshots, refs, false),
+        () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata),
         "Should successfully pull WAP snapshot into main branch");
   }
 
@@ -1942,85 +2065,121 @@ void testMultipleDiffCommitWithValidBranch() throws IOException {
    */
   @Test
   void testMultipleDiffCommitWithMultipleBranchesPointingToSameSnapshot() throws IOException {
+    // Combine regular snapshots (4) + extra snapshots (4) to get 8 total snapshots
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+    List<Snapshot> extraSnapshots = IcebergTestUtil.getExtraSnapshots();
+    List<Snapshot> allSnapshots = new ArrayList<>();
+    allSnapshots.addAll(testSnapshots);
+    allSnapshots.addAll(extraSnapshots);
+
+    // ========== Create base metadata with 2 branches ==========
+    // Base has snapshots 0, 1, 2, 3 with MAIN at snapshot 0 and feature_a at snapshot 1
+    TableMetadata.Builder baseBuilder = TableMetadata.buildFrom(BASE_TABLE_METADATA);
+    baseBuilder.addSnapshot(allSnapshots.get(0));
+    baseBuilder.addSnapshot(allSnapshots.get(1));
+    baseBuilder.addSnapshot(allSnapshots.get(2));
+    baseBuilder.addSnapshot(allSnapshots.get(3));
+    baseBuilder.setBranchSnapshot(allSnapshots.get(0).snapshotId(), SnapshotRef.MAIN_BRANCH);
+    baseBuilder.setBranchSnapshot(allSnapshots.get(1).snapshotId(), "feature_a");
+    TableMetadata baseMetadata = baseBuilder.build();
+
+    // Add custom properties with base snapshots
+    Map<String, String> baseProperties = new HashMap<>(baseMetadata.properties());
+    List<Snapshot> baseSnapshots = allSnapshots.subList(0, 4);
+    baseProperties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(baseSnapshots));
+
+    Map<String, String> baseRefs = new HashMap<>();
+    baseRefs.put(
+        SnapshotRef.MAIN_BRANCH,
+        SnapshotRefParser.toJson(
+            SnapshotRef.branchBuilder(allSnapshots.get(0).snapshotId()).build()));
+    baseRefs.put(
+        "feature_a",
+        SnapshotRefParser.toJson(
+            SnapshotRef.branchBuilder(allSnapshots.get(1).snapshotId()).build()));
+
+    baseProperties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(baseRefs));
+
+    TableMetadata finalBaseMetadata = baseMetadata.replaceProperties(baseProperties);
+
+    // ========== Create new metadata with 3 branches, all advanced 2 snapshots further ==========
+    // New metadata has snapshots 0-7 with MAIN at snapshot 2, feature_a at snapshot 3, feature_b at
+    // snapshot 4
+    TableMetadata.Builder newBuilder = TableMetadata.buildFrom(BASE_TABLE_METADATA);
+    for (int i = 0; i < 8; i++) {
+      newBuilder.addSnapshot(allSnapshots.get(i));
+    }
+    newBuilder.setBranchSnapshot(allSnapshots.get(2).snapshotId(), SnapshotRef.MAIN_BRANCH);
+    newBuilder.setBranchSnapshot(allSnapshots.get(3).snapshotId(), "feature_a");
+    newBuilder.setBranchSnapshot(allSnapshots.get(4).snapshotId(), "feature_b");
+    TableMetadata newMetadata = newBuilder.build();
+
+    // Add custom properties with new snapshots
+    Map<String, String> newProperties = new HashMap<>(newMetadata.properties());
+    List<Snapshot> newSnapshots = allSnapshots.subList(0, 8);
+    newProperties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(newSnapshots));
+
+    Map<String, String> newRefs = new HashMap<>();
+    newRefs.put(
+        SnapshotRef.MAIN_BRANCH,
+        SnapshotRefParser.toJson(
+            SnapshotRef.branchBuilder(allSnapshots.get(2).snapshotId()).build()));
+    newRefs.put(
+        "feature_a",
+        SnapshotRefParser.toJson(
+            SnapshotRef.branchBuilder(allSnapshots.get(3).snapshotId()).build()));
+    newRefs.put(
+        "feature_b",
+        SnapshotRefParser.toJson(
+            SnapshotRef.branchBuilder(allSnapshots.get(4).snapshotId()).build()));
+
+    newProperties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(newRefs));
+
+    TableMetadata finalNewMetadata = newMetadata.replaceProperties(newProperties);
+
+    // ========== COMMIT: Should SUCCEED ==========
+    openHouseInternalTableOperations.doCommit(finalBaseMetadata, finalNewMetadata);
+    Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
 
-    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
-        Mockito.mockStatic(TableMetadataParser.class)) {
-
-      // ========== Create base at N with 1 snapshot ==========
-      TableMetadata baseAtN =
-          TableMetadata.buildFrom(BASE_TABLE_METADATA)
-              .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
-              .build();
-
-      // ========== Create divergent metadata with MAIN and feature_a both pointing to snapshot 3
-      // ==========
-      TableMetadata.Builder builder = TableMetadata.buildFrom(baseAtN);
-      // Add snapshots 1, 2, 3 without assigning to branches
-      builder.addSnapshot(testSnapshots.get(1));
-      builder.addSnapshot(testSnapshots.get(2));
-      builder.addSnapshot(testSnapshots.get(3));
-      // Set BOTH branches to point to the same existing snapshot (using snapshot ID)
-      builder.setBranchSnapshot(testSnapshots.get(3).snapshotId(), SnapshotRef.MAIN_BRANCH);
-      builder.setBranchSnapshot(testSnapshots.get(3).snapshotId(), "feature_a");
-      TableMetadata metadataWithBothBranches = builder.build();
-
-      // Add custom properties with snapshots
-      Map<String, String> divergentProperties =
-          new HashMap<>(metadataWithBothBranches.properties());
-      List<Snapshot> snapshots4 = testSnapshots.subList(0, 4);
-      divergentProperties.put(
-          CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots4));
-
-      // Create refs matching the setBranchSnapshot calls - both pointing to snapshot 3
-      Map<String, String> sameSnapshotRefs = new HashMap<>();
-      sameSnapshotRefs.put(
-          SnapshotRef.MAIN_BRANCH,
-          SnapshotRefParser.toJson(
-              SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId()).build()));
-      sameSnapshotRefs.put(
-          "feature_a",
-          SnapshotRefParser.toJson(
-              SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId()).build()));
-
-      divergentProperties.put(
-          CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(sameSnapshotRefs));
-
-      TableMetadata finalDivergentMetadata =
-          metadataWithBothBranches.replaceProperties(divergentProperties);
+    TableMetadata capturedMetadata = tblMetadataCaptor.getValue();
 
-      // ========== COMMIT: Should SUCCEED - this is a valid end state ==========
-      openHouseInternalTableOperations.doCommit(baseAtN, finalDivergentMetadata);
-      Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
-
-      TableMetadata capturedMetadata = tblMetadataCaptor.getValue();
+    // Verify all 8 snapshots are present
+    Assertions.assertEquals(
+        8, capturedMetadata.snapshots().size(), "Commit should contain all 8 snapshots");
 
-      // Verify all 4 snapshots are present
-      Assertions.assertEquals(
-          4,
-          capturedMetadata.snapshots().size(),
-          "Commit with multiple branches pointing to same snapshot should contain all 4 snapshots");
+    // Verify MAIN branch advanced 2 snapshots (from snapshot 0 to snapshot 2)
+    SnapshotRef mainRef = capturedMetadata.ref(SnapshotRef.MAIN_BRANCH);
+    Assertions.assertNotNull(mainRef, "Main branch ref should exist");
+    Assertions.assertEquals(
+        allSnapshots.get(2).snapshotId(),
+        mainRef.snapshotId(),
+        "Main branch should point to snapshot 2 (advanced 2 snapshots from snapshot 0)");
 
-      // Verify BOTH refs point to the same snapshot
-      SnapshotRef mainRef = capturedMetadata.ref(SnapshotRef.MAIN_BRANCH);
-      Assertions.assertNotNull(mainRef, "Main branch ref should exist");
-      Assertions.assertEquals(
-          testSnapshots.get(3).snapshotId(),
-          mainRef.snapshotId(),
-          "Main branch should point to the 4th snapshot");
+    // Verify feature_a branch advanced 2 snapshots (from snapshot 1 to snapshot 3)
+    SnapshotRef featureARef = capturedMetadata.ref("feature_a");
+    Assertions.assertNotNull(featureARef, "Feature_a branch ref should exist");
+    Assertions.assertEquals(
+        allSnapshots.get(3).snapshotId(),
+        featureARef.snapshotId(),
+        "Feature_a branch should point to snapshot 3 (advanced 2 snapshots from snapshot 1)");
 
-      SnapshotRef featureRef = capturedMetadata.ref("feature_a");
-      Assertions.assertNotNull(featureRef, "Feature_a branch ref should exist");
-      Assertions.assertEquals(
-          testSnapshots.get(3).snapshotId(),
-          featureRef.snapshotId(),
-          "Feature_a branch should also point to the 4th snapshot (same as main)");
+    // Verify feature_b branch exists and points to snapshot 4 (new branch in this commit)
+    SnapshotRef featureBRef = capturedMetadata.ref("feature_b");
+    Assertions.assertNotNull(featureBRef, "Feature_b branch ref should exist");
+    Assertions.assertEquals(
+        allSnapshots.get(4).snapshotId(),
+        featureBRef.snapshotId(),
+        "Feature_b branch should point to snapshot 4");
 
-      // Verify they point to the SAME snapshot
+    // Verify correct lineage: snapshots should be in order
+    List<Snapshot> capturedSnapshots = capturedMetadata.snapshots();
+    for (int i = 0; i < 8; i++) {
       Assertions.assertEquals(
-          mainRef.snapshotId(),
-          featureRef.snapshotId(),
-          "Both branches should point to the same snapshot ID");
+          allSnapshots.get(i).snapshotId(),
+          capturedSnapshots.get(i).snapshotId(),
+          "Snapshot " + i + " should be preserved in correct order");
     }
   }
 
@@ -2077,17 +2236,20 @@ void testMultipleDiffCommitWithInvalidBranch() throws IOException {
 
       // ========== COMMIT: Should throw CommitStateUnknownException due to ambiguous branches
       // ==========
-      CommitStateUnknownException exception =
+      InvalidIcebergSnapshotException exception =
           Assertions.assertThrows(
-              CommitStateUnknownException.class,
+              InvalidIcebergSnapshotException.class,
               () -> openHouseInternalTableOperations.doCommit(baseAtN, finalDivergentMetadata),
-              "Should throw CommitStateUnknownException when multiple branches point to same snapshot");
+              "Should throw InvalidIcebergSnapshotException when multiple branches point to same snapshot");
 
       // Verify error message indicates the ambiguous commit
       String exceptionMessage = exception.getMessage();
+      String expectedMessage =
+          "Ambiguous commit: snapshot "
+              + testSnapshots.get(3).snapshotId()
+              + " is referenced by multiple branches [feature_a, main] in a single commit. Each snapshot can only be referenced by one branch per commit.";
       Assertions.assertTrue(
-          exceptionMessage.contains("Multiple branches")
-              && exceptionMessage.contains("same target snapshot"),
+          exceptionMessage.contains(expectedMessage),
           "Error message should indicate multiple branches targeting same snapshot: "
               + exceptionMessage);
     }

From a101d729831548ed83897994c8a98d04843bb5e2 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Thu, 9 Oct 2025 11:35:03 -0700
Subject: [PATCH 14/35] adding comments

---
 .../OpenHouseInternalTableOperationsTest.java | 203 ++++++++++++++++--
 1 file changed, 183 insertions(+), 20 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index 2ff4d1e21..e0c5f6513 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -129,6 +129,10 @@ void setup() {
     when(localStorage.getType()).thenReturn(StorageType.LOCAL);
   }
 
+  /**
+   * Tests committing snapshots to a table with no existing snapshots (initial version). Verifies
+   * that all snapshots are appended and tracked in table properties.
+   */
   @Test
   void testDoCommitAppendSnapshotsInitialVersion() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -164,6 +168,10 @@ void testDoCommitAppendSnapshotsInitialVersion() throws IOException {
     }
   }
 
+  /**
+   * Tests committing additional snapshots to a table that already has existing snapshots. Verifies
+   * that only new snapshots are appended and tracked appropriately.
+   */
   @Test
   void testDoCommitAppendSnapshotsExistingVersion() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -208,6 +216,10 @@ void testDoCommitAppendSnapshotsExistingVersion() throws IOException {
     }
   }
 
+  /**
+   * Tests committing changes that both append new snapshots and delete existing ones. Verifies that
+   * both appended and deleted snapshots are correctly tracked in properties.
+   */
   @Test
   void testDoCommitAppendAndDeleteSnapshots() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -266,6 +278,10 @@ void testDoCommitAppendAndDeleteSnapshots() throws IOException {
     }
   }
 
+  /**
+   * Tests that metadata file updates are performed for replicated table initial version commits.
+   * Verifies that updateMetadataField is called with the correct parameters for replicated tables.
+   */
   @Test
   void testDoCommitUpdateMetadataForInitalVersionCommit() throws IOException {
     Map<String, String> properties = new HashMap<>();
@@ -326,6 +342,10 @@ void testDoCommitUpdateMetadataForInitalVersionCommit() throws IOException {
     verify(mockLocalStorageClient).getNativeClient();
   }
 
+  /**
+   * Tests that metadata file updates are not performed for non-replicated tables. Verifies that
+   * updateMetadataField is never called when the table is not replicated.
+   */
   @Test
   void testDoCommitUpdateMetadataNotCalledForNonReplicatedTable() throws IOException {
     Map<String, String> properties = new HashMap<>();
@@ -352,6 +372,10 @@ void testDoCommitUpdateMetadataNotCalledForNonReplicatedTable() throws IOExcepti
     Mockito.verify(mockHouseTableRepository, Mockito.times(1)).save(Mockito.any(HouseTable.class));
   }
 
+  /**
+   * Tests that metadata file updates are not performed for non-initial version commits. Verifies
+   * that updateMetadataField is only called during table creation, not for subsequent updates.
+   */
   @Test
   void testDoCommitUpdateMetadataNotCalledForNonInitialVersionCommit() throws IOException {
     Map<String, String> properties = new HashMap<>();
@@ -385,6 +409,10 @@ void testDoCommitUpdateMetadataNotCalledForNonInitialVersionCommit() throws IOEx
     Mockito.verify(mockHouseTableRepository, Mockito.times(1)).save(Mockito.any(HouseTable.class));
   }
 
+  /**
+   * Tests committing changes that delete some snapshots while keeping others. Verifies that deleted
+   * snapshots are properly tracked in table properties.
+   */
   @Test
   void testDoCommitDeleteSnapshots() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -433,6 +461,10 @@ void testDoCommitDeleteSnapshots() throws IOException {
     }
   }
 
+  /**
+   * Tests that commits to staged tables do not persist to the repository. Verifies that table
+   * metadata is set locally but save() and findById() are never called.
+   */
   @Test
   void testDoCommitDoesntPersistForStagedTable() {
     TableMetadata metadata =
@@ -454,6 +486,10 @@ void testDoCommitDoesntPersistForStagedTable() {
             .get());
   }
 
+  /**
+   * Tests that repository exceptions are properly converted to Iceberg exceptions. Verifies that
+   * various repository exceptions map to CommitFailedException or CommitStateUnknownException.
+   */
   @Test
   void testDoCommitExceptionHandling() {
     TableMetadata base = BASE_TABLE_METADATA;
@@ -482,6 +518,11 @@ void testDoCommitExceptionHandling() {
         () -> openHouseInternalTableOperations.doCommit(base, metadata));
   }
 
+  /**
+   * Tests that attempting to delete a snapshot that is still referenced by a branch throws an
+   * exception. Verifies that InvalidIcebergSnapshotException is thrown when snapshot refs conflict
+   * with deletions.
+   */
   @Test
   void testDoCommitSnapshotsValidationThrowsException() throws IOException {
     TableMetadata metadata =
@@ -528,6 +569,10 @@ void testDoCommitSnapshotsValidationThrowsException() throws IOException {
         "Should throw exception when trying to delete referenced snapshots");
   }
 
+  /**
+   * Tests committing WAP (write-audit-publish) staged snapshots to an initial version table.
+   * Verifies that snapshots are marked as staged but not appended to the main branch.
+   */
   @Test
   void testDoCommitAppendStageOnlySnapshotsInitialVersion() throws IOException {
     List<Snapshot> testWapSnapshots = IcebergTestUtil.getWapSnapshots().subList(0, 2);
@@ -557,6 +602,10 @@ void testDoCommitAppendStageOnlySnapshotsInitialVersion() throws IOException {
     }
   }
 
+  /**
+   * Tests committing WAP staged snapshots to a table with existing snapshots. Verifies that new
+   * snapshots are tracked as staged without being appended to main.
+   */
   @Test
   void testDoCommitAppendStageOnlySnapshotsExistingVersion() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -600,6 +649,11 @@ void testDoCommitAppendStageOnlySnapshotsExistingVersion() throws IOException {
     }
   }
 
+  /**
+   * Tests validation that rejects appending snapshots older than the current metadata timestamp.
+   * Verifies that IllegalArgumentException is thrown for stale snapshots unless newer ones are
+   * included.
+   */
   @Test
   void testAppendSnapshotsWithOldSnapshots() throws IOException {
     // Create base metadata (existing table state)
@@ -640,6 +694,10 @@ void testAppendSnapshotsWithOldSnapshots() throws IOException {
     openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadataWithFuture);
   }
 
+  /**
+   * Tests cherry-picking a staged snapshot to main when the base snapshot hasn't changed. Verifies
+   * that the existing staged snapshot is promoted without creating a new snapshot.
+   */
   @Test
   void testDoCommitCherryPickSnapshotBaseUnchanged() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -681,6 +739,10 @@ void testDoCommitCherryPickSnapshotBaseUnchanged() throws IOException {
     }
   }
 
+  /**
+   * Tests cherry-picking a staged snapshot when the base has changed since staging. Verifies that a
+   * new snapshot is created and appended to track the rebased changes.
+   */
   @Test
   void testDoCommitCherryPickSnapshotBaseChanged() throws IOException {
     List<Snapshot> testWapSnapshots = IcebergTestUtil.getWapSnapshots();
@@ -721,6 +783,10 @@ void testDoCommitCherryPickSnapshotBaseChanged() throws IOException {
     }
   }
 
+  /**
+   * Tests cherry-picking the first staged snapshot (with no parent) to the main branch. Verifies
+   * that the staged snapshot is promoted directly without creating a new snapshot.
+   */
   @Test
   void testDoCommitCherryPickFirstSnapshot() throws IOException {
     List<Snapshot> testWapSnapshots = IcebergTestUtil.getWapSnapshots().subList(0, 1);
@@ -755,6 +821,10 @@ void testDoCommitCherryPickFirstSnapshot() throws IOException {
     }
   }
 
+  /**
+   * Tests deleting the last staged snapshot when no references point to it. Verifies that no
+   * snapshot operations are tracked since the snapshot was unreferenced.
+   */
   @Test
   void testDoCommitDeleteLastStagedSnapshotWhenNoRefs() throws IOException {
     List<Snapshot> testWapSnapshots = IcebergTestUtil.getWapSnapshots().subList(0, 1);
@@ -782,6 +852,10 @@ void testDoCommitDeleteLastStagedSnapshotWhenNoRefs() throws IOException {
     }
   }
 
+  /**
+   * Tests rebuilding an unpartitioned table's partition spec with a new schema. Verifies that the
+   * rebuilt spec remains unpartitioned.
+   */
   @Test
   void testRebuildPartitionSpecUnpartitioned() {
     Schema originalSchema =
@@ -796,6 +870,10 @@ void testRebuildPartitionSpecUnpartitioned() {
     Assertions.assertTrue(rebuiltSpec.isUnpartitioned());
   }
 
+  /**
+   * Tests rebuilding partition spec when the new schema has the same field IDs as the original.
+   * Verifies that partition fields are correctly mapped using matching field IDs.
+   */
   @Test
   void testRebuildPartitionSpec_NewSchemaSameFieldIds() {
     Schema originalSchema =
@@ -833,6 +911,11 @@ void testRebuildPartitionSpec_NewSchemaSameFieldIds() {
     Assertions.assertEquals(3, rebuiltSpec.fields().get(2).sourceId());
   }
 
+  /**
+   * Tests rebuilding partition spec when the new schema has different field IDs for same field
+   * names. Verifies that partition fields are correctly remapped to new field IDs based on field
+   * names.
+   */
   @Test
   void testRebuildPartitionSpec_NewSchemaDifferentFieldIds() {
     Schema originalSchema =
@@ -878,6 +961,10 @@ void testRebuildPartitionSpec_NewSchemaDifferentFieldIds() {
     Assertions.assertEquals(2, rebuiltSpec.fields().get(2).sourceId());
   }
 
+  /**
+   * Tests rebuilding partition spec when a partition field is missing from the new schema. Verifies
+   * that an IllegalArgumentException is thrown for the missing field.
+   */
   @Test
   void testRebuildPartitionSpec_fieldMissingInNewSchema() {
     Schema originalSchema =
@@ -899,6 +986,10 @@ void testRebuildPartitionSpec_fieldMissingInNewSchema() {
         "Field field1 does not exist in the new schema", exception.getMessage());
   }
 
+  /**
+   * Tests rebuilding sort order when the new schema has the same field IDs as the original.
+   * Verifies that sort fields are correctly mapped using matching field IDs.
+   */
   @Test
   void testRebuildSortOrder_NewSchemaSameFieldIds() {
     Schema originalSchema =
@@ -925,6 +1016,10 @@ void testRebuildSortOrder_NewSchemaSameFieldIds() {
     Assertions.assertEquals(2, rebuiltSortOrder.fields().get(1).sourceId());
   }
 
+  /**
+   * Tests rebuilding sort order when the new schema has different field IDs for same field names.
+   * Verifies that sort fields are correctly remapped to new field IDs based on field names.
+   */
   @Test
   void testRebuildSortOrder_NewSchemaDifferentFieldIds() {
     Schema originalSchema =
@@ -951,6 +1046,10 @@ void testRebuildSortOrder_NewSchemaDifferentFieldIds() {
     Assertions.assertEquals(1, rebuiltSortOrder.fields().get(1).sourceId());
   }
 
+  /**
+   * Tests rebuilding sort order when a sort field is missing from the new schema. Verifies that an
+   * IllegalArgumentException is thrown for the missing field.
+   */
   @Test
   void testRebuildSortOrder_fieldMissingInNewSchema() {
     Schema originalSchema =
@@ -969,6 +1068,10 @@ void testRebuildSortOrder_fieldMissingInNewSchema() {
         "Field field1 does not exist in the new schema", exception.getMessage());
   }
 
+  /**
+   * Tests that refresh metadata operations record metrics with database tag but not table tag.
+   * Verifies that only the database dimension is included to avoid high cardinality.
+   */
   @Test
   void testRefreshMetadataIncludesDatabaseTag() {
     testMetricIncludesDatabaseTag(
@@ -978,6 +1081,10 @@ void testRefreshMetadataIncludesDatabaseTag() {
         "Timer should not have table tag (removed because the table tag has super high cardinality and overloads metric emission max size)");
   }
 
+  /**
+   * Tests that commit metadata update operations record metrics with database tag but not table
+   * tag. Verifies that only the database dimension is included to avoid high cardinality.
+   */
   @Test
   void testCommitMetadataUpdateIncludesDatabaseTag() {
     testMetricIncludesDatabaseTag(
@@ -987,6 +1094,10 @@ void testCommitMetadataUpdateIncludesDatabaseTag() {
         "Timer should not have table tag (only database dimension should be included)");
   }
 
+  /**
+   * Tests that refresh metadata latency timer has histogram buckets configured. Verifies that the
+   * metrics can be used for histogram-based monitoring and alerting.
+   */
   @Test
   void testRefreshMetadataLatencyHasHistogramBuckets() {
     testMetricHasHistogramBuckets(
@@ -995,6 +1106,10 @@ void testRefreshMetadataLatencyHasHistogramBuckets() {
         this::executeRefreshMetadata);
   }
 
+  /**
+   * Tests that commit metadata update latency timer has histogram buckets configured. Verifies that
+   * the metrics can be used for histogram-based monitoring and alerting.
+   */
   @Test
   void testCommitMetadataUpdateLatencyHasHistogramBuckets() {
     testMetricHasHistogramBuckets(
@@ -1226,6 +1341,10 @@ private void verifyMetricHistogramBuckets(
 
   // ===== SNAPSHOT DELETION SAFETY TESTS =====
 
+  /**
+   * Tests that attempting to delete a snapshot referenced by the main branch throws an exception.
+   * Verifies that InvalidIcebergSnapshotException is thrown with appropriate error message.
+   */
   @Test
   void testDeleteSnapshotWithMainReference() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1276,6 +1395,10 @@ void testDeleteSnapshotWithMainReference() throws IOException {
         "Error message should indicate snapshot is still referenced: " + exception.getMessage());
   }
 
+  /**
+   * Tests that unreferenced snapshots can be successfully deleted from the table. Verifies that
+   * deleted snapshots are removed from metadata and tracked in properties.
+   */
   @Test
   void testDeleteSnapshotWithNoReference() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1337,6 +1460,11 @@ void testDeleteSnapshotWithNoReference() throws IOException {
     }
   }
 
+  /**
+   * Tests that attempting to delete a snapshot referenced by multiple branches throws an exception.
+   * Verifies that InvalidIcebergSnapshotException is thrown indicating the snapshot is still
+   * referenced.
+   */
   @Test
   void testDeleteSnapshotWithMultipleReference() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1394,6 +1522,10 @@ void testDeleteSnapshotWithMultipleReference() throws IOException {
             + exceptionMessage);
   }
 
+  /**
+   * Tests that attempting to delete a snapshot referenced by a tag throws an exception. Verifies
+   * that InvalidIcebergSnapshotException is thrown with branch/tag reference details.
+   */
   @Test
   void testDeleteSnapshotWithBranchReference() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1456,6 +1588,10 @@ void testDeleteSnapshotWithBranchReference() throws IOException {
             + exceptionMessage);
   }
 
+  /**
+   * Tests that attempting to delete an empty list of snapshots makes no changes to the table.
+   * Verifies that no snapshots are deleted and no deletion properties are set.
+   */
   @Test
   void testDeleteEmptySnapshotList() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1497,6 +1633,10 @@ void testDeleteEmptySnapshotList() throws IOException {
     Assertions.assertNull(deletedSnapshots, "No deleted snapshots property should be set");
   }
 
+  /**
+   * Tests that attempting to delete a null list of snapshots makes no changes to the table.
+   * Verifies that no snapshots are deleted and no deletion properties are set.
+   */
   @Test
   void testDeleteNullSnapshotList() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1538,6 +1678,10 @@ void testDeleteNullSnapshotList() throws IOException {
     Assertions.assertNull(deletedSnapshots, "No deleted snapshots property should be set");
   }
 
+  /**
+   * Tests that attempting to delete a snapshot that doesn't exist in the metadata has no effect.
+   * Verifies that snapshot count remains unchanged and no deletion tracking occurs.
+   */
   @Test
   void testDeleteNonExistentSnapshot() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1583,6 +1727,10 @@ void testDeleteNonExistentSnapshot() throws IOException {
     Assertions.assertNull(deletedSnapshots, "No deleted snapshots should be tracked");
   }
 
+  /**
+   * Tests that snapshot deletion operations record the correct metrics. Verifies that
+   * SNAPSHOTS_DELETED_CTR counter is incremented by the number of deleted snapshots.
+   */
   @Test
   void testDeleteSnapshotMetricsRecorded() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1618,6 +1766,10 @@ void testDeleteSnapshotMetricsRecorded() throws IOException {
             eq((double) 2)); // 2 snapshots deleted
   }
 
+  /**
+   * Tests that snapshot deletion metrics are recorded when deleting unreferenced snapshots.
+   * Verifies that SNAPSHOTS_DELETED_CTR counter tracks deletions with branch references present.
+   */
   @Test
   void testDeleteSnapshotMetricsRecordedBranch() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1655,6 +1807,10 @@ void testDeleteSnapshotMetricsRecordedBranch() throws IOException {
             eq((double) 2)); // 2 snapshots deleted
   }
 
+  /**
+   * Tests that snapshot deletion metrics are not recorded when no actual deletion occurs. Verifies
+   * that SNAPSHOTS_DELETED_CTR counter is not called for non-existent snapshots.
+   */
   @Test
   void testDeleteSnapshotMetricsRecordedNonExistent() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1695,6 +1851,11 @@ void testDeleteSnapshotMetricsRecordedNonExistent() throws IOException {
         .count(eq(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR), Mockito.anyDouble());
   }
 
+  /**
+   * Tests that attempting to delete all snapshots fails when the main branch references a snapshot.
+   * Verifies that InvalidIcebergSnapshotException is thrown to prevent deleting referenced
+   * snapshots.
+   */
   @Test
   void testDeleteAllSnapshotsFailsWhenMainBranchReferenced() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1750,6 +1911,10 @@ void testDeleteAllSnapshotsFailsWhenMainBranchReferenced() throws IOException {
     Assertions.assertTrue(exceptionMessage.contains(expectedMessage));
   }
 
+  /**
+   * Tests that deleting all unreferenced snapshots succeeds without errors. Verifies that all
+   * snapshots can be deleted when no branches or tags reference them.
+   */
   @Test
   void testDeleteAllUnreferencedSnapshotsSucceeds() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1800,6 +1965,10 @@ void testDeleteAllUnreferencedSnapshotsSucceeds() throws IOException {
     }
   }
 
+  /**
+   * Tests that multiple branches can point to different snapshots without conflicts. Verifies that
+   * commits with multiple valid branch references succeed without exceptions.
+   */
   @Test
   void testValidMultipleBranchesWithDifferentSnapshots() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1840,6 +2009,10 @@ void testValidMultipleBranchesWithDifferentSnapshots() throws IOException {
         "Should NOT throw exception when branches target different snapshots");
   }
 
+  /**
+   * Tests the standard Write-Audit-Publish (WAP) workflow where a staged snapshot becomes main.
+   * Verifies that pulling a WAP snapshot into the main branch succeeds without errors.
+   */
   @Test
   void testStandardWAPScenario() throws IOException {
     List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
@@ -1885,17 +2058,8 @@ void testStandardWAPScenario() throws IOException {
   }
 
   /**
-   * Integration test that verifies committing with base and metadata that are at least two commits
-   * divergent. This simulates scenarios where:
-   *
-   * <ul>
-   *   <li>Base metadata is at version N
-   *   <li>New metadata represents state at version N+2 or later (skipping intermediate versions)
-   *   <li>The commit should still succeed and write complete metadata
-   * </ul>
-   *
-   * <p>This test validates that Iceberg can handle "jump" commits where the metadata being
-   * committed has evolved significantly from the base.
+   * Tests committing metadata that has diverged multiple versions from the base (N to N+3).
+   * Verifies that "jump" commits succeed with all snapshots and references correctly applied.
    */
   @Test
   void testMultipleDiffCommit() throws IOException {
@@ -1974,8 +2138,8 @@ void testMultipleDiffCommit() throws IOException {
   }
 
   /**
-   * Test committing with divergent metadata and multiple valid branches. Base is at N with MAIN,
-   * metadata is at N+3 with both MAIN and feature_a branches pointing to different snapshots.
+   * Tests divergent commit (N to N+3) with multiple branches pointing to different snapshots.
+   * Verifies that divergent commits succeed when branch references are valid and non-conflicting.
    */
   @Test
   void testMultipleDiffCommitWithValidBranch() throws IOException {
@@ -2060,8 +2224,9 @@ void testMultipleDiffCommitWithValidBranch() throws IOException {
   }
 
   /**
-   * Test committing with divergent metadata where multiple branches point to the same snapshot.
-   * This is VALID when done through setBranchSnapshot() - the end state is allowed.
+   * Tests committing with multiple branches advancing forward, each pointing to different
+   * snapshots. Verifies that complex multi-branch commits succeed when each branch has a unique
+   * target snapshot.
    */
   @Test
   void testMultipleDiffCommitWithMultipleBranchesPointingToSameSnapshot() throws IOException {
@@ -2139,7 +2304,7 @@ void testMultipleDiffCommitWithMultipleBranchesPointingToSameSnapshot() throws I
 
     TableMetadata finalNewMetadata = newMetadata.replaceProperties(newProperties);
 
-    // ========== COMMIT: Should SUCCEED ==========
+    // commit should succeed
     openHouseInternalTableOperations.doCommit(finalBaseMetadata, finalNewMetadata);
     Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
 
@@ -2184,8 +2349,8 @@ void testMultipleDiffCommitWithMultipleBranchesPointingToSameSnapshot() throws I
   }
 
   /**
-   * Test committing with divergent metadata where multiple branches try to point to the same
-   * snapshot (ambiguous commit). This should throw an IllegalStateException.
+   * Tests that committing with multiple branches pointing to the same snapshot throws an exception.
+   * Verifies that InvalidIcebergSnapshotException is thrown for ambiguous branch configurations.
    */
   @Test
   void testMultipleDiffCommitWithInvalidBranch() throws IOException {
@@ -2234,8 +2399,6 @@ void testMultipleDiffCommitWithInvalidBranch() throws IOException {
       TableMetadata finalDivergentMetadata =
           metadataWithAllSnapshots.replaceProperties(divergentProperties);
 
-      // ========== COMMIT: Should throw CommitStateUnknownException due to ambiguous branches
-      // ==========
       InvalidIcebergSnapshotException exception =
           Assertions.assertThrows(
               InvalidIcebergSnapshotException.class,

From 11be4381fa5c5cc2ebc4dd7b28b2480fd8bca242 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Thu, 9 Oct 2025 16:29:17 -0700
Subject: [PATCH 15/35] working tests

---
 .../OpenHouseInternalTableOperationsTest.java | 123 +++++++++++++++++-
 1 file changed, 120 insertions(+), 3 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index e0c5f6513..f514ed162 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -201,7 +201,7 @@ void testDoCommitAppendSnapshotsExistingVersion() throws IOException {
       Assertions.assertEquals(
           5,
           updatedProperties
-              .size()); /*write.parquet.compression-codec, location, lastModifiedTime, version and deleted_snapshots*/
+              .size()); /*write.parquet.compression-codec, location, lastModifiedTime, version and appended_snapshots*/
       Assertions.assertEquals(
           TEST_LOCATION, updatedProperties.get(getCanonicalFieldName("tableVersion")));
 
@@ -1339,8 +1339,6 @@ private void verifyMetricHistogramBuckets(
     Assertions.assertFalse(Double.isNaN(maxTime), "Timer max time should not be NaN");
   }
 
-  // ===== SNAPSHOT DELETION SAFETY TESTS =====
-
   /**
    * Tests that attempting to delete a snapshot referenced by the main branch throws an exception.
    * Verifies that InvalidIcebergSnapshotException is thrown with appropriate error message.
@@ -2417,4 +2415,123 @@ void testMultipleDiffCommitWithInvalidBranch() throws IOException {
               + exceptionMessage);
     }
   }
+
+  /**
+   * Tests divergent commit (N to N+3) that includes both regular snapshots and WAP staged
+   * snapshots. Verifies that staged snapshots remain properly tracked as staged even during a
+   * multi-version jump commit.
+   */
+  @Test
+  void testMultipleDiffCommitWithWAPSnapshots() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+    List<Snapshot> wapSnapshots = IcebergTestUtil.getWapSnapshots();
+
+    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
+        Mockito.mockStatic(TableMetadataParser.class)) {
+
+      // ========== Create base at N with 1 snapshot ==========
+      TableMetadata baseAtN =
+          TableMetadata.buildFrom(BASE_TABLE_METADATA)
+              .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
+              .build();
+
+      // ========== Create divergent metadata at N+3 with 2 regular + 2 WAP snapshots ==========
+      // Simulate evolving through N+1 and N+2 without committing
+      // The new metadata will have:
+      // - testSnapshots[0] (existing in base, main branch)
+      // - testSnapshots[1] (new, main branch will advance here)
+      // - wapSnapshots[0] (new, staged - no branch reference)
+      // - wapSnapshots[1] (new, staged - no branch reference)
+
+      TableMetadata metadataAtNPlus3 =
+          TableMetadata.buildFrom(baseAtN)
+              .setBranchSnapshot(testSnapshots.get(1), SnapshotRef.MAIN_BRANCH)
+              .addSnapshot(wapSnapshots.get(0))
+              .addSnapshot(wapSnapshots.get(1))
+              .build();
+
+      // Add custom properties for commit
+      Map<String, String> divergentProperties = new HashMap<>(metadataAtNPlus3.properties());
+
+      // Include 2 regular snapshots (0, 1) and 2 WAP snapshots (0, 1)
+      List<Snapshot> allSnapshots = new ArrayList<>();
+      allSnapshots.add(testSnapshots.get(0));
+      allSnapshots.add(testSnapshots.get(1));
+      allSnapshots.add(wapSnapshots.get(0));
+      allSnapshots.add(wapSnapshots.get(1));
+
+      divergentProperties.put(
+          CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
+
+      // Only main branch ref pointing to testSnapshots[1], WAP snapshots have no refs
+      divergentProperties.put(
+          CatalogConstants.SNAPSHOTS_REFS_KEY,
+          SnapshotsUtil.serializeMap(
+              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(testSnapshots.get(1))));
+      divergentProperties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
+
+      TableMetadata finalDivergentMetadata =
+          metadataAtNPlus3.replaceProperties(divergentProperties);
+
+      // ========== COMMIT: Base at N, Metadata at N+3 (divergent by 3 commits) ==========
+      openHouseInternalTableOperations.doCommit(baseAtN, finalDivergentMetadata);
+      Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
+
+      TableMetadata capturedMetadata = tblMetadataCaptor.getValue();
+      Map<String, String> updatedProperties = capturedMetadata.properties();
+
+      // Verify the divergent commit contains all 4 snapshots
+      Assertions.assertEquals(
+          4,
+          capturedMetadata.snapshots().size(),
+          "Divergent commit should contain all 4 snapshots (2 regular + 2 WAP)");
+
+      Set<Long> expectedSnapshotIds =
+          allSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+      Set<Long> actualSnapshotIds =
+          capturedMetadata.snapshots().stream()
+              .map(Snapshot::snapshotId)
+              .collect(Collectors.toSet());
+      Assertions.assertEquals(
+          expectedSnapshotIds,
+          actualSnapshotIds,
+          "All snapshot IDs (regular + WAP) should be present after divergent commit");
+
+      // Verify main ref points to the expected snapshot (testSnapshots[1])
+      SnapshotRef mainRef = capturedMetadata.ref(SnapshotRef.MAIN_BRANCH);
+      Assertions.assertNotNull(mainRef, "Main branch ref should exist");
+      Assertions.assertEquals(
+          testSnapshots.get(1).snapshotId(),
+          mainRef.snapshotId(),
+          "Main branch should point to testSnapshots[1] after divergent commit");
+
+      // Verify WAP snapshots are tracked as staged
+      String stagedSnapshots = updatedProperties.get(getCanonicalFieldName("staged_snapshots"));
+      Assertions.assertNotNull(stagedSnapshots, "Staged snapshots should be tracked");
+      Set<String> stagedSnapshotIds = Set.of(stagedSnapshots.split(","));
+      Assertions.assertTrue(
+          stagedSnapshotIds.contains(Long.toString(wapSnapshots.get(0).snapshotId())),
+          "WAP snapshot 0 should be tracked as staged");
+      Assertions.assertTrue(
+          stagedSnapshotIds.contains(Long.toString(wapSnapshots.get(1).snapshotId())),
+          "WAP snapshot 1 should be tracked as staged");
+
+      // Verify regular snapshot is tracked as appended (not testSnapshots[0] since it was in base)
+      String appendedSnapshots = updatedProperties.get(getCanonicalFieldName("appended_snapshots"));
+      Assertions.assertNotNull(appendedSnapshots, "Appended snapshots should be tracked");
+      Assertions.assertEquals(
+          Long.toString(testSnapshots.get(1).snapshotId()),
+          appendedSnapshots,
+          "testSnapshots[1] should be tracked as appended");
+
+      Assertions.assertNull(
+          updatedProperties.get(getCanonicalFieldName("cherry_picked_snapshots")),
+          "No snapshots should be cherry-picked in this scenario");
+      Assertions.assertNull(
+          updatedProperties.get(getCanonicalFieldName("deleted_snapshots")),
+          "No snapshots should be deleted in this scenario");
+
+      Mockito.verify(mockHouseTableRepository, Mockito.times(1)).save(Mockito.eq(mockHouseTable));
+    }
+  }
 }

From c7426b43c8af56ca76fd25bf38d3ef5feae52271 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Thu, 9 Oct 2025 21:07:31 -0700
Subject: [PATCH 16/35] complete refactor + new tests

---
 .../catalog/OpenHouseInternalCatalog.java     |  14 +-
 .../OpenHouseInternalTableOperations.java     | 658 +-----------------
 .../internal/catalog/SnapshotDiffApplier.java | 468 +++++++++++++
 .../internal/catalog/SnapshotInspector.java   |  96 ---
 .../OpenHouseInternalTableOperationsTest.java |  87 ++-
 .../catalog/SnapshotDiffApplierTest.java      | 359 ++++++++++
 .../catalog/SnapshotInspectorTest.java        | 171 -----
 .../RepositoryTestWithSettableComponents.java |  30 +-
 .../tables/e2e/h2/SpringH2Application.java    |  17 -
 .../tablestest/SpringH2TestApplication.java   |  18 -
 10 files changed, 916 insertions(+), 1002 deletions(-)
 create mode 100644 iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
 delete mode 100644 iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotInspector.java
 create mode 100644 iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
 delete mode 100644 iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotInspectorTest.java

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalCatalog.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalCatalog.java
index f77f7a0cb..2743c6579 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalCatalog.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalCatalog.java
@@ -59,22 +59,24 @@ public class OpenHouseInternalCatalog extends BaseMetastoreCatalog {
 
   @Autowired StorageType storageType;
 
-  @Autowired SnapshotInspector snapshotInspector;
-
   @Autowired HouseTableMapper houseTableMapper;
 
   @Autowired MeterRegistry meterRegistry;
 
   @Override
   protected TableOperations newTableOps(TableIdentifier tableIdentifier) {
+    FileIO fileIO = resolveFileIO(tableIdentifier);
+    MetricsReporter metricsReporter =
+        new MetricsReporter(this.meterRegistry, METRICS_PREFIX, Lists.newArrayList());
+    SnapshotDiffApplier snapshotDiffApplier = new SnapshotDiffApplier(metricsReporter);
     return new OpenHouseInternalTableOperations(
         houseTableRepository,
-        resolveFileIO(tableIdentifier),
-        snapshotInspector,
+        fileIO,
         houseTableMapper,
         tableIdentifier,
-        new MetricsReporter(this.meterRegistry, METRICS_PREFIX, Lists.newArrayList()),
-        fileIOManager);
+        metricsReporter,
+        fileIOManager,
+        snapshotDiffApplier);
   }
 
   @Override
diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
index 6a2c43305..d96d9d6b1 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java
@@ -4,7 +4,6 @@
 
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
-import com.google.common.collect.Sets;
 import com.google.gson.Gson;
 import com.linkedin.openhouse.cluster.metrics.micrometer.MetricsReporter;
 import com.linkedin.openhouse.cluster.storage.Storage;
@@ -24,15 +23,11 @@
 import java.io.IOException;
 import java.time.Clock;
 import java.time.Instant;
-import java.util.Collections;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.Set;
 import java.util.UUID;
 import java.util.concurrent.TimeUnit;
-import java.util.stream.Collectors;
 import lombok.AllArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.hadoop.fs.FileSystem;
@@ -41,9 +36,6 @@
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.SchemaParser;
-import org.apache.iceberg.Snapshot;
-import org.apache.iceberg.SnapshotRef;
-import org.apache.iceberg.SnapshotSummary;
 import org.apache.iceberg.SortDirection;
 import org.apache.iceberg.SortField;
 import org.apache.iceberg.SortOrder;
@@ -68,8 +60,6 @@ public class OpenHouseInternalTableOperations extends BaseMetastoreTableOperatio
 
   FileIO fileIO;
 
-  SnapshotInspector snapshotInspector;
-
   HouseTableMapper houseTableMapper;
 
   TableIdentifier tableIdentifier;
@@ -78,6 +68,8 @@ public class OpenHouseInternalTableOperations extends BaseMetastoreTableOperatio
 
   FileIOManager fileIOManager;
 
+  SnapshotDiffApplier snapshotDiffApplier;
+
   private static final Gson GSON = new Gson();
 
   private static final Cache<String, Integer> CACHE =
@@ -227,7 +219,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) {
       metadata = rebuildTblMetaWithSchema(metadata, CatalogConstants.EVOLVED_SCHEMA_KEY, true);
     }
 
-    metadata = applySnapshots(base, metadata);
+    metadata = snapshotDiffApplier.applySnapshots(base, metadata);
 
     int version = currentVersion() + 1;
     CommitStatus commitStatus = CommitStatus.FAILURE;
@@ -482,650 +474,6 @@ static SortOrder rebuildSortOrder(SortOrder originalSortOrder, Schema newSchema)
     return builder.build();
   }
 
-  // ==================== Functional Snapshot Application Pipeline ====================
-
-  /**
-   * Immutable state object representing the complete snapshot diff and categorization. All fields
-   * are final and collections are unmodifiable.
-   */
-  @lombok.Value
-  @lombok.Builder
-  private static class SnapshotState {
-    List<Snapshot> providedSnapshots;
-    Map<String, SnapshotRef> providedRefs;
-    List<Snapshot> existingSnapshots;
-    Map<String, SnapshotRef> existingRefs;
-
-    // Categorization
-    List<Snapshot> wapSnapshots;
-    List<Snapshot> cherryPickedSnapshots;
-    List<Snapshot> regularSnapshots;
-
-    // Diff results
-    List<Snapshot> newSnapshots;
-    List<Snapshot> existingRetainedSnapshots;
-    List<Snapshot> deletedSnapshots;
-
-    // Branch updates
-    Map<String, SnapshotRef> branchUpdates;
-
-    // Metrics for recording
-    int appendedCount;
-    int stagedCount;
-    int cherryPickedCount;
-    int deletedCount;
-  }
-
-  /**
-   * Applies snapshot updates from metadata properties using a functional pipeline. This method
-   * follows principles: immutability, pure functions, and composition.
-   *
-   * <p>Pipeline stages: 1. Extract snapshots from properties 2. Parse snapshots from JSON 3. Parse
-   * references from JSON 4. Compute complete state diff (categorize, identify changes) 5. Validate
-   * entire operation 6. Apply state changes (returns builder) 7. Add metric properties to builder
-   * 8. Build once at top level to preserve lastUpdatedMillis from snapshot operations
-   *
-   * @param base The base table metadata (may be null for table creation)
-   * @param metadata The new metadata with properties containing snapshot updates
-   * @return Updated metadata with snapshots applied
-   */
-  TableMetadata applySnapshots(TableMetadata base, TableMetadata metadata) {
-    // Check if snapshots update is requested
-    if (!metadata.properties().containsKey(CatalogConstants.SNAPSHOTS_JSON_KEY)) {
-      // No snapshot updates requested, return unchanged
-      return metadata;
-    }
-
-    return Optional.ofNullable(metadata.properties().get(CatalogConstants.SNAPSHOTS_JSON_KEY))
-        .map(
-            snapshotsJson -> {
-              // Stage 1-3: Extract and parse
-              SnapshotState.SnapshotStateBuilder stateBuilder = SnapshotState.builder();
-
-              // Extract and parse snapshots (Stage 1-2)
-              List<Snapshot> providedSnapshots = parseSnapshotsFromJson(snapshotsJson);
-              stateBuilder.providedSnapshots(Collections.unmodifiableList(providedSnapshots));
-
-              // Extract and parse references (Stage 3)
-              Map<String, SnapshotRef> providedRefs =
-                  Optional.ofNullable(
-                          metadata.properties().get(CatalogConstants.SNAPSHOTS_REFS_KEY))
-                      .map(this::parseReferencesFromJson)
-                      .orElse(Collections.emptyMap());
-              stateBuilder.providedRefs(Collections.unmodifiableMap(providedRefs));
-
-              // Get existing state from base
-              List<Snapshot> existingSnapshots =
-                  Optional.ofNullable(base)
-                      .map(TableMetadata::snapshots)
-                      .orElse(Collections.emptyList());
-              stateBuilder.existingSnapshots(Collections.unmodifiableList(existingSnapshots));
-
-              Map<String, SnapshotRef> existingRefs =
-                  Optional.ofNullable(base).map(TableMetadata::refs).orElse(Collections.emptyMap());
-              stateBuilder.existingRefs(Collections.unmodifiableMap(existingRefs));
-
-              // Stage 4: Compute complete state diff
-              SnapshotState state = computeStateDiff(stateBuilder);
-
-              // Stage 5: Validate entire operation
-              validateOperation(state, base);
-
-              // Stage 6: Apply state changes - returns builder
-              TableMetadata.Builder builder = applyStateChanges(metadata, state);
-
-              // Stage 7: Record metrics and add metric properties to builder
-              builder = recordMetrics(builder, state);
-
-              // Build once at the end to preserve lastUpdatedMillis from snapshot operations
-              return builder.build();
-            })
-        .orElse(metadata); // No snapshot updates if key not present
-  }
-
-  /** Stage 2: Parse snapshots from JSON string. Pure function - no side effects. */
-  private List<Snapshot> parseSnapshotsFromJson(String snapshotsJson) {
-    return SnapshotsUtil.parseSnapshots(fileIO, snapshotsJson);
-  }
-
-  /** Stage 3: Parse references from JSON string. Pure function - no side effects. */
-  private Map<String, SnapshotRef> parseReferencesFromJson(String refsJson) {
-    return SnapshotsUtil.parseSnapshotRefs(refsJson);
-  }
-
-  /**
-   * Stage 4: Compute complete state diff. Pure function that categorizes snapshots and identifies
-   * changes.
-   */
-  private SnapshotState computeStateDiff(SnapshotState.SnapshotStateBuilder builder) {
-    SnapshotState partial = builder.build();
-
-    Map<Long, Snapshot> providedById =
-        partial.getProvidedSnapshots().stream()
-            .collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
-    Map<Long, Snapshot> existingById =
-        partial.getExistingSnapshots().stream()
-            .collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
-
-    // Categorize all snapshots by type
-    SnapshotCategories categories =
-        categorizeAllSnapshots(
-            partial.getProvidedSnapshots(),
-            existingById,
-            partial.getExistingRefs(),
-            partial.getProvidedRefs());
-
-    // Identify snapshot changes (new, retained, deleted)
-    SnapshotChanges changes =
-        identifySnapshotChanges(
-            partial.getProvidedSnapshots(),
-            partial.getExistingSnapshots(),
-            providedById,
-            existingById);
-
-    // Identify branch updates
-    Map<String, SnapshotRef> branchUpdates =
-        computeBranchUpdates(partial.getProvidedRefs(), partial.getExistingRefs());
-
-    // Compute metrics
-    SnapshotMetrics metrics = computeSnapshotMetrics(categories, changes, existingById);
-
-    // Build complete state
-    return builder
-        .wapSnapshots(Collections.unmodifiableList(categories.wapSnapshots))
-        .cherryPickedSnapshots(Collections.unmodifiableList(categories.cherryPickedSnapshots))
-        .regularSnapshots(Collections.unmodifiableList(categories.regularSnapshots))
-        .newSnapshots(Collections.unmodifiableList(changes.newSnapshots))
-        .existingRetainedSnapshots(Collections.unmodifiableList(changes.existingRetainedSnapshots))
-        .deletedSnapshots(Collections.unmodifiableList(changes.deletedSnapshots))
-        .branchUpdates(Collections.unmodifiableMap(branchUpdates))
-        .appendedCount(metrics.appendedCount)
-        .stagedCount(metrics.stagedCount)
-        .cherryPickedCount(metrics.cherryPickedCount)
-        .deletedCount(metrics.deletedCount)
-        .build();
-  }
-
-  /** Container for categorized snapshots. */
-  @lombok.Value
-  private static class SnapshotCategories {
-    List<Snapshot> wapSnapshots;
-    List<Snapshot> cherryPickedSnapshots;
-    List<Snapshot> regularSnapshots;
-  }
-
-  /** Categorize all snapshots into WAP, cherry-picked, and regular. */
-  private SnapshotCategories categorizeAllSnapshots(
-      List<Snapshot> providedSnapshots,
-      Map<Long, Snapshot> existingById,
-      Map<String, SnapshotRef> existingRefs,
-      Map<String, SnapshotRef> providedRefs) {
-    List<Snapshot> wapSnapshots =
-        categorizeWapSnapshots(providedSnapshots, existingRefs, providedRefs);
-    List<Snapshot> cherryPickedSnapshots =
-        categorizeCherryPickedSnapshots(
-            providedSnapshots, existingById, existingRefs, providedRefs);
-
-    // Cherry-picked snapshots should not be considered WAP/staged anymore
-    Set<Long> cherryPickedIds =
-        cherryPickedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-    wapSnapshots =
-        wapSnapshots.stream()
-            .filter(s -> !cherryPickedIds.contains(s.snapshotId()))
-            .collect(Collectors.toList());
-
-    List<Snapshot> regularSnapshots =
-        categorizeRegularSnapshots(providedSnapshots, wapSnapshots, cherryPickedSnapshots);
-
-    return new SnapshotCategories(wapSnapshots, cherryPickedSnapshots, regularSnapshots);
-  }
-
-  /** Container for snapshot changes. */
-  @lombok.Value
-  private static class SnapshotChanges {
-    List<Snapshot> newSnapshots;
-    List<Snapshot> existingRetainedSnapshots;
-    List<Snapshot> deletedSnapshots;
-  }
-
-  /** Identify which snapshots are new, retained, or deleted. */
-  private SnapshotChanges identifySnapshotChanges(
-      List<Snapshot> providedSnapshots,
-      List<Snapshot> existingSnapshots,
-      Map<Long, Snapshot> providedById,
-      Map<Long, Snapshot> existingById) {
-
-    List<Snapshot> newSnapshots =
-        providedSnapshots.stream()
-            .filter(s -> !existingById.containsKey(s.snapshotId()))
-            .collect(Collectors.toList());
-
-    List<Snapshot> existingRetainedSnapshots =
-        providedSnapshots.stream()
-            .filter(s -> existingById.containsKey(s.snapshotId()))
-            .collect(Collectors.toList());
-
-    List<Snapshot> deletedSnapshots =
-        existingSnapshots.stream()
-            .filter(s -> !providedById.containsKey(s.snapshotId()))
-            .collect(Collectors.toList());
-
-    return new SnapshotChanges(newSnapshots, existingRetainedSnapshots, deletedSnapshots);
-  }
-
-  /** Container for snapshot metrics. */
-  @lombok.Value
-  private static class SnapshotMetrics {
-    int appendedCount;
-    int stagedCount;
-    int cherryPickedCount;
-    int deletedCount;
-  }
-
-  /** Compute metrics based on categorized snapshots and changes. */
-  private SnapshotMetrics computeSnapshotMetrics(
-      SnapshotCategories categories, SnapshotChanges changes, Map<Long, Snapshot> existingById) {
-
-    int appendedCount =
-        (int)
-            categories.regularSnapshots.stream()
-                .filter(s -> !existingById.containsKey(s.snapshotId()))
-                .count();
-    int stagedCount = categories.wapSnapshots.size();
-    int cherryPickedCount = categories.cherryPickedSnapshots.size();
-    int deletedCount = changes.deletedSnapshots.size();
-
-    return new SnapshotMetrics(appendedCount, stagedCount, cherryPickedCount, deletedCount);
-  }
-
-  /**
-   * Categorize WAP (Write-Audit-Publish) snapshots. A snapshot is considered WAP/staged if it has
-   * the wap.id property AND is not on any branch in either the existing or provided metadata. This
-   * correctly handles: 1. Snapshots that were on branches in base - not WAP even if unreferenced in
-   * new metadata 2. Snapshots being published (staged -> branch) - not WAP as they're now on a
-   * branch
-   *
-   * @param snapshots List of provided snapshots
-   * @param existingRefs Existing snapshot refs from base metadata
-   * @param providedRefs Provided snapshot refs from new metadata
-   * @return List of WAP snapshots
-   */
-  private List<Snapshot> categorizeWapSnapshots(
-      List<Snapshot> snapshots,
-      Map<String, SnapshotRef> existingRefs,
-      Map<String, SnapshotRef> providedRefs) {
-    // Get set of snapshot IDs that are/were on branches
-    Set<Long> branchSnapshotIds = new java.util.HashSet<>();
-    branchSnapshotIds.addAll(
-        existingRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet()));
-    branchSnapshotIds.addAll(
-        providedRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet()));
-
-    return snapshots.stream()
-        .filter(
-            s -> s.summary() != null && s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
-        .filter(s -> !branchSnapshotIds.contains(s.snapshotId()))
-        .collect(Collectors.toList());
-  }
-
-  /**
-   * Categorize cherry-picked snapshots. A snapshot is cherry-picked if: 1. It exists in the current
-   * metadata but has a different parent than in the provided snapshots (indicating it was moved to
-   * a different branch), OR 2. It is referenced as the source of a cherry-pick by another
-   * snapshot's "source-snapshot-id", OR 3. It has wap.id AND was staged (not on a branch) in
-   * existing refs AND is now on a branch in provided refs (indicating it's being published)
-   */
-  private List<Snapshot> categorizeCherryPickedSnapshots(
-      List<Snapshot> providedSnapshots,
-      Map<Long, Snapshot> existingById,
-      Map<String, SnapshotRef> existingRefs,
-      Map<String, SnapshotRef> providedRefs) {
-
-    // Find snapshots that are sources of cherry-picks
-    Set<Long> cherryPickSourceIds =
-        providedSnapshots.stream()
-            .filter(s -> s.summary() != null && s.summary().containsKey("source-snapshot-id"))
-            .map(s -> Long.parseLong(s.summary().get("source-snapshot-id")))
-            .collect(Collectors.toSet());
-
-    // Get snapshot IDs on branches
-    Set<Long> existingBranchSnapshotIds =
-        existingRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
-    Set<Long> providedBranchSnapshotIds =
-        providedRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
-
-    return providedSnapshots.stream()
-        .filter(
-            provided -> {
-              Snapshot existing = existingById.get(provided.snapshotId());
-              if (existing == null) {
-                return false; // New snapshot, not cherry-picked
-              }
-              // Check if parent changed (indicating cherry-pick to different branch)
-              Long providedParent = provided.parentId();
-              Long existingParent = existing.parentId();
-              boolean parentChanged = !Objects.equal(providedParent, existingParent);
-
-              // Check if this snapshot is the source of a cherry-pick
-              boolean isCherryPickSource = cherryPickSourceIds.contains(provided.snapshotId());
-
-              // Check if this is a WAP snapshot being published (staged -> branch)
-              boolean hasWapId =
-                  provided.summary() != null
-                      && provided.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP);
-              boolean wasStaged = !existingBranchSnapshotIds.contains(provided.snapshotId());
-              boolean isNowOnBranch = providedBranchSnapshotIds.contains(provided.snapshotId());
-              boolean isBeingPublished = hasWapId && wasStaged && isNowOnBranch;
-
-              return parentChanged || isCherryPickSource || isBeingPublished;
-            })
-        .collect(Collectors.toList());
-  }
-
-  /**
-   * Categorize regular (appended) snapshots. Regular snapshots are those that are not WAP or
-   * cherry-picked.
-   */
-  private List<Snapshot> categorizeRegularSnapshots(
-      List<Snapshot> allSnapshots,
-      List<Snapshot> wapSnapshots,
-      List<Snapshot> cherryPickedSnapshots) {
-
-    Set<Long> wapIds = wapSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-    Set<Long> cherryPickedIds =
-        cherryPickedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-
-    return allSnapshots.stream()
-        .filter(s -> !wapIds.contains(s.snapshotId()) && !cherryPickedIds.contains(s.snapshotId()))
-        .collect(Collectors.toList());
-  }
-
-  /** Compute branch updates by comparing provided and existing refs. */
-  private Map<String, SnapshotRef> computeBranchUpdates(
-      Map<String, SnapshotRef> providedRefs, Map<String, SnapshotRef> existingRefs) {
-
-    return providedRefs.entrySet().stream()
-        .filter(
-            entry -> {
-              SnapshotRef existing = existingRefs.get(entry.getKey());
-              return existing == null || existing.snapshotId() != entry.getValue().snapshotId();
-            })
-        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
-  }
-
-  /** Stage 5: Validate entire operation. Throws exceptions for invalid operations. */
-  private void validateOperation(SnapshotState state, TableMetadata base) {
-    // Validation 1: Current snapshot not deleted without replacements
-    validateCurrentSnapshotNotDeleted(state, base);
-
-    // Validation 2: No ambiguous commits (multiple branches → same snapshot)
-    validateNoAmbiguousCommits(state);
-
-    // Validation 3: Deleted snapshots not referenced by branches/tags
-    validateDeletedSnapshotsNotReferenced(state);
-
-    // Validation 4: Individual snapshot validation using SnapshotInspector
-    validateIndividualSnapshots(state);
-  }
-
-  /**
-   * Validate that current snapshot is not deleted without replacements. Package-private for
-   * testing.
-   */
-  void validateCurrentSnapshotNotDeleted(SnapshotState state, TableMetadata base) {
-    if (base == null || base.currentSnapshot() == null) {
-      return; // No current snapshot to validate
-    }
-
-    long currentSnapshotId = base.currentSnapshot().snapshotId();
-    boolean currentDeleted =
-        state.getDeletedSnapshots().stream().anyMatch(s -> s.snapshotId() == currentSnapshotId);
-
-    if (currentDeleted && state.getNewSnapshots().isEmpty()) {
-      throw new InvalidIcebergSnapshotException(
-          String.format(
-              "Cannot delete the current snapshot %s without adding replacement snapshots. "
-                  + "Deleted: [%s], New: [%s]",
-              currentSnapshotId,
-              state.getDeletedSnapshots().stream()
-                  .map(s -> Long.toString(s.snapshotId()))
-                  .collect(Collectors.joining(", ")),
-              state.getNewSnapshots().stream()
-                  .map(s -> Long.toString(s.snapshotId()))
-                  .collect(Collectors.joining(", "))));
-    }
-  }
-
-  /**
-   * Validate no ambiguous commits (multiple branches pointing to same snapshot in one commit).
-   * Package-private for testing.
-   */
-  void validateNoAmbiguousCommits(SnapshotState state) {
-    Map<Long, List<String>> snapshotToBranches =
-        state.getBranchUpdates().entrySet().stream()
-            .collect(
-                Collectors.groupingBy(
-                    e -> e.getValue().snapshotId(),
-                    Collectors.mapping(Map.Entry::getKey, Collectors.toList())));
-
-    snapshotToBranches.forEach(
-        (snapshotId, branches) -> {
-          if (branches.size() > 1) {
-            throw new InvalidIcebergSnapshotException(
-                String.format(
-                    "Ambiguous commit: snapshot %s is referenced by multiple branches [%s] in a single commit. "
-                        + "Each snapshot can only be referenced by one branch per commit.",
-                    snapshotId, String.join(", ", branches)));
-          }
-        });
-  }
-
-  /**
-   * Validate that deleted snapshots are not referenced by any branches or tags. Package-private for
-   * testing.
-   */
-  void validateDeletedSnapshotsNotReferenced(SnapshotState state) {
-    Set<Long> deletedIds =
-        state.getDeletedSnapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-
-    Map<Long, List<String>> referencedIdsToRefs =
-        state.getProvidedRefs().entrySet().stream()
-            .collect(
-                Collectors.groupingBy(
-                    e -> e.getValue().snapshotId(),
-                    Collectors.mapping(Map.Entry::getKey, Collectors.toList())));
-
-    Map<Long, List<String>> invalidDeletes =
-        deletedIds.stream()
-            .filter(referencedIdsToRefs::containsKey)
-            .collect(Collectors.toMap(id -> id, referencedIdsToRefs::get));
-
-    if (!invalidDeletes.isEmpty()) {
-      String details =
-          invalidDeletes.entrySet().stream()
-              .map(
-                  e ->
-                      String.format(
-                          "snapshot %s (referenced by: %s)",
-                          e.getKey(), String.join(", ", e.getValue())))
-              .collect(Collectors.joining("; "));
-      throw new InvalidIcebergSnapshotException(
-          String.format(
-              "Cannot delete snapshots that are still referenced by branches/tags: %s", details));
-    }
-  }
-
-  /**
-   * Validate individual snapshots using existing SnapshotInspector. Package-private for testing.
-   */
-  void validateIndividualSnapshots(SnapshotState state) {
-    state
-        .getNewSnapshots()
-        .forEach(
-            snapshot -> {
-              if (snapshotInspector != null) {
-                snapshotInspector.validateSnapshot(snapshot);
-              }
-            });
-  }
-
-  /**
-   * Stage 6: Apply state changes to create TableMetadata builder. Returns builder (not built) to
-   * allow metric properties to be added before the final build, preserving lastUpdatedMillis.
-   *
-   * <p>This method uses Iceberg's proper APIs: - removeSnapshots() to delete snapshots -
-   * addSnapshot() to add new snapshots - setBranchSnapshot() to set branch references
-   *
-   * <p>The order of operations matters: 1. Start with base metadata (buildFrom copies all existing
-   * state) 2. Remove deleted snapshots first (using proper removeSnapshots API) 3. Remove stale
-   * branch references 4. Add new snapshots and set branch pointers
-   *
-   * @return Builder with all snapshot changes applied but not yet built
-   */
-  private TableMetadata.Builder applyStateChanges(TableMetadata metadata, SnapshotState state) {
-    TableMetadata.Builder builder = TableMetadata.buildFrom(metadata);
-
-    // Step 1: Remove deleted snapshots using proper Iceberg API
-    if (!state.getDeletedSnapshots().isEmpty()) {
-      Set<Long> deletedIds =
-          state.getDeletedSnapshots().stream()
-              .map(Snapshot::snapshotId)
-              .collect(Collectors.toSet());
-      builder.removeSnapshots(deletedIds);
-    }
-
-    // Step 2: Remove stale branch references (branches that are no longer in provided refs)
-    Set<String> providedRefNames = state.getProvidedRefs().keySet();
-    metadata.refs().keySet().stream()
-        .filter(refName -> !providedRefNames.contains(refName))
-        .forEach(builder::removeRef);
-
-    // Step 3: Identify existing snapshots (after deletions)
-    Set<Long> existingSnapshotIds =
-        metadata.snapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-    Set<Long> deletedIds =
-        state.getDeletedSnapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-    existingSnapshotIds.removeAll(deletedIds);
-
-    // Step 4: Identify snapshots referenced by branches
-    Set<Long> referencedByBranches =
-        state.getProvidedRefs().values().stream()
-            .map(SnapshotRef::snapshotId)
-            .collect(Collectors.toSet());
-
-    // Step 5: Add unreferenced new snapshots (referenced ones are added via setBranchSnapshot)
-    state.getProvidedSnapshots().stream()
-        .filter(s -> !existingSnapshotIds.contains(s.snapshotId()))
-        .filter(s -> !referencedByBranches.contains(s.snapshotId()))
-        .forEach(builder::addSnapshot);
-
-    // Step 6: Set branch pointers for all provided refs
-    state
-        .getProvidedRefs()
-        .forEach(
-            (branchName, ref) -> {
-              Snapshot snapshot =
-                  state.getProvidedSnapshots().stream()
-                      .filter(s -> s.snapshotId() == ref.snapshotId())
-                      .findFirst()
-                      .orElseThrow(
-                          () ->
-                              new InvalidIcebergSnapshotException(
-                                  String.format(
-                                      "Branch %s references non-existent snapshot %s",
-                                      branchName, ref.snapshotId())));
-
-              if (existingSnapshotIds.contains(snapshot.snapshotId())) {
-                // Snapshot already exists - just update the branch pointer if needed
-                SnapshotRef existingRef = metadata.refs().get(branchName);
-                if (existingRef == null || existingRef.snapshotId() != ref.snapshotId()) {
-                  builder.setRef(branchName, ref);
-                }
-              } else {
-                // Snapshot is new - setBranchSnapshot will add it and set the branch pointer
-                builder.setBranchSnapshot(snapshot, branchName);
-              }
-            });
-
-    return builder;
-  }
-
-  /**
-   * Stage 7: Add metric properties to builder. Returns the builder for final build in
-   * applySnapshots. This allows the single build to preserve lastUpdatedMillis from snapshot
-   * operations.
-   *
-   * @param builder Builder with snapshot changes already applied
-   * @param state Snapshot state containing metrics to record
-   * @return Builder with metric properties added, ready to be built
-   */
-  private TableMetadata.Builder recordMetrics(TableMetadata.Builder builder, SnapshotState state) {
-    // Emit metrics to reporter
-    if (state.getAppendedCount() > 0) {
-      metricsReporter.count(
-          InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, state.getAppendedCount());
-    }
-    if (state.getStagedCount() > 0) {
-      metricsReporter.count(
-          InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, state.getStagedCount());
-    }
-    if (state.getCherryPickedCount() > 0) {
-      metricsReporter.count(
-          InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR, state.getCherryPickedCount());
-    }
-    if (state.getDeletedCount() > 0) {
-      metricsReporter.count(
-          InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, state.getDeletedCount());
-    }
-
-    // Helper to format snapshot IDs as comma-separated string
-    java.util.function.Function<List<Snapshot>, String> formatIds =
-        snapshots ->
-            snapshots.stream()
-                .map(s -> Long.toString(s.snapshotId()))
-                .collect(Collectors.joining(","));
-
-    // Record categorization metrics as comma-separated snapshot IDs
-    if (!state.getRegularSnapshots().isEmpty()) {
-      List<Snapshot> newRegularSnapshots =
-          state.getRegularSnapshots().stream()
-              .filter(s -> state.getNewSnapshots().contains(s))
-              .collect(Collectors.toList());
-      if (!newRegularSnapshots.isEmpty()) {
-        builder.setProperties(
-            Collections.singletonMap(
-                getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
-                formatIds.apply(newRegularSnapshots)));
-      }
-    }
-    if (!state.getWapSnapshots().isEmpty()) {
-      builder.setProperties(
-          Collections.singletonMap(
-              getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
-              formatIds.apply(state.getWapSnapshots())));
-    }
-    if (!state.getCherryPickedSnapshots().isEmpty()) {
-      builder.setProperties(
-          Collections.singletonMap(
-              getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
-              formatIds.apply(state.getCherryPickedSnapshots())));
-    }
-    if (!state.getDeletedSnapshots().isEmpty()) {
-      builder.setProperties(
-          Collections.singletonMap(
-              getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
-              formatIds.apply(state.getDeletedSnapshots())));
-    }
-
-    // Remove the transient snapshot keys from properties
-    builder.removeProperties(
-        Sets.newHashSet(CatalogConstants.SNAPSHOTS_JSON_KEY, CatalogConstants.SNAPSHOTS_REFS_KEY));
-
-    return builder;
-  }
-
-  // ==================== End Functional Snapshot Application Pipeline ====================
-
   /**
    * If this commit comes from Iceberg built-in retry in
    * org.apache.iceberg.PropertiesUpdate#commit() Then throw fatal {@link CommitFailedException} to
diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
new file mode 100644
index 000000000..347cf4f7d
--- /dev/null
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -0,0 +1,468 @@
+package com.linkedin.openhouse.internal.catalog;
+
+import static com.linkedin.openhouse.internal.catalog.mapper.HouseTableSerdeUtils.getCanonicalFieldName;
+
+import com.google.common.collect.Sets;
+import com.linkedin.openhouse.cluster.metrics.micrometer.MetricsReporter;
+import com.linkedin.openhouse.internal.catalog.exception.InvalidIcebergSnapshotException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+import lombok.AllArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.SnapshotRef;
+import org.apache.iceberg.SnapshotSummary;
+import org.apache.iceberg.TableMetadata;
+import org.apache.iceberg.relocated.com.google.common.base.Objects;
+
+/**
+ * Service responsible for applying snapshot changes to Iceberg table metadata.
+ *
+ * <p>This class handles the complex logic of computing snapshot diffs, validating changes, and
+ * applying them to table metadata. It supports various snapshot operations including:
+ *
+ * <ul>
+ *   <li>Adding new snapshots (regular commits)
+ *   <li>Staging snapshots (WAP - Write-Audit-Publish)
+ *   <li>Cherry-picking snapshots across branches
+ *   <li>Deleting snapshots
+ *   <li>Updating branch references
+ * </ul>
+ *
+ * <p>The service performs comprehensive validation to ensure data integrity and prevent invalid
+ * operations such as deleting referenced snapshots or creating ambiguous branch references.
+ */
+@AllArgsConstructor
+@Slf4j
+public class SnapshotDiffApplier {
+
+  private final MetricsReporter metricsReporter;
+
+  /**
+   * Applies snapshot updates from metadata properties. Simple and clear: parse input, compute diff,
+   * validate, apply, record metrics, build.
+   *
+   * @param base The base table metadata (may be null for table creation)
+   * @param metadata The new metadata with properties containing snapshot updates
+   * @return Updated metadata with snapshots applied
+   */
+  public TableMetadata applySnapshots(TableMetadata base, TableMetadata metadata) {
+    String snapshotsJson = metadata.properties().get(CatalogConstants.SNAPSHOTS_JSON_KEY);
+    if (snapshotsJson == null) {
+      return metadata;
+    }
+
+    // Parse input
+    List<Snapshot> providedSnapshots = SnapshotsUtil.parseSnapshots(null, snapshotsJson);
+    Map<String, SnapshotRef> providedRefs =
+        Optional.ofNullable(metadata.properties().get(CatalogConstants.SNAPSHOTS_REFS_KEY))
+            .map(SnapshotsUtil::parseSnapshotRefs)
+            .orElse(Collections.emptyMap());
+
+    List<Snapshot> existingSnapshots = base != null ? base.snapshots() : Collections.emptyList();
+    Map<String, SnapshotRef> existingRefs = base != null ? base.refs() : Collections.emptyMap();
+
+    // Compute diff (all maps created once in constructor)
+    SnapshotDiff diff =
+        new SnapshotDiff(providedSnapshots, providedRefs, existingSnapshots, existingRefs);
+
+    // Validate, apply, record metrics, build
+    diff.validate(base);
+    TableMetadata.Builder builder = diff.applyTo(metadata);
+    diff.recordMetrics(builder);
+    return builder.build();
+  }
+
+  /**
+   * State object that computes and caches all snapshot analysis. Computes all maps once in the
+   * constructor to avoid redundant operations. Provides clear methods for validation and
+   * application.
+   */
+  private class SnapshotDiff {
+    // Input state
+    private final List<Snapshot> providedSnapshots;
+    private final Map<String, SnapshotRef> providedRefs;
+    private final List<Snapshot> existingSnapshots;
+    private final Map<String, SnapshotRef> existingRefs;
+
+    // Computed maps (created once)
+    private final Map<Long, Snapshot> providedById;
+    private final Map<Long, Snapshot> existingById;
+    private final Set<Long> existingBranchIds;
+    private final Set<Long> providedBranchIds;
+
+    // Categorized snapshots
+    private final List<Snapshot> wapSnapshots;
+    private final List<Snapshot> cherryPickedSnapshots;
+    private final List<Snapshot> regularSnapshots;
+
+    // Changes
+    private final List<Snapshot> newSnapshots;
+    private final List<Snapshot> deletedSnapshots;
+    private final Map<String, SnapshotRef> branchUpdates;
+
+    SnapshotDiff(
+        List<Snapshot> providedSnapshots,
+        Map<String, SnapshotRef> providedRefs,
+        List<Snapshot> existingSnapshots,
+        Map<String, SnapshotRef> existingRefs) {
+      this.providedSnapshots = providedSnapshots;
+      this.providedRefs = providedRefs;
+      this.existingSnapshots = existingSnapshots;
+      this.existingRefs = existingRefs;
+
+      // Compute all maps once
+      this.providedById =
+          providedSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
+      this.existingById =
+          existingSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
+      this.existingBranchIds =
+          existingRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
+      this.providedBranchIds =
+          providedRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
+
+      // Compute categorization (order matters: cherry-picked filters WAP)
+      List<Snapshot> initialWapSnapshots = computeWapSnapshots();
+      this.cherryPickedSnapshots = computeCherryPickedSnapshots();
+      this.wapSnapshots = filterWapFromCherryPicked(initialWapSnapshots);
+      this.regularSnapshots = computeRegularSnapshots();
+
+      // Compute changes
+      this.newSnapshots =
+          providedSnapshots.stream()
+              .filter(s -> !existingById.containsKey(s.snapshotId()))
+              .collect(Collectors.toList());
+      this.deletedSnapshots =
+          existingSnapshots.stream()
+              .filter(s -> !providedById.containsKey(s.snapshotId()))
+              .collect(Collectors.toList());
+      this.branchUpdates = computeBranchUpdates();
+    }
+
+    private List<Snapshot> computeWapSnapshots() {
+      Set<Long> allBranchIds =
+          java.util.stream.Stream.concat(existingBranchIds.stream(), providedBranchIds.stream())
+              .collect(Collectors.toSet());
+
+      return providedSnapshots.stream()
+          .filter(
+              s ->
+                  s.summary() != null
+                      && s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP)
+                      && !allBranchIds.contains(s.snapshotId()))
+          .collect(Collectors.toList());
+    }
+
+    private List<Snapshot> computeCherryPickedSnapshots() {
+      Set<Long> cherryPickSourceIds =
+          providedSnapshots.stream()
+              .filter(s -> s.summary() != null && s.summary().containsKey("source-snapshot-id"))
+              .map(s -> Long.parseLong(s.summary().get("source-snapshot-id")))
+              .collect(Collectors.toSet());
+
+      return providedSnapshots.stream()
+          .filter(
+              provided -> {
+                Snapshot existing = existingById.get(provided.snapshotId());
+                if (existing == null) {
+                  return false;
+                }
+
+                // Parent changed (moved to different branch)
+                if (!Objects.equal(provided.parentId(), existing.parentId())) {
+                  return true;
+                }
+
+                // Is source of cherry-pick
+                if (cherryPickSourceIds.contains(provided.snapshotId())) {
+                  return true;
+                }
+
+                // WAP snapshot being published (staged → branch)
+                boolean hasWapId =
+                    provided.summary() != null
+                        && provided.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP);
+                boolean wasStaged = !existingBranchIds.contains(provided.snapshotId());
+                boolean isNowOnBranch = providedBranchIds.contains(provided.snapshotId());
+                return hasWapId && wasStaged && isNowOnBranch;
+              })
+          .collect(Collectors.toList());
+    }
+
+    private List<Snapshot> filterWapFromCherryPicked(List<Snapshot> initialWapSnapshots) {
+      Set<Long> cherryPickedIds =
+          cherryPickedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+      return initialWapSnapshots.stream()
+          .filter(s -> !cherryPickedIds.contains(s.snapshotId()))
+          .collect(Collectors.toList());
+    }
+
+    private List<Snapshot> computeRegularSnapshots() {
+      Set<Long> excludedIds =
+          java.util.stream.Stream.concat(
+                  wapSnapshots.stream().map(Snapshot::snapshotId),
+                  cherryPickedSnapshots.stream().map(Snapshot::snapshotId))
+              .collect(Collectors.toSet());
+
+      return providedSnapshots.stream()
+          .filter(s -> !excludedIds.contains(s.snapshotId()))
+          .collect(Collectors.toList());
+    }
+
+    private Map<String, SnapshotRef> computeBranchUpdates() {
+      return providedRefs.entrySet().stream()
+          .filter(
+              entry -> {
+                SnapshotRef existing = existingRefs.get(entry.getKey());
+                return existing == null || existing.snapshotId() != entry.getValue().snapshotId();
+              })
+          .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+    }
+
+    /**
+     * Validates all snapshot changes before applying them to table metadata. Runs multiple
+     * validation checks to ensure snapshot operations are safe and consistent.
+     *
+     * @param base The base table metadata to validate against (may be null for table creation)
+     * @throws InvalidIcebergSnapshotException if any validation check fails
+     */
+    void validate(TableMetadata base) {
+      validateCurrentSnapshotNotDeleted(base);
+      validateNoAmbiguousCommits();
+      validateDeletedSnapshotsNotReferenced();
+    }
+
+    /**
+     * Validates that the current snapshot is not deleted without providing replacement snapshots.
+     * This prevents leaving the table in an inconsistent state where the current snapshot pointer
+     * would reference a non-existent snapshot.
+     *
+     * @param base The base table metadata containing the current snapshot (may be null for table
+     *     creation)
+     * @throws InvalidIcebergSnapshotException if the current snapshot is being deleted without
+     *     replacements
+     */
+    private void validateCurrentSnapshotNotDeleted(TableMetadata base) {
+      if (base == null || base.currentSnapshot() == null) {
+        return;
+      }
+
+      long currentSnapshotId = base.currentSnapshot().snapshotId();
+      boolean currentDeleted =
+          deletedSnapshots.stream().anyMatch(s -> s.snapshotId() == currentSnapshotId);
+
+      if (currentDeleted && newSnapshots.isEmpty()) {
+        throw new InvalidIcebergSnapshotException(
+            String.format(
+                "Cannot delete the current snapshot %s without adding replacement snapshots. "
+                    + "Deleted: [%s], New: [%s]",
+                currentSnapshotId,
+                deletedSnapshots.stream()
+                    .map(s -> Long.toString(s.snapshotId()))
+                    .collect(Collectors.joining(", ")),
+                newSnapshots.stream()
+                    .map(s -> Long.toString(s.snapshotId()))
+                    .collect(Collectors.joining(", "))));
+      }
+    }
+
+    /**
+     * Validates that no single snapshot is referenced by multiple branches in the same commit. This
+     * prevents ambiguous commits where it's unclear which branch should be the primary reference
+     * for a snapshot. Each snapshot can only be associated with one branch per commit to maintain
+     * clear lineage and avoid conflicts.
+     *
+     * @throws InvalidIcebergSnapshotException if a snapshot is referenced by multiple branches
+     */
+    private void validateNoAmbiguousCommits() {
+      Map<Long, List<String>> snapshotToBranches =
+          branchUpdates.entrySet().stream()
+              .collect(
+                  Collectors.groupingBy(
+                      e -> e.getValue().snapshotId(),
+                      Collectors.mapping(Map.Entry::getKey, Collectors.toList())));
+
+      snapshotToBranches.forEach(
+          (snapshotId, branches) -> {
+            if (branches.size() > 1) {
+              throw new InvalidIcebergSnapshotException(
+                  String.format(
+                      "Ambiguous commit: snapshot %s is referenced by multiple branches [%s] in a single commit. "
+                          + "Each snapshot can only be referenced by one branch per commit.",
+                      snapshotId, String.join(", ", branches)));
+            }
+          });
+    }
+
+    /**
+     * Validates that snapshots being deleted are not still referenced by any branches or tags. This
+     * prevents data loss and maintains referential integrity by ensuring that all branch and tag
+     * pointers reference valid snapshots that will continue to exist after the commit.
+     *
+     * @throws InvalidIcebergSnapshotException if any deleted snapshot is still referenced by a
+     *     branch or tag
+     */
+    private void validateDeletedSnapshotsNotReferenced() {
+      Set<Long> deletedIds =
+          deletedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+
+      Map<Long, List<String>> referencedIdsToRefs =
+          providedRefs.entrySet().stream()
+              .collect(
+                  Collectors.groupingBy(
+                      e -> e.getValue().snapshotId(),
+                      Collectors.mapping(Map.Entry::getKey, Collectors.toList())));
+
+      List<String> invalidDeleteDetails =
+          deletedIds.stream()
+              .filter(referencedIdsToRefs::containsKey)
+              .map(
+                  id ->
+                      String.format(
+                          "snapshot %s (referenced by: %s)",
+                          id, String.join(", ", referencedIdsToRefs.get(id))))
+              .collect(Collectors.toList());
+
+      if (!invalidDeleteDetails.isEmpty()) {
+        throw new InvalidIcebergSnapshotException(
+            String.format(
+                "Cannot delete snapshots that are still referenced by branches/tags: %s",
+                String.join("; ", invalidDeleteDetails)));
+      }
+    }
+
+    TableMetadata.Builder applyTo(TableMetadata metadata) {
+      TableMetadata.Builder builder = TableMetadata.buildFrom(metadata);
+
+      // Remove deleted snapshots
+      if (!deletedSnapshots.isEmpty()) {
+        Set<Long> deletedIds =
+            deletedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+        builder.removeSnapshots(deletedIds);
+      }
+
+      // Remove stale branch references
+      metadata.refs().keySet().stream()
+          .filter(refName -> !providedRefs.containsKey(refName))
+          .forEach(builder::removeRef);
+
+      // Track existing snapshot IDs after deletions
+      Set<Long> existingAfterDeletion =
+          metadata.snapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+      deletedSnapshots.forEach(s -> existingAfterDeletion.remove(s.snapshotId()));
+
+      // Add unreferenced new snapshots
+      providedSnapshots.stream()
+          .filter(
+              s ->
+                  !existingAfterDeletion.contains(s.snapshotId())
+                      && !providedBranchIds.contains(s.snapshotId()))
+          .forEach(builder::addSnapshot);
+
+      // Set branch pointers
+      providedRefs.forEach(
+          (branchName, ref) -> {
+            Snapshot snapshot = providedById.get(ref.snapshotId());
+            if (snapshot == null) {
+              throw new InvalidIcebergSnapshotException(
+                  String.format(
+                      "Branch %s references non-existent snapshot %s",
+                      branchName, ref.snapshotId()));
+            }
+
+            if (existingAfterDeletion.contains(snapshot.snapshotId())) {
+              SnapshotRef existingRef = metadata.refs().get(branchName);
+              if (existingRef == null || existingRef.snapshotId() != ref.snapshotId()) {
+                builder.setRef(branchName, ref);
+              }
+            } else {
+              builder.setBranchSnapshot(snapshot, branchName);
+            }
+          });
+
+      return builder;
+    }
+
+    void recordMetrics(TableMetadata.Builder builder) {
+      int appendedCount =
+          (int)
+              regularSnapshots.stream()
+                  .filter(s -> !existingById.containsKey(s.snapshotId()))
+                  .count();
+      int stagedCount = wapSnapshots.size();
+      int cherryPickedCount = cherryPickedSnapshots.size();
+      int deletedCount = deletedSnapshots.size();
+
+      if (appendedCount > 0) {
+        metricsReporter.count(InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedCount);
+      }
+      if (stagedCount > 0) {
+        metricsReporter.count(InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, stagedCount);
+      }
+      if (cherryPickedCount > 0) {
+        metricsReporter.count(
+            InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR, cherryPickedCount);
+      }
+      if (deletedCount > 0) {
+        metricsReporter.count(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, deletedCount);
+      }
+
+      // Record snapshot IDs in properties
+      List<Snapshot> newRegularSnapshots =
+          regularSnapshots.stream().filter(newSnapshots::contains).collect(Collectors.toList());
+      if (!newRegularSnapshots.isEmpty()) {
+        builder.setProperties(
+            Collections.singletonMap(
+                getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
+                formatSnapshotIds(newRegularSnapshots)));
+      }
+      if (!wapSnapshots.isEmpty()) {
+        builder.setProperties(
+            Collections.singletonMap(
+                getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
+                formatSnapshotIds(wapSnapshots)));
+      }
+      if (!cherryPickedSnapshots.isEmpty()) {
+        builder.setProperties(
+            Collections.singletonMap(
+                getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
+                formatSnapshotIds(cherryPickedSnapshots)));
+      }
+      if (!deletedSnapshots.isEmpty()) {
+        builder.setProperties(
+            Collections.singletonMap(
+                getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
+                formatSnapshotIds(deletedSnapshots)));
+      }
+
+      builder.removeProperties(
+          Sets.newHashSet(
+              CatalogConstants.SNAPSHOTS_JSON_KEY, CatalogConstants.SNAPSHOTS_REFS_KEY));
+    }
+  }
+
+  /**
+   * Formats a list of snapshots as a comma-separated string of snapshot IDs. Optimized
+   * implementation using StringBuilder for better performance with large lists.
+   *
+   * @param snapshots List of snapshots to format
+   * @return Comma-separated string of snapshot IDs, or empty string if list is empty
+   */
+  private String formatSnapshotIds(List<Snapshot> snapshots) {
+    if (snapshots.isEmpty()) {
+      return "";
+    }
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < snapshots.size(); i++) {
+      if (i > 0) {
+        sb.append(',');
+      }
+      sb.append(snapshots.get(i).snapshotId());
+    }
+    return sb.toString();
+  }
+}
diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotInspector.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotInspector.java
deleted file mode 100644
index dc7dd06c2..000000000
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotInspector.java
+++ /dev/null
@@ -1,96 +0,0 @@
-package com.linkedin.openhouse.internal.catalog;
-
-import com.linkedin.openhouse.internal.catalog.exception.InvalidIcebergSnapshotException;
-import java.io.UncheckedIOException;
-import java.util.List;
-import java.util.function.Consumer;
-import java.util.function.Supplier;
-import java.util.stream.StreamSupport;
-import org.apache.hadoop.fs.Path;
-import org.apache.iceberg.DataFile;
-import org.apache.iceberg.DeleteFile;
-import org.apache.iceberg.ManifestFile;
-import org.apache.iceberg.Snapshot;
-import org.apache.iceberg.TableMetadata;
-import org.apache.iceberg.io.FileIO;
-import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.stereotype.Component;
-
-/**
- * A inspector class providing functionalities that inspect components of {@link Snapshot} provided
- * by clients and decide if OpenHouse need to take additional steps to incorporate it or decide
- * whether to incorporate at all.
- *
- * <p>Instance of this class will be injected into {@link OpenHouseInternalTableOperations} in
- * runtime.
- */
-@Component
-public class SnapshotInspector {
-  @Autowired private Consumer<Supplier<Path>> fileSecurer;
-  /**
-   * TODO: ADD Validation for snapshot: Sequence-number based, schema-id based, see iceberg spec for
-   * details. Throwing exceptions when failures occurred.
-   *
-   * @param providedSnapshot deserialized {@link Snapshot} object that clients provided.
-   * @throws InvalidIcebergSnapshotException Exception thrown from the process validating the
-   *     snapshot provided by client.
-   */
-  void validateSnapshot(Snapshot providedSnapshot) throws InvalidIcebergSnapshotException {
-    // TODO: Fill this method.
-  }
-
-  void validateSnapshotsUpdate(
-      TableMetadata metadata, List<Snapshot> addedSnapshots, List<Snapshot> deletedSnapshots) {
-    if (metadata.currentSnapshot() == null) {
-      // no need to verify attempt to delete current snapshot if it doesn't exist
-      // deletedSnapshots is necessarily empty when original snapshots list is empty
-      return;
-    }
-    if (!addedSnapshots.isEmpty()) {
-      // latest snapshot can be deleted if new snapshots are added.
-      return;
-    }
-    long latestSnapshotId = metadata.currentSnapshot().snapshotId();
-    if (!deletedSnapshots.isEmpty()
-        && deletedSnapshots.get(deletedSnapshots.size() - 1).snapshotId() == latestSnapshotId) {
-      throw new InvalidIcebergSnapshotException(
-          String.format("Cannot delete the latest snapshot %s", latestSnapshotId));
-    }
-  }
-
-  /**
-   * A sister method to {@link #validateSnapshot(Snapshot)} that change the file-level permission to
-   * be OpenHouse exclusive to avoid unexpected changes from unauthorized parties. Throwing
-   * exceptions when failures occurred.
-   *
-   * @param providedSnapshot deserialized {@link Snapshot} object that clients provided.
-   * @param fileIO {@link FileIO} object
-   * @throws UncheckedIOException Exception thrown from the process securing the files associated
-   *     with {@param providedSnapshot}.
-   */
-  @VisibleForTesting
-  void secureSnapshot(Snapshot providedSnapshot, FileIO fileIO) throws UncheckedIOException {
-    secureDataFile(providedSnapshot.addedDataFiles(fileIO));
-    secureDeleteFile(providedSnapshot.addedDeleteFiles(fileIO));
-    secureManifestFile(providedSnapshot.allManifests(fileIO));
-  }
-
-  private void secureDataFile(Iterable<DataFile> dataFiles) {
-    StreamSupport.stream(dataFiles.spliterator(), false)
-        .map(x -> (Supplier<Path>) (() -> new Path(x.path().toString())))
-        .forEach(fileSecurer);
-  }
-
-  private void secureDeleteFile(Iterable<DeleteFile> deleteFiles) {
-    StreamSupport.stream(deleteFiles.spliterator(), false)
-        .map(x -> (Supplier<Path>) (() -> new Path(x.path().toString())))
-        .forEach(fileSecurer);
-  }
-
-  private void secureManifestFile(List<ManifestFile> manifestFiles) throws UncheckedIOException {
-    manifestFiles.stream()
-        .map(x -> (Supplier<Path>) (() -> new Path(x.path())))
-        .forEach(fileSecurer);
-  }
-}
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index f514ed162..c5c186eb5 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -103,26 +103,31 @@ void setup() {
     Mockito.when(mockHouseTableMapper.toHouseTable(Mockito.any(TableMetadata.class), Mockito.any()))
         .thenReturn(mockHouseTable);
     HadoopFileIO fileIO = new HadoopFileIO(new Configuration());
+    MetricsReporter metricsReporter =
+        new MetricsReporter(new SimpleMeterRegistry(), "TEST_CATALOG", Lists.newArrayList());
+    SnapshotDiffApplier snapshotDiffApplier = new SnapshotDiffApplier(metricsReporter);
     openHouseInternalTableOperations =
         new OpenHouseInternalTableOperations(
             mockHouseTableRepository,
             fileIO,
-            Mockito.mock(SnapshotInspector.class),
             mockHouseTableMapper,
             TEST_TABLE_IDENTIFIER,
-            new MetricsReporter(new SimpleMeterRegistry(), "TEST_CATALOG", Lists.newArrayList()),
-            fileIOManager);
+            metricsReporter,
+            fileIOManager,
+            snapshotDiffApplier);
 
     // Create a separate instance with mock metrics reporter for testing metrics
+    SnapshotDiffApplier snapshotDiffApplierWithMockMetrics =
+        new SnapshotDiffApplier(mockMetricsReporter);
     openHouseInternalTableOperationsWithMockMetrics =
         new OpenHouseInternalTableOperations(
             mockHouseTableRepository,
             fileIO,
-            Mockito.mock(SnapshotInspector.class),
             mockHouseTableMapper,
             TEST_TABLE_IDENTIFIER,
             mockMetricsReporter,
-            fileIOManager);
+            fileIOManager,
+            snapshotDiffApplierWithMockMetrics);
 
     LocalStorage localStorage = mock(LocalStorage.class);
     when(fileIOManager.getStorage(fileIO)).thenReturn(localStorage);
@@ -677,7 +682,9 @@ void testAppendSnapshotsWithOldSnapshots() throws IOException {
 
     Assertions.assertThrows(
         IllegalArgumentException.class,
-        () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata));
+        () ->
+            openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+                baseMetadata, newMetadata));
 
     // the latest snapshots have larger timestamp than the previous metadata timestamp, so it should
     // pass the validation
@@ -691,7 +698,8 @@ void testAppendSnapshotsWithOldSnapshots() throws IOException {
             IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots.get(snapshots.size() - 1))));
 
     TableMetadata newMetadataWithFuture = baseMetadata.replaceProperties(propertiesWithFuture);
-    openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadataWithFuture);
+    openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+        baseMetadata, newMetadataWithFuture);
   }
 
   /**
@@ -1136,17 +1144,19 @@ private void testMetricIncludesDatabaseTag(
     SimpleMeterRegistry meterRegistry = new SimpleMeterRegistry();
     MetricsReporter realMetricsReporter =
         new MetricsReporter(meterRegistry, "TEST_CATALOG", Lists.newArrayList());
+    HadoopFileIO fileIO = new HadoopFileIO(new Configuration());
+    SnapshotDiffApplier snapshotDiffApplier = new SnapshotDiffApplier(realMetricsReporter);
 
     // Create instance with real metrics reporter
     OpenHouseInternalTableOperations operationsWithRealMetrics =
         new OpenHouseInternalTableOperations(
             mockHouseTableRepository,
-            new HadoopFileIO(new Configuration()),
-            Mockito.mock(SnapshotInspector.class),
+            fileIO,
             mockHouseTableMapper,
             TEST_TABLE_IDENTIFIER,
             realMetricsReporter,
-            fileIOManager);
+            fileIOManager,
+            snapshotDiffApplier);
 
     // Setup test-specific mocks
     setupFunction.accept(operationsWithRealMetrics);
@@ -1199,17 +1209,19 @@ private void testMetricHasHistogramBuckets(
 
     MetricsReporter realMetricsReporter =
         new MetricsReporter(meterRegistry, "TEST_CATALOG", Lists.newArrayList());
+    HadoopFileIO fileIO = new HadoopFileIO(new Configuration());
+    SnapshotDiffApplier snapshotDiffApplier = new SnapshotDiffApplier(realMetricsReporter);
 
     // Create instance with real metrics reporter
     OpenHouseInternalTableOperations operationsWithRealMetrics =
         new OpenHouseInternalTableOperations(
             mockHouseTableRepository,
-            new HadoopFileIO(new Configuration()),
-            Mockito.mock(SnapshotInspector.class),
+            fileIO,
             mockHouseTableMapper,
             TEST_TABLE_IDENTIFIER,
             realMetricsReporter,
-            fileIOManager);
+            fileIOManager,
+            snapshotDiffApplier);
 
     // Setup test-specific mocks
     setupFunction.accept(operationsWithRealMetrics);
@@ -1380,7 +1392,9 @@ void testDeleteSnapshotWithMainReference() throws IOException {
     InvalidIcebergSnapshotException exception =
         Assertions.assertThrows(
             InvalidIcebergSnapshotException.class,
-            () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata),
+            () ->
+                openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+                    baseMetadata, newMetadata),
             "Should throw InvalidIcebergSnapshotException when trying to delete referenced snapshot");
 
     // Verify error message mentions the reference
@@ -1427,7 +1441,8 @@ void testDeleteSnapshotWithNoReference() throws IOException {
     TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
     TableMetadata result =
-        openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata);
+        openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+            baseMetadata, newMetadata);
 
     // Verify unreferenced snapshots were removed
     List<Snapshot> unreferencedSnapshots = testSnapshots.subList(0, 2);
@@ -1508,7 +1523,9 @@ void testDeleteSnapshotWithMultipleReference() throws IOException {
     InvalidIcebergSnapshotException exception =
         Assertions.assertThrows(
             InvalidIcebergSnapshotException.class,
-            () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata),
+            () ->
+                openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+                    baseMetadata, newMetadata),
             "Should throw InvalidIcebergSnapshotException when trying to delete snapshot referenced by multiple branches");
 
     // Verify error message mentions the snapshot is still referenced
@@ -1571,7 +1588,9 @@ void testDeleteSnapshotWithBranchReference() throws IOException {
     InvalidIcebergSnapshotException exception =
         Assertions.assertThrows(
             InvalidIcebergSnapshotException.class,
-            () -> openHouseInternalTableOperations.applySnapshots(finalBaseMetadata, newMetadata),
+            () ->
+                openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+                    finalBaseMetadata, newMetadata),
             "Should throw InvalidIcebergSnapshotException when trying to delete snapshot referenced by tag");
 
     // Verify error message mentions tag reference
@@ -1616,7 +1635,8 @@ void testDeleteEmptySnapshotList() throws IOException {
     TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
     TableMetadata result =
-        openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata);
+        openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+            baseMetadata, newMetadata);
 
     // Verify no changes were made
     Assertions.assertEquals(
@@ -1661,7 +1681,8 @@ void testDeleteNullSnapshotList() throws IOException {
     TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
     TableMetadata result =
-        openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata);
+        openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+            baseMetadata, newMetadata);
 
     // Verify no changes were made
     Assertions.assertEquals(
@@ -1710,7 +1731,8 @@ void testDeleteNonExistentSnapshot() throws IOException {
     TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
     TableMetadata result =
-        openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata);
+        openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+            baseMetadata, newMetadata);
 
     // Verify original snapshots are unchanged
     Assertions.assertEquals(
@@ -1755,7 +1777,8 @@ void testDeleteSnapshotMetricsRecorded() throws IOException {
     TableMetadata newMetadata = finalBaseMetadata.replaceProperties(properties);
 
     // Use the operations instance with mock metrics reporter
-    openHouseInternalTableOperationsWithMockMetrics.applySnapshots(finalBaseMetadata, newMetadata);
+    openHouseInternalTableOperationsWithMockMetrics.snapshotDiffApplier.applySnapshots(
+        finalBaseMetadata, newMetadata);
 
     // Verify metrics were recorded
     Mockito.verify(mockMetricsReporter)
@@ -1796,7 +1819,8 @@ void testDeleteSnapshotMetricsRecordedBranch() throws IOException {
     TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
     // Use the operations instance with mock metrics reporter
-    openHouseInternalTableOperationsWithMockMetrics.applySnapshots(baseMetadata, newMetadata);
+    openHouseInternalTableOperationsWithMockMetrics.snapshotDiffApplier.applySnapshots(
+        baseMetadata, newMetadata);
 
     // Verify metrics were recorded for the basic deletion
     Mockito.verify(mockMetricsReporter)
@@ -1842,7 +1866,8 @@ void testDeleteSnapshotMetricsRecordedNonExistent() throws IOException {
     TableMetadata newMetadata = finalBaseMetadata.replaceProperties(properties);
 
     // Use the operations instance with mock metrics reporter
-    openHouseInternalTableOperationsWithMockMetrics.applySnapshots(finalBaseMetadata, newMetadata);
+    openHouseInternalTableOperationsWithMockMetrics.snapshotDiffApplier.applySnapshots(
+        finalBaseMetadata, newMetadata);
 
     // Verify metrics are not recorded for non-existent snapshots (no actual deletion)
     Mockito.verify(mockMetricsReporter, Mockito.never())
@@ -1897,7 +1922,9 @@ void testDeleteAllSnapshotsFailsWhenMainBranchReferenced() throws IOException {
     InvalidIcebergSnapshotException exception =
         Assertions.assertThrows(
             InvalidIcebergSnapshotException.class,
-            () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata),
+            () ->
+                openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+                    baseMetadata, newMetadata),
             "Should throw InvalidIcebergSnapshotException when trying to delete snapshot referenced by multiple branches");
 
     // Verify error message mentions the snapshot is still referenced
@@ -1941,7 +1968,9 @@ void testDeleteAllUnreferencedSnapshotsSucceeds() throws IOException {
     // This should succeed since no snapshots are referenced by any branch/tag
     TableMetadata result =
         Assertions.assertDoesNotThrow(
-            () -> openHouseInternalTableOperations.applySnapshots(finalBaseMetadata, newMetadata),
+            () ->
+                openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+                    finalBaseMetadata, newMetadata),
             "Should succeed when deleting all unreferenced snapshots");
 
     // Verify all snapshots were removed from the metadata
@@ -2003,7 +2032,9 @@ void testValidMultipleBranchesWithDifferentSnapshots() throws IOException {
 
     // This should NOT throw an exception
     Assertions.assertDoesNotThrow(
-        () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata),
+        () ->
+            openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+                baseMetadata, newMetadata),
         "Should NOT throw exception when branches target different snapshots");
   }
 
@@ -2051,7 +2082,9 @@ void testStandardWAPScenario() throws IOException {
 
     // Should succeed - standard WAP workflow where WAP snapshot becomes the new main
     Assertions.assertDoesNotThrow(
-        () -> openHouseInternalTableOperations.applySnapshots(baseMetadata, newMetadata),
+        () ->
+            openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
+                baseMetadata, newMetadata),
         "Should successfully pull WAP snapshot into main branch");
   }
 
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
new file mode 100644
index 000000000..4fa913b4d
--- /dev/null
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
@@ -0,0 +1,359 @@
+package com.linkedin.openhouse.internal.catalog;
+
+import static com.linkedin.openhouse.internal.catalog.mapper.HouseTableSerdeUtils.getCanonicalFieldName;
+import static org.junit.jupiter.api.Assertions.*;
+import static org.mockito.Mockito.*;
+
+import com.linkedin.openhouse.cluster.metrics.micrometer.MetricsReporter;
+import com.linkedin.openhouse.internal.catalog.exception.InvalidIcebergSnapshotException;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import lombok.SneakyThrows;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.SnapshotRef;
+import org.apache.iceberg.SortOrder;
+import org.apache.iceberg.TableMetadata;
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
+
+public class SnapshotDiffApplierTest {
+
+  private SnapshotDiffApplier snapshotDiffApplier;
+  private MetricsReporter mockMetricsReporter;
+  private TableMetadata baseMetadata;
+  private static final String TEST_TABLE_LOCATION = getTempLocation();
+
+  @SneakyThrows
+  private static String getTempLocation() {
+    return Files.createTempDirectory(UUID.randomUUID().toString()).toString();
+  }
+
+  @BeforeEach
+  void setup() {
+    mockMetricsReporter = Mockito.mock(MetricsReporter.class);
+    snapshotDiffApplier = new SnapshotDiffApplier(mockMetricsReporter);
+
+    Schema schema =
+        new Schema(
+            Types.NestedField.required(1, "id", Types.LongType.get()),
+            Types.NestedField.optional(2, "data", Types.StringType.get()));
+
+    baseMetadata =
+        TableMetadata.newTableMetadata(
+            schema,
+            PartitionSpec.unpartitioned(),
+            SortOrder.unsorted(),
+            TEST_TABLE_LOCATION,
+            new HashMap<>());
+  }
+
+  @Test
+  void testApplySnapshots_noSnapshotsJson_returnsUnmodified() {
+    TableMetadata result = snapshotDiffApplier.applySnapshots(null, baseMetadata);
+
+    assertEquals(baseMetadata, result);
+    verifyNoInteractions(mockMetricsReporter);
+  }
+
+  @Test
+  void testApplySnapshots_nullBase_handlesTableCreation() throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots.get(snapshots.size() - 1))));
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
+    TableMetadata result = snapshotDiffApplier.applySnapshots(null, newMetadata);
+
+    assertNotNull(result);
+    assertEquals(snapshots.size(), result.snapshots().size());
+  }
+
+  @Test
+  void testApplySnapshots_addNewSnapshots_success() throws IOException {
+    List<Snapshot> initialSnapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, initialSnapshots);
+
+    List<Snapshot> allSnapshots = new ArrayList<>(initialSnapshots);
+    allSnapshots.addAll(IcebergTestUtil.getExtraSnapshots());
+
+    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+                allSnapshots.get(allSnapshots.size() - 1))));
+
+    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
+
+    assertNotNull(result);
+    assertTrue(result.snapshots().size() > baseWithSnapshots.snapshots().size());
+
+    verify(mockMetricsReporter, atLeastOnce()).count(anyString(), anyDouble());
+  }
+
+  @Test
+  void testValidateCurrentSnapshotNotDeleted_whenCurrentDeleted_throwsException()
+      throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
+
+    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY,
+        SnapshotsUtil.serializedSnapshots(Collections.emptyList()));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(new HashMap<>()));
+
+    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+
+    InvalidIcebergSnapshotException exception =
+        assertThrows(
+            InvalidIcebergSnapshotException.class,
+            () -> snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata));
+
+    assertTrue(exception.getMessage().contains("Cannot delete the current snapshot"));
+  }
+
+  @Test
+  void testValidateNoAmbiguousCommits_whenSnapshotReferencedByMultipleBranches_throwsException()
+      throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
+
+    Snapshot targetSnapshot = snapshots.get(0);
+
+    Map<String, String> snapshotRefs = new HashMap<>();
+    SnapshotRef ref = SnapshotRef.branchBuilder(targetSnapshot.snapshotId()).build();
+    snapshotRefs.put("branch1", org.apache.iceberg.SnapshotRefParser.toJson(ref));
+    snapshotRefs.put("branch2", org.apache.iceberg.SnapshotRefParser.toJson(ref));
+
+    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
+    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(snapshotRefs));
+
+    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+
+    InvalidIcebergSnapshotException exception =
+        assertThrows(
+            InvalidIcebergSnapshotException.class,
+            () -> snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata));
+
+    assertTrue(exception.getMessage().contains("Ambiguous commit"));
+    assertTrue(exception.getMessage().contains("referenced by multiple branches"));
+  }
+
+  @Test
+  void
+      testValidateDeletedSnapshotsNotReferenced_whenDeletedSnapshotStillReferenced_throwsException()
+          throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
+
+    Snapshot snapshotToDelete = snapshots.get(0);
+    List<Snapshot> remainingSnapshots = snapshots.subList(1, snapshots.size());
+
+    Map<String, String> snapshotRefs = new HashMap<>();
+    SnapshotRef ref = SnapshotRef.branchBuilder(snapshotToDelete.snapshotId()).build();
+    snapshotRefs.put(SnapshotRef.MAIN_BRANCH, org.apache.iceberg.SnapshotRefParser.toJson(ref));
+
+    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(remainingSnapshots));
+    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(snapshotRefs));
+
+    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+
+    InvalidIcebergSnapshotException exception =
+        assertThrows(
+            InvalidIcebergSnapshotException.class,
+            () -> snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata));
+
+    assertTrue(exception.getMessage().contains("Cannot delete snapshots"));
+    assertTrue(exception.getMessage().contains("still referenced"));
+  }
+
+  @Test
+  void testApplySnapshots_withWapSnapshots_recordsCorrectMetrics() throws IOException {
+    List<Snapshot> baseSnapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, baseSnapshots);
+
+    List<Snapshot> wapSnapshots = IcebergTestUtil.getWapSnapshots();
+    List<Snapshot> allSnapshots = new ArrayList<>(baseSnapshots);
+    allSnapshots.addAll(wapSnapshots);
+
+    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+                baseSnapshots.get(baseSnapshots.size() - 1))));
+
+    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
+
+    assertNotNull(result);
+
+    verify(mockMetricsReporter)
+        .count(eq(InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR), anyDouble());
+  }
+
+  @Test
+  void testApplySnapshots_deleteSnapshots_recordsCorrectMetrics() throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
+
+    List<Snapshot> remainingSnapshots = snapshots.subList(1, snapshots.size());
+
+    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(remainingSnapshots));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+                remainingSnapshots.get(remainingSnapshots.size() - 1))));
+
+    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
+
+    assertNotNull(result);
+    assertEquals(remainingSnapshots.size(), result.snapshots().size());
+
+    verify(mockMetricsReporter)
+        .count(eq(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR), eq(1.0));
+  }
+
+  @Test
+  void testApplySnapshots_recordsSnapshotIdsInProperties() throws IOException {
+    List<Snapshot> baseSnapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, baseSnapshots);
+
+    List<Snapshot> newSnapshotsList = IcebergTestUtil.getExtraSnapshots();
+    List<Snapshot> allSnapshots = new ArrayList<>(baseSnapshots);
+    allSnapshots.addAll(newSnapshotsList);
+
+    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+                allSnapshots.get(allSnapshots.size() - 1))));
+
+    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
+
+    assertNotNull(result);
+
+    String appendedSnapshots =
+        result.properties().get(getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS));
+    assertNotNull(appendedSnapshots, "Appended snapshots should be recorded in properties");
+
+    assertTrue(appendedSnapshots.contains(",") || !appendedSnapshots.isEmpty());
+  }
+
+  @Test
+  void testApplySnapshots_removesSnapshotKeysFromProperties() throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
+    properties.put(
+        CatalogConstants.SNAPSHOTS_REFS_KEY,
+        SnapshotsUtil.serializeMap(
+            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots.get(snapshots.size() - 1))));
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
+    TableMetadata result = snapshotDiffApplier.applySnapshots(null, newMetadata);
+
+    assertNotNull(result);
+
+    assertFalse(
+        result.properties().containsKey(CatalogConstants.SNAPSHOTS_JSON_KEY),
+        "Snapshots JSON key should be removed from final properties");
+    assertFalse(
+        result.properties().containsKey(CatalogConstants.SNAPSHOTS_REFS_KEY),
+        "Snapshots refs key should be removed from final properties");
+  }
+
+  @Test
+  void testApplySnapshots_branchUpdates_appliesCorrectly() throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
+
+    Snapshot newBranchTarget = snapshots.get(1);
+    Map<String, String> snapshotRefs =
+        IcebergTestUtil.obtainSnapshotRefsFromSnapshot(newBranchTarget);
+
+    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
+    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(snapshotRefs));
+
+    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
+
+    assertNotNull(result);
+    assertNotNull(result.currentSnapshot());
+    assertEquals(newBranchTarget.snapshotId(), result.currentSnapshot().snapshotId());
+  }
+
+  @Test
+  void testApplySnapshots_multipleBranchUpdates_success() throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
+
+    Map<String, String> snapshotRefs = new HashMap<>();
+    SnapshotRef mainRef = SnapshotRef.branchBuilder(snapshots.get(0).snapshotId()).build();
+    SnapshotRef devRef = SnapshotRef.branchBuilder(snapshots.get(1).snapshotId()).build();
+    snapshotRefs.put(SnapshotRef.MAIN_BRANCH, org.apache.iceberg.SnapshotRefParser.toJson(mainRef));
+    snapshotRefs.put("dev", org.apache.iceberg.SnapshotRefParser.toJson(devRef));
+
+    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
+    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(snapshotRefs));
+
+    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
+
+    assertNotNull(result);
+    assertEquals(2, result.refs().size());
+  }
+
+  private TableMetadata addSnapshotsToMetadata(TableMetadata metadata, List<Snapshot> snapshots) {
+    TableMetadata.Builder builder = TableMetadata.buildFrom(metadata);
+    for (Snapshot snapshot : snapshots) {
+      builder.addSnapshot(snapshot);
+    }
+    if (!snapshots.isEmpty()) {
+      Snapshot lastSnapshot = snapshots.get(snapshots.size() - 1);
+      SnapshotRef ref = SnapshotRef.branchBuilder(lastSnapshot.snapshotId()).build();
+      builder.setRef(SnapshotRef.MAIN_BRANCH, ref);
+    }
+    return builder.build();
+  }
+}
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotInspectorTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotInspectorTest.java
deleted file mode 100644
index 3fb9ced17..000000000
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotInspectorTest.java
+++ /dev/null
@@ -1,171 +0,0 @@
-package com.linkedin.openhouse.internal.catalog;
-
-import com.linkedin.openhouse.internal.catalog.exception.InvalidIcebergSnapshotException;
-import com.linkedin.openhouse.internal.catalog.mapper.HouseTableMapperTest;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.attribute.FileAttribute;
-import java.nio.file.attribute.PosixFilePermission;
-import java.nio.file.attribute.PosixFilePermissions;
-import java.util.Collections;
-import java.util.List;
-import java.util.Set;
-import java.util.UUID;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.iceberg.DataFile;
-import org.apache.iceberg.DataFiles;
-import org.apache.iceberg.ManifestFiles;
-import org.apache.iceberg.ManifestWriter;
-import org.apache.iceberg.PartitionSpec;
-import org.apache.iceberg.Schema;
-import org.apache.iceberg.Snapshot;
-import org.apache.iceberg.SnapshotRef;
-import org.apache.iceberg.TableMetadata;
-import org.apache.iceberg.hadoop.HadoopOutputFile;
-import org.apache.iceberg.io.FileIO;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-import org.apache.iceberg.types.Types;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.mockito.Mockito;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.context.annotation.Import;
-
-@SpringBootTest
-@Import(HouseTableMapperTest.MockConfiguration.class)
-class SnapshotInspectorTest {
-
-  @Autowired SnapshotInspector snapshotInspector;
-
-  @TempDir static Path tempDir;
-
-  private static final TableMetadata NO_SNAPSHOTS_METADATA =
-      TableMetadata.newTableMetadata(
-          new Schema(
-              Types.NestedField.required(1, "data", Types.StringType.get()),
-              Types.NestedField.required(2, "ts", Types.TimestampType.withoutZone())),
-          PartitionSpec.unpartitioned(),
-          UUID.randomUUID().toString(),
-          ImmutableMap.of());
-
-  @Test
-  void testValidateSnapshotsUpdateWithNoSnapshotMetadata() throws IOException {
-
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-    // No exception since added as well deleted snapshots are allowed to support replication
-    // use case which performs table commit with added and deleted snapshots.
-    Assertions.assertDoesNotThrow(
-        () ->
-            snapshotInspector.validateSnapshotsUpdate(
-                NO_SNAPSHOTS_METADATA, testSnapshots.subList(0, 1), testSnapshots.subList(1, 4)));
-    Assertions.assertDoesNotThrow(
-        () ->
-            snapshotInspector.validateSnapshotsUpdate(
-                NO_SNAPSHOTS_METADATA, testSnapshots, Collections.emptyList()));
-    Assertions.assertDoesNotThrow(
-        () ->
-            snapshotInspector.validateSnapshotsUpdate(
-                NO_SNAPSHOTS_METADATA, Collections.emptyList(), testSnapshots));
-  }
-
-  @Test
-  void testValidateSnapshotsUpdateWithSnapshotMetadata() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-    List<Snapshot> extraTestSnapshots = IcebergTestUtil.getExtraSnapshots();
-    TableMetadata metadataWithSnapshots =
-        TableMetadata.buildFrom(NO_SNAPSHOTS_METADATA)
-            .setBranchSnapshot(testSnapshots.get(testSnapshots.size() - 1), SnapshotRef.MAIN_BRANCH)
-            .build();
-    Assertions.assertDoesNotThrow(
-        () ->
-            snapshotInspector.validateSnapshotsUpdate(
-                metadataWithSnapshots, testSnapshots, Collections.emptyList()));
-    // No validation error if snapshots are added and deleted
-    Assertions.assertDoesNotThrow(
-        () ->
-            snapshotInspector.validateSnapshotsUpdate(
-                metadataWithSnapshots, testSnapshots, testSnapshots));
-    // No validation error if snapshots are added and deleted
-    Assertions.assertDoesNotThrow(
-        () ->
-            snapshotInspector.validateSnapshotsUpdate(
-                metadataWithSnapshots, extraTestSnapshots, testSnapshots));
-    Assertions.assertThrows(
-        InvalidIcebergSnapshotException.class,
-        () ->
-            snapshotInspector.validateSnapshotsUpdate(
-                metadataWithSnapshots, Collections.emptyList(), testSnapshots));
-    Assertions.assertDoesNotThrow(
-        () ->
-            snapshotInspector.validateSnapshotsUpdate(
-                metadataWithSnapshots,
-                Collections.emptyList(),
-                testSnapshots.subList(0, testSnapshots.size() - 1)));
-  }
-
-  @Test
-  void testSecureSnapshot() throws IOException {
-    // The default file attribute that sets the permission as 777 when a file is created.
-    FileAttribute<Set<PosixFilePermission>> attr =
-        PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxrwxrwx"));
-
-    // Mock DataFile and ManifestFile
-    Snapshot mockSnapshot = Mockito.mock(org.apache.iceberg.Snapshot.class);
-    Path tempFile1 = Files.createFile(tempDir.resolve("data1.parquet"), attr);
-    Path tempFile2 = Files.createFile(tempDir.resolve("data2.parquet"), attr);
-    Path tempFile3 = Files.createFile(tempDir.resolve("manifest"), attr);
-
-    // Mock FileIO
-    FileIO fileIO = Mockito.mock(org.apache.iceberg.io.FileIO.class);
-
-    List<DataFile> dataFileList =
-        ImmutableList.of(
-            createDataFile(tempFile1.toString()), createDataFile(tempFile2.toString()));
-
-    ManifestWriter<DataFile> manifestWriter =
-        ManifestFiles.write(
-            PartitionSpec.unpartitioned(),
-            HadoopOutputFile.fromLocation(tempFile3.toString(), new Configuration()));
-    manifestWriter.close();
-
-    Mockito.when(mockSnapshot.allManifests(fileIO))
-        .thenReturn(ImmutableList.of(manifestWriter.toManifestFile()));
-    Mockito.when(mockSnapshot.addedDataFiles(fileIO)).thenReturn(dataFileList);
-    snapshotInspector.secureSnapshot(mockSnapshot, fileIO);
-
-    /* Verify the perms of files are modified as com.linkedin.openhouse.internal.catalog.MockApplication.perm does */
-    FileSystem fileSystem = FileSystem.get(new Configuration());
-    Assertions.assertEquals(
-        fileSystem
-            .getFileStatus(new org.apache.hadoop.fs.Path(tempFile1.toString()))
-            .getPermission(),
-        MockApplication.FS_PERMISSION);
-    Assertions.assertEquals(
-        fileSystem
-            .getFileStatus(new org.apache.hadoop.fs.Path(tempFile2.toString()))
-            .getPermission(),
-        MockApplication.FS_PERMISSION);
-    Assertions.assertEquals(
-        fileSystem
-            .getFileStatus(new org.apache.hadoop.fs.Path(tempFile3.toString()))
-            .getPermission(),
-        MockApplication.FS_PERMISSION);
-  }
-
-  public static DataFile createDataFile(String dataPath) throws IOException {
-    Files.write(Paths.get(dataPath), Lists.newArrayList(), StandardCharsets.UTF_8);
-    return DataFiles.builder(PartitionSpec.unpartitioned())
-        .withPath(dataPath)
-        .withFileSizeInBytes(10)
-        .withRecordCount(1)
-        .build();
-  }
-}
diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/RepositoryTestWithSettableComponents.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/RepositoryTestWithSettableComponents.java
index 85044da5e..c6073aff7 100644
--- a/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/RepositoryTestWithSettableComponents.java
+++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/RepositoryTestWithSettableComponents.java
@@ -8,7 +8,6 @@
 import com.linkedin.openhouse.cluster.storage.StorageManager;
 import com.linkedin.openhouse.common.test.cluster.PropertyOverrideContextInitializer;
 import com.linkedin.openhouse.internal.catalog.OpenHouseInternalTableOperations;
-import com.linkedin.openhouse.internal.catalog.SnapshotInspector;
 import com.linkedin.openhouse.internal.catalog.fileio.FileIOManager;
 import com.linkedin.openhouse.internal.catalog.mapper.HouseTableMapper;
 import com.linkedin.openhouse.internal.catalog.model.HouseTable;
@@ -60,8 +59,6 @@ public class RepositoryTestWithSettableComponents {
 
   @Autowired FileIOManager fileIOManager;
 
-  @Autowired SnapshotInspector snapshotInspector;
-
   @Autowired HouseTableMapper houseTableMapper;
 
   @Autowired MeterRegistry meterRegistry;
@@ -97,15 +94,18 @@ void testNoRetryInternalRepo() {
 
     // construct a real table object to prepare subsequent client call for table-update (that they
     // will fail)
+    MetricsReporter metricsReporter =
+        new MetricsReporter(this.meterRegistry, "test", Lists.newArrayList());
+    SnapshotDiffApplier snapshotDiffApplier = new SnapshotDiffApplier(metricsReporter);
     OpenHouseInternalTableOperations actualOps =
         new OpenHouseInternalTableOperations(
             houseTablesRepository,
             fileIO,
-            snapshotInspector,
             houseTableMapper,
             tableIdentifier,
-            new MetricsReporter(this.meterRegistry, "test", Lists.newArrayList()),
-            fileIOManager);
+            metricsReporter,
+            fileIOManager,
+            snapshotDiffApplier);
     ((SettableCatalogForTest) catalog).setOperation(actualOps);
     TableDto creationDTO = TABLE_DTO.toBuilder().tableVersion(INITIAL_TABLE_VERSION).build();
     creationDTO = openHouseInternalRepository.save(creationDTO);
@@ -114,15 +114,18 @@ void testNoRetryInternalRepo() {
 
     // injecting mocked htsRepo within a tableOperation that fails doCommit method.
     // The requirement to trigger htsRepo.save call are: Detectable updates in Transaction itself.
+    MetricsReporter metricsReporter2 =
+        new MetricsReporter(this.meterRegistry, "test", Lists.newArrayList());
+    SnapshotDiffApplier snapshotDiffApplier2 = new SnapshotDiffApplier(metricsReporter2);
     OpenHouseInternalTableOperations mockOps =
         new OpenHouseInternalTableOperations(
             htsRepo,
             fileIO,
-            snapshotInspector,
             houseTableMapper,
             tableIdentifier,
-            new MetricsReporter(this.meterRegistry, "test", Lists.newArrayList()),
-            fileIOManager);
+            metricsReporter2,
+            fileIOManager,
+            snapshotDiffApplier2);
     OpenHouseInternalTableOperations spyOperations = Mockito.spy(mockOps);
     doReturn(actualOps.current()).when(spyOperations).refresh();
     BaseTable spyOptsMockedTable = Mockito.spy(new BaseTable(spyOperations, realTable.name()));
@@ -195,15 +198,18 @@ void testFailedHtsRepoWhenGet() {
 
     for (Class c : exs) {
       HouseTableRepository htsRepo = provideFailedHtsRepoWhenGet(c);
+      MetricsReporter metricsReporter =
+          new MetricsReporter(this.meterRegistry, "test", Lists.newArrayList());
+      SnapshotDiffApplier snapshotDiffApplier = new SnapshotDiffApplier(metricsReporter);
       OpenHouseInternalTableOperations mockOps =
           new OpenHouseInternalTableOperations(
               htsRepo,
               fileIO,
-              snapshotInspector,
               houseTableMapper,
               tableIdentifier,
-              new MetricsReporter(this.meterRegistry, "test", Lists.newArrayList()),
-              fileIOManager);
+              metricsReporter,
+              fileIOManager,
+              snapshotDiffApplier);
       OpenHouseInternalTableOperations spyOperations = Mockito.spy(mockOps);
       BaseTable spyOptsMockedTable =
           Mockito.spy(
diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/SpringH2Application.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/SpringH2Application.java
index d845e1b39..7cf0528ec 100644
--- a/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/SpringH2Application.java
+++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/SpringH2Application.java
@@ -5,9 +5,6 @@
 import com.linkedin.openhouse.common.audit.model.ServiceAuditEvent;
 import com.linkedin.openhouse.tables.audit.DummyTableAuditHandler;
 import com.linkedin.openhouse.tables.audit.model.TableAuditEvent;
-import java.util.function.Consumer;
-import java.util.function.Supplier;
-import org.apache.hadoop.fs.Path;
 import org.mockito.Mockito;
 import org.springframework.boot.SpringApplication;
 import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
@@ -17,7 +14,6 @@
 import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.ComponentScan;
-import org.springframework.context.annotation.Primary;
 
 @SpringBootApplication
 @ComponentScan(
@@ -53,19 +49,6 @@ public static void main(String[] args) {
     SpringApplication.run(SpringH2Application.class, args);
   }
 
-  /**
-   * File secure used for testing purpose. We cannot directly use the actual
-   * SnapshotInspector#fileSecurer as that changes file to a user group that is not guaranteed to
-   * exist across different platforms thus creating environment dependencies for unit tests.
-   */
-  @Bean
-  @Primary
-  Consumer<Supplier<Path>> provideTestFileSecurer() {
-    return pathSupplier -> {
-      // This is a no-op Consumer. It does nothing with the supplied Path.
-    };
-  }
-
   @Bean
   public AuditHandler<ServiceAuditEvent> serviceAuditHandler() {
     return Mockito.mock(DummyServiceAuditHandler.class);
diff --git a/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/SpringH2TestApplication.java b/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/SpringH2TestApplication.java
index 343c38d0d..0d85a24b0 100644
--- a/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/SpringH2TestApplication.java
+++ b/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/SpringH2TestApplication.java
@@ -1,17 +1,12 @@
 package com.linkedin.openhouse.tablestest;
 
-import java.util.function.Consumer;
-import java.util.function.Supplier;
-import org.apache.hadoop.fs.Path;
 import org.springframework.boot.SpringApplication;
 import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
 import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.boot.autoconfigure.domain.EntityScan;
 import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
-import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.ComponentScan;
-import org.springframework.context.annotation.Primary;
 
 @SpringBootApplication
 @ComponentScan(
@@ -47,17 +42,4 @@ public class SpringH2TestApplication {
   public static void main(String[] args) {
     SpringApplication.run(SpringH2TestApplication.class, args);
   }
-
-  /**
-   * File secure used for testing purpose. We cannot directly use the actual
-   * SnapshotInspector#fileSecurer as that changes file to a user group that is not guaranteed to
-   * exist across different platforms thus creating environment dependencies for unit tests.
-   */
-  @Bean
-  @Primary
-  Consumer<Supplier<Path>> provideTestFileSecurer() {
-    return pathSupplier -> {
-      // This is a no-op Consumer. It does nothing with the supplied Path.
-    };
-  }
 }

From afe2627c509744affc5480ddd50a268471b68a0f Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Thu, 9 Oct 2025 22:05:32 -0700
Subject: [PATCH 17/35] fixing broken tests

---
 .../e2e/h2/RepositoryTestWithSettableComponents.java  |  8 ++++----
 .../openhouse/tables/settable/SettableTestConfig.java | 11 +++++++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/RepositoryTestWithSettableComponents.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/RepositoryTestWithSettableComponents.java
index c6073aff7..ceb3f9e26 100644
--- a/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/RepositoryTestWithSettableComponents.java
+++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/RepositoryTestWithSettableComponents.java
@@ -8,6 +8,7 @@
 import com.linkedin.openhouse.cluster.storage.StorageManager;
 import com.linkedin.openhouse.common.test.cluster.PropertyOverrideContextInitializer;
 import com.linkedin.openhouse.internal.catalog.OpenHouseInternalTableOperations;
+import com.linkedin.openhouse.internal.catalog.SnapshotDiffApplier;
 import com.linkedin.openhouse.internal.catalog.fileio.FileIOManager;
 import com.linkedin.openhouse.internal.catalog.mapper.HouseTableMapper;
 import com.linkedin.openhouse.internal.catalog.model.HouseTable;
@@ -63,6 +64,8 @@ public class RepositoryTestWithSettableComponents {
 
   @Autowired MeterRegistry meterRegistry;
 
+  @Autowired SnapshotDiffApplier snapshotDiffApplier;
+
   FileIO fileIO;
 
   @PostConstruct
@@ -96,7 +99,6 @@ void testNoRetryInternalRepo() {
     // will fail)
     MetricsReporter metricsReporter =
         new MetricsReporter(this.meterRegistry, "test", Lists.newArrayList());
-    SnapshotDiffApplier snapshotDiffApplier = new SnapshotDiffApplier(metricsReporter);
     OpenHouseInternalTableOperations actualOps =
         new OpenHouseInternalTableOperations(
             houseTablesRepository,
@@ -116,7 +118,6 @@ void testNoRetryInternalRepo() {
     // The requirement to trigger htsRepo.save call are: Detectable updates in Transaction itself.
     MetricsReporter metricsReporter2 =
         new MetricsReporter(this.meterRegistry, "test", Lists.newArrayList());
-    SnapshotDiffApplier snapshotDiffApplier2 = new SnapshotDiffApplier(metricsReporter2);
     OpenHouseInternalTableOperations mockOps =
         new OpenHouseInternalTableOperations(
             htsRepo,
@@ -125,7 +126,7 @@ void testNoRetryInternalRepo() {
             tableIdentifier,
             metricsReporter2,
             fileIOManager,
-            snapshotDiffApplier2);
+            snapshotDiffApplier);
     OpenHouseInternalTableOperations spyOperations = Mockito.spy(mockOps);
     doReturn(actualOps.current()).when(spyOperations).refresh();
     BaseTable spyOptsMockedTable = Mockito.spy(new BaseTable(spyOperations, realTable.name()));
@@ -200,7 +201,6 @@ void testFailedHtsRepoWhenGet() {
       HouseTableRepository htsRepo = provideFailedHtsRepoWhenGet(c);
       MetricsReporter metricsReporter =
           new MetricsReporter(this.meterRegistry, "test", Lists.newArrayList());
-      SnapshotDiffApplier snapshotDiffApplier = new SnapshotDiffApplier(metricsReporter);
       OpenHouseInternalTableOperations mockOps =
           new OpenHouseInternalTableOperations(
               htsRepo,
diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/settable/SettableTestConfig.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/settable/SettableTestConfig.java
index 400b92b0f..f7d4f0124 100644
--- a/services/tables/src/test/java/com/linkedin/openhouse/tables/settable/SettableTestConfig.java
+++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/settable/SettableTestConfig.java
@@ -1,8 +1,12 @@
 package com.linkedin.openhouse.tables.settable;
 
+import com.linkedin.openhouse.cluster.metrics.micrometer.MetricsReporter;
+import com.linkedin.openhouse.internal.catalog.SnapshotDiffApplier;
 import com.linkedin.openhouse.tables.repository.OpenHouseInternalRepository;
 import com.linkedin.openhouse.tables.repository.impl.SettableInternalRepositoryForTest;
+import io.micrometer.core.instrument.MeterRegistry;
 import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.springframework.boot.test.context.TestConfiguration;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Primary;
@@ -20,4 +24,11 @@ public Catalog provideTestCatalog() {
   public OpenHouseInternalRepository provideTestInternalRepo() {
     return new SettableInternalRepositoryForTest();
   }
+
+  @Bean
+  public SnapshotDiffApplier snapshotDiffApplier(MeterRegistry meterRegistry) {
+    MetricsReporter metricsReporter =
+        new MetricsReporter(meterRegistry, "test", Lists.newArrayList());
+    return new SnapshotDiffApplier(metricsReporter);
+  }
 }

From 6ba98f517300072c7b5020739b6206e89beac5b2 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Mon, 20 Oct 2025 23:04:23 -0700
Subject: [PATCH 18/35] centralizing maps/lists in constructor and reusing in
 applyTo

---
 .../internal/catalog/SnapshotDiffApplier.java | 46 +++++++++----------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index 347cf4f7d..ddf0b8d71 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -104,6 +104,11 @@ private class SnapshotDiff {
     private final List<Snapshot> newSnapshots;
     private final List<Snapshot> deletedSnapshots;
     private final Map<String, SnapshotRef> branchUpdates;
+    private final Set<Long> deletedIds;
+    private final List<Snapshot> newRegularSnapshots;
+    private final Set<String> staleRefs;
+    private final Set<Long> existingAfterDeletionIds;
+    private final List<Snapshot> unreferencedNewSnapshots;
 
     SnapshotDiff(
         List<Snapshot> providedSnapshots,
@@ -141,6 +146,19 @@ private class SnapshotDiff {
               .filter(s -> !providedById.containsKey(s.snapshotId()))
               .collect(Collectors.toList());
       this.branchUpdates = computeBranchUpdates();
+      this.deletedIds =
+          deletedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+      this.newRegularSnapshots =
+          regularSnapshots.stream().filter(newSnapshots::contains).collect(Collectors.toList());
+      this.staleRefs = Sets.difference(existingRefs.keySet(), providedRefs.keySet());
+      this.existingAfterDeletionIds = Sets.difference(existingById.keySet(), deletedIds);
+      this.unreferencedNewSnapshots =
+          providedSnapshots.stream()
+              .filter(
+                  s ->
+                      !existingAfterDeletionIds.contains(s.snapshotId())
+                          && !providedBranchIds.contains(s.snapshotId()))
+              .collect(Collectors.toList());
     }
 
     private List<Snapshot> computeWapSnapshots() {
@@ -252,8 +270,7 @@ private void validateCurrentSnapshotNotDeleted(TableMetadata base) {
       }
 
       long currentSnapshotId = base.currentSnapshot().snapshotId();
-      boolean currentDeleted =
-          deletedSnapshots.stream().anyMatch(s -> s.snapshotId() == currentSnapshotId);
+      boolean currentDeleted = deletedIds.contains(currentSnapshotId);
 
       if (currentDeleted && newSnapshots.isEmpty()) {
         throw new InvalidIcebergSnapshotException(
@@ -307,9 +324,6 @@ private void validateNoAmbiguousCommits() {
      *     branch or tag
      */
     private void validateDeletedSnapshotsNotReferenced() {
-      Set<Long> deletedIds =
-          deletedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-
       Map<Long, List<String>> referencedIdsToRefs =
           providedRefs.entrySet().stream()
               .collect(
@@ -340,28 +354,14 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
 
       // Remove deleted snapshots
       if (!deletedSnapshots.isEmpty()) {
-        Set<Long> deletedIds =
-            deletedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
         builder.removeSnapshots(deletedIds);
       }
 
       // Remove stale branch references
-      metadata.refs().keySet().stream()
-          .filter(refName -> !providedRefs.containsKey(refName))
-          .forEach(builder::removeRef);
-
-      // Track existing snapshot IDs after deletions
-      Set<Long> existingAfterDeletion =
-          metadata.snapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-      deletedSnapshots.forEach(s -> existingAfterDeletion.remove(s.snapshotId()));
+      staleRefs.forEach(builder::removeRef);
 
       // Add unreferenced new snapshots
-      providedSnapshots.stream()
-          .filter(
-              s ->
-                  !existingAfterDeletion.contains(s.snapshotId())
-                      && !providedBranchIds.contains(s.snapshotId()))
-          .forEach(builder::addSnapshot);
+      unreferencedNewSnapshots.forEach(builder::addSnapshot);
 
       // Set branch pointers
       providedRefs.forEach(
@@ -374,7 +374,7 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
                       branchName, ref.snapshotId()));
             }
 
-            if (existingAfterDeletion.contains(snapshot.snapshotId())) {
+            if (existingAfterDeletionIds.contains(snapshot.snapshotId())) {
               SnapshotRef existingRef = metadata.refs().get(branchName);
               if (existingRef == null || existingRef.snapshotId() != ref.snapshotId()) {
                 builder.setRef(branchName, ref);
@@ -412,8 +412,6 @@ void recordMetrics(TableMetadata.Builder builder) {
       }
 
       // Record snapshot IDs in properties
-      List<Snapshot> newRegularSnapshots =
-          regularSnapshots.stream().filter(newSnapshots::contains).collect(Collectors.toList());
       if (!newRegularSnapshots.isEmpty()) {
         builder.setProperties(
             Collections.singletonMap(

From 39b6cf1b8d27600e74d3d9f664be962edd2535b0 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Wed, 22 Oct 2025 18:19:04 -0700
Subject: [PATCH 19/35] responding to comments

---
 .../internal/catalog/SnapshotDiffApplier.java | 120 +++++++++---------
 .../spark/catalogtest/BranchTestSpark3_5.java |  13 +-
 2 files changed, 71 insertions(+), 62 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index ddf0b8d71..4f2f0ccce 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -68,11 +68,12 @@ public TableMetadata applySnapshots(TableMetadata base, TableMetadata metadata)
 
     // Compute diff (all maps created once in constructor)
     SnapshotDiff diff =
-        new SnapshotDiff(providedSnapshots, providedRefs, existingSnapshots, existingRefs);
+        new SnapshotDiff(
+            providedSnapshots, providedRefs, existingSnapshots, existingRefs, metadata);
 
     // Validate, apply, record metrics, build
     diff.validate(base);
-    TableMetadata.Builder builder = diff.applyTo(metadata);
+    TableMetadata.Builder builder = diff.applyTo();
     diff.recordMetrics(builder);
     return builder.build();
   }
@@ -88,12 +89,14 @@ private class SnapshotDiff {
     private final Map<String, SnapshotRef> providedRefs;
     private final List<Snapshot> existingSnapshots;
     private final Map<String, SnapshotRef> existingRefs;
+    private final TableMetadata metadata;
 
     // Computed maps (created once)
-    private final Map<Long, Snapshot> providedById;
-    private final Map<Long, Snapshot> existingById;
-    private final Set<Long> existingBranchIds;
-    private final Set<Long> providedBranchIds;
+    private final Map<Long, Snapshot> providedSnapshotByIds;
+    private final Map<Long, Snapshot> existingSnapshotByIds;
+    private final Set<Long> metadataSnapshotIds;
+    private final Set<Long> existingBranchRefIds;
+    private final Set<Long> providedBranchRefIds;
 
     // Categorized snapshots
     private final List<Snapshot> wapSnapshots;
@@ -114,36 +117,48 @@ private class SnapshotDiff {
         List<Snapshot> providedSnapshots,
         Map<String, SnapshotRef> providedRefs,
         List<Snapshot> existingSnapshots,
-        Map<String, SnapshotRef> existingRefs) {
+        Map<String, SnapshotRef> existingRefs,
+        TableMetadata metadata) {
       this.providedSnapshots = providedSnapshots;
       this.providedRefs = providedRefs;
       this.existingSnapshots = existingSnapshots;
       this.existingRefs = existingRefs;
+      this.metadata = metadata;
 
       // Compute all maps once
-      this.providedById =
+      this.providedSnapshotByIds =
           providedSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
-      this.existingById =
+      this.existingSnapshotByIds =
           existingSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
-      this.existingBranchIds =
+      this.metadataSnapshotIds =
+          metadata.snapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+      this.existingBranchRefIds =
           existingRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
-      this.providedBranchIds =
+      this.providedBranchRefIds =
           providedRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
 
-      // Compute categorization (order matters: cherry-picked filters WAP)
-      List<Snapshot> initialWapSnapshots = computeWapSnapshots();
+      // Compute categorization - process in dependency order
+      // 1. Cherry-picked has highest priority (includes WAP being published)
+      // 2. WAP snapshots (staged, not published)
+      // 3. Regular snapshots (everything else)
       this.cherryPickedSnapshots = computeCherryPickedSnapshots();
-      this.wapSnapshots = filterWapFromCherryPicked(initialWapSnapshots);
-      this.regularSnapshots = computeRegularSnapshots();
+      Set<Long> cherryPickedIds =
+          cherryPickedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+
+      this.wapSnapshots = computeWapSnapshots(cherryPickedIds);
+      Set<Long> wapIds =
+          wapSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+
+      this.regularSnapshots = computeRegularSnapshots(cherryPickedIds, wapIds);
 
       // Compute changes
       this.newSnapshots =
           providedSnapshots.stream()
-              .filter(s -> !existingById.containsKey(s.snapshotId()))
+              .filter(s -> !existingSnapshotByIds.containsKey(s.snapshotId()))
               .collect(Collectors.toList());
       this.deletedSnapshots =
           existingSnapshots.stream()
-              .filter(s -> !providedById.containsKey(s.snapshotId()))
+              .filter(s -> !providedSnapshotByIds.containsKey(s.snapshotId()))
               .collect(Collectors.toList());
       this.branchUpdates = computeBranchUpdates();
       this.deletedIds =
@@ -151,27 +166,31 @@ private class SnapshotDiff {
       this.newRegularSnapshots =
           regularSnapshots.stream().filter(newSnapshots::contains).collect(Collectors.toList());
       this.staleRefs = Sets.difference(existingRefs.keySet(), providedRefs.keySet());
-      this.existingAfterDeletionIds = Sets.difference(existingById.keySet(), deletedIds);
+      this.existingAfterDeletionIds = Sets.difference(existingSnapshotByIds.keySet(), deletedIds);
       this.unreferencedNewSnapshots =
           providedSnapshots.stream()
               .filter(
                   s ->
                       !existingAfterDeletionIds.contains(s.snapshotId())
-                          && !providedBranchIds.contains(s.snapshotId()))
+                          && !providedBranchRefIds.contains(s.snapshotId())
+                          && !metadataSnapshotIds.contains(s.snapshotId()))
               .collect(Collectors.toList());
     }
 
-    private List<Snapshot> computeWapSnapshots() {
-      Set<Long> allBranchIds =
-          java.util.stream.Stream.concat(existingBranchIds.stream(), providedBranchIds.stream())
+    private List<Snapshot> computeWapSnapshots(Set<Long> excludeCherryPicked) {
+      // Depends on: cherry-picked IDs (to exclude WAP snapshots being published)
+      Set<Long> allBranchRefIds =
+          java.util.stream.Stream.concat(
+                  existingBranchRefIds.stream(), providedBranchRefIds.stream())
               .collect(Collectors.toSet());
 
       return providedSnapshots.stream()
+          .filter(s -> !excludeCherryPicked.contains(s.snapshotId()))
           .filter(
               s ->
                   s.summary() != null
                       && s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP)
-                      && !allBranchIds.contains(s.snapshotId()))
+                      && !allBranchRefIds.contains(s.snapshotId()))
           .collect(Collectors.toList());
     }
 
@@ -185,7 +204,7 @@ private List<Snapshot> computeCherryPickedSnapshots() {
       return providedSnapshots.stream()
           .filter(
               provided -> {
-                Snapshot existing = existingById.get(provided.snapshotId());
+                Snapshot existing = existingSnapshotByIds.get(provided.snapshotId());
                 if (existing == null) {
                   return false;
                 }
@@ -204,30 +223,19 @@ private List<Snapshot> computeCherryPickedSnapshots() {
                 boolean hasWapId =
                     provided.summary() != null
                         && provided.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP);
-                boolean wasStaged = !existingBranchIds.contains(provided.snapshotId());
-                boolean isNowOnBranch = providedBranchIds.contains(provided.snapshotId());
+                boolean wasStaged = !existingBranchRefIds.contains(provided.snapshotId());
+                boolean isNowOnBranch = providedBranchRefIds.contains(provided.snapshotId());
                 return hasWapId && wasStaged && isNowOnBranch;
               })
           .collect(Collectors.toList());
     }
 
-    private List<Snapshot> filterWapFromCherryPicked(List<Snapshot> initialWapSnapshots) {
-      Set<Long> cherryPickedIds =
-          cherryPickedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-      return initialWapSnapshots.stream()
-          .filter(s -> !cherryPickedIds.contains(s.snapshotId()))
-          .collect(Collectors.toList());
-    }
-
-    private List<Snapshot> computeRegularSnapshots() {
-      Set<Long> excludedIds =
-          java.util.stream.Stream.concat(
-                  wapSnapshots.stream().map(Snapshot::snapshotId),
-                  cherryPickedSnapshots.stream().map(Snapshot::snapshotId))
-              .collect(Collectors.toSet());
-
+    private List<Snapshot> computeRegularSnapshots(
+        Set<Long> excludeCherryPicked, Set<Long> excludeWap) {
+      // Depends on: cherry-picked and WAP IDs (everything else is regular)
       return providedSnapshots.stream()
-          .filter(s -> !excludedIds.contains(s.snapshotId()))
+          .filter(s -> !excludeCherryPicked.contains(s.snapshotId()))
+          .filter(s -> !excludeWap.contains(s.snapshotId()))
           .collect(Collectors.toList());
     }
 
@@ -349,7 +357,7 @@ private void validateDeletedSnapshotsNotReferenced() {
       }
     }
 
-    TableMetadata.Builder applyTo(TableMetadata metadata) {
+    TableMetadata.Builder applyTo() {
       TableMetadata.Builder builder = TableMetadata.buildFrom(metadata);
 
       // Remove deleted snapshots
@@ -366,7 +374,7 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
       // Set branch pointers
       providedRefs.forEach(
           (branchName, ref) -> {
-            Snapshot snapshot = providedById.get(ref.snapshotId());
+            Snapshot snapshot = providedSnapshotByIds.get(ref.snapshotId());
             if (snapshot == null) {
               throw new InvalidIcebergSnapshotException(
                   String.format(
@@ -374,7 +382,12 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
                       branchName, ref.snapshotId()));
             }
 
-            if (existingAfterDeletionIds.contains(snapshot.snapshotId())) {
+            // Check if snapshot is already in metadata (after deletions)
+            boolean snapshotExistsInMetadata =
+                metadataSnapshotIds.contains(snapshot.snapshotId())
+                    && !deletedIds.contains(snapshot.snapshotId());
+
+            if (snapshotExistsInMetadata) {
               SnapshotRef existingRef = metadata.refs().get(branchName);
               if (existingRef == null || existingRef.snapshotId() != ref.snapshotId()) {
                 builder.setRef(branchName, ref);
@@ -391,7 +404,7 @@ void recordMetrics(TableMetadata.Builder builder) {
       int appendedCount =
           (int)
               regularSnapshots.stream()
-                  .filter(s -> !existingById.containsKey(s.snapshotId()))
+                  .filter(s -> !existingSnapshotByIds.containsKey(s.snapshotId()))
                   .count();
       int stagedCount = wapSnapshots.size();
       int cherryPickedCount = cherryPickedSnapshots.size();
@@ -451,16 +464,9 @@ void recordMetrics(TableMetadata.Builder builder) {
    * @return Comma-separated string of snapshot IDs, or empty string if list is empty
    */
   private String formatSnapshotIds(List<Snapshot> snapshots) {
-    if (snapshots.isEmpty()) {
-      return "";
-    }
-    StringBuilder sb = new StringBuilder();
-    for (int i = 0; i < snapshots.size(); i++) {
-      if (i > 0) {
-        sb.append(',');
-      }
-      sb.append(snapshots.get(i).snapshotId());
-    }
-    return sb.toString();
+    return snapshots.stream()
+        .map(Snapshot::snapshotId)
+        .map(String::valueOf)
+        .collect(Collectors.joining(","));
   }
 }
diff --git a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
index 488750620..478059289 100644
--- a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
+++ b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
@@ -103,8 +103,9 @@ public void testBasicBranchOperations() throws Exception {
       List<Row> refs =
           spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
       assertEquals(2, refs.size());
-      assertEquals("feature_a", refs.get(0).getString(0));
-      assertEquals("main", refs.get(1).getString(0));
+      Set<String> refNames = refs.stream().map(row -> row.getString(0)).collect(Collectors.toSet());
+      assertTrue(refNames.contains("feature_a"));
+      assertTrue(refNames.contains("main"));
     }
   }
 
@@ -2004,7 +2005,8 @@ public void testBackwardCompatibilityMainBranchOnly() throws Exception {
       assertEquals(3, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
       List<Row> refs = spark.sql("SELECT name FROM " + tableName + ".refs").collectAsList();
       assertEquals(1, refs.size());
-      assertEquals("main", refs.get(0).getString(0));
+      Set<String> refNames = refs.stream().map(row -> row.getString(0)).collect(Collectors.toSet());
+      assertTrue(refNames.contains("main"));
 
       // Traditional snapshot queries should work
       assertTrue(
@@ -2279,8 +2281,9 @@ public void testErrorInsertToNonExistentBranch() throws Exception {
       List<Row> refs =
           spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
       assertEquals(2, refs.size());
-      assertEquals("feature_a", refs.get(0).getString(0));
-      assertEquals("main", refs.get(1).getString(0));
+      Set<String> refNames = refs.stream().map(row -> row.getString(0)).collect(Collectors.toSet());
+      assertTrue(refNames.contains("feature_a"));
+      assertTrue(refNames.contains("main"));
     }
   }
 

From fb0ff1babf34c6d156decd486180bd8818cc140a Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Mon, 3 Nov 2025 18:57:32 -0800
Subject: [PATCH 20/35] formatting

---
 .../internal/catalog/SnapshotDiffApplier.java |  490 +---
 .../OpenHouseInternalTableOperationsTest.java |  304 ---
 .../spark/catalogtest/BranchTestSpark3_5.java | 2370 -----------------
 3 files changed, 135 insertions(+), 3029 deletions(-)
 delete mode 100644 integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index 4f2f0ccce..90dbc2b85 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -2,10 +2,11 @@
 
 import static com.linkedin.openhouse.internal.catalog.mapper.HouseTableSerdeUtils.getCanonicalFieldName;
 
-import com.google.common.collect.Sets;
 import com.linkedin.openhouse.cluster.metrics.micrometer.MetricsReporter;
 import com.linkedin.openhouse.internal.catalog.exception.InvalidIcebergSnapshotException;
+import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -13,28 +14,19 @@
 import java.util.stream.Collectors;
 import lombok.AllArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.collections.MapUtils;
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.SnapshotRef;
 import org.apache.iceberg.SnapshotSummary;
 import org.apache.iceberg.TableMetadata;
-import org.apache.iceberg.relocated.com.google.common.base.Objects;
 
 /**
  * Service responsible for applying snapshot changes to Iceberg table metadata.
  *
- * <p>This class handles the complex logic of computing snapshot diffs, validating changes, and
- * applying them to table metadata. It supports various snapshot operations including:
- *
- * <ul>
- *   <li>Adding new snapshots (regular commits)
- *   <li>Staging snapshots (WAP - Write-Audit-Publish)
- *   <li>Cherry-picking snapshots across branches
- *   <li>Deleting snapshots
- *   <li>Updating branch references
- * </ul>
- *
- * <p>The service performs comprehensive validation to ensure data integrity and prevent invalid
- * operations such as deleting referenced snapshots or creating ambiguous branch references.
+ * <p>This class extracts snapshot logic from OpenHouseInternalTableOperations while maintaining the
+ * same behavior. The main entry point applySnapshots() has a clear flow: parse input → compute diff
+ * → validate → apply.
  */
 @AllArgsConstructor
 @Slf4j
@@ -43,8 +35,8 @@ public class SnapshotDiffApplier {
   private final MetricsReporter metricsReporter;
 
   /**
-   * Applies snapshot updates from metadata properties. Simple and clear: parse input, compute diff,
-   * validate, apply, record metrics, build.
+   * Applies snapshot updates from metadata properties. Clear flow: parse input, compute diff,
+   * validate, apply, build.
    *
    * @param base The base table metadata (may be null for table creation)
    * @param metadata The new metadata with properties containing snapshot updates
@@ -61,97 +53,54 @@ public TableMetadata applySnapshots(TableMetadata base, TableMetadata metadata)
     Map<String, SnapshotRef> providedRefs =
         Optional.ofNullable(metadata.properties().get(CatalogConstants.SNAPSHOTS_REFS_KEY))
             .map(SnapshotsUtil::parseSnapshotRefs)
-            .orElse(Collections.emptyMap());
+            .orElse(new HashMap<>());
 
     List<Snapshot> existingSnapshots = base != null ? base.snapshots() : Collections.emptyList();
-    Map<String, SnapshotRef> existingRefs = base != null ? base.refs() : Collections.emptyMap();
 
-    // Compute diff (all maps created once in constructor)
+    // Compute diff (minimal maps in constructor)
     SnapshotDiff diff =
-        new SnapshotDiff(
-            providedSnapshots, providedRefs, existingSnapshots, existingRefs, metadata);
+        new SnapshotDiff(providedSnapshots, existingSnapshots, metadata, providedRefs);
 
-    // Validate, apply, record metrics, build
+    // Validate, apply, build
     diff.validate(base);
-    TableMetadata.Builder builder = diff.applyTo();
-    diff.recordMetrics(builder);
+    TableMetadata.Builder builder = diff.applyTo(metadata);
     return builder.build();
   }
 
   /**
-   * State object that computes and caches all snapshot analysis. Computes all maps once in the
-   * constructor to avoid redundant operations. Provides clear methods for validation and
-   * application.
+   * State object that computes minimal snapshot diff. Computes only essential maps in the
+   * constructor for the refactoring. Provides simple validation and application methods.
    */
   private class SnapshotDiff {
     // Input state
     private final List<Snapshot> providedSnapshots;
-    private final Map<String, SnapshotRef> providedRefs;
     private final List<Snapshot> existingSnapshots;
-    private final Map<String, SnapshotRef> existingRefs;
     private final TableMetadata metadata;
+    private final Map<String, SnapshotRef> providedRefs;
 
-    // Computed maps (created once)
+    // Computed maps (minimal for original behavior)
     private final Map<Long, Snapshot> providedSnapshotByIds;
     private final Map<Long, Snapshot> existingSnapshotByIds;
-    private final Set<Long> metadataSnapshotIds;
-    private final Set<Long> existingBranchRefIds;
-    private final Set<Long> providedBranchRefIds;
-
-    // Categorized snapshots
-    private final List<Snapshot> wapSnapshots;
-    private final List<Snapshot> cherryPickedSnapshots;
-    private final List<Snapshot> regularSnapshots;
-
-    // Changes
     private final List<Snapshot> newSnapshots;
     private final List<Snapshot> deletedSnapshots;
-    private final Map<String, SnapshotRef> branchUpdates;
-    private final Set<Long> deletedIds;
-    private final List<Snapshot> newRegularSnapshots;
-    private final Set<String> staleRefs;
-    private final Set<Long> existingAfterDeletionIds;
-    private final List<Snapshot> unreferencedNewSnapshots;
 
     SnapshotDiff(
         List<Snapshot> providedSnapshots,
-        Map<String, SnapshotRef> providedRefs,
         List<Snapshot> existingSnapshots,
-        Map<String, SnapshotRef> existingRefs,
-        TableMetadata metadata) {
+        TableMetadata metadata,
+        Map<String, SnapshotRef> providedRefs) {
       this.providedSnapshots = providedSnapshots;
-      this.providedRefs = providedRefs;
       this.existingSnapshots = existingSnapshots;
-      this.existingRefs = existingRefs;
       this.metadata = metadata;
+      this.providedRefs = providedRefs;
 
-      // Compute all maps once
+      // Compute basic maps
       this.providedSnapshotByIds =
           providedSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
       this.existingSnapshotByIds =
           existingSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
-      this.metadataSnapshotIds =
-          metadata.snapshots().stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-      this.existingBranchRefIds =
-          existingRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
-      this.providedBranchRefIds =
-          providedRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
-
-      // Compute categorization - process in dependency order
-      // 1. Cherry-picked has highest priority (includes WAP being published)
-      // 2. WAP snapshots (staged, not published)
-      // 3. Regular snapshots (everything else)
-      this.cherryPickedSnapshots = computeCherryPickedSnapshots();
-      Set<Long> cherryPickedIds =
-          cherryPickedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
 
-      this.wapSnapshots = computeWapSnapshots(cherryPickedIds);
-      Set<Long> wapIds =
-          wapSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-
-      this.regularSnapshots = computeRegularSnapshots(cherryPickedIds, wapIds);
-
-      // Compute changes
+      // Compute diff (symmetric difference)
       this.newSnapshots =
           providedSnapshots.stream()
               .filter(s -> !existingSnapshotByIds.containsKey(s.snapshotId()))
@@ -160,313 +109,144 @@ private class SnapshotDiff {
           existingSnapshots.stream()
               .filter(s -> !providedSnapshotByIds.containsKey(s.snapshotId()))
               .collect(Collectors.toList());
-      this.branchUpdates = computeBranchUpdates();
-      this.deletedIds =
-          deletedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-      this.newRegularSnapshots =
-          regularSnapshots.stream().filter(newSnapshots::contains).collect(Collectors.toList());
-      this.staleRefs = Sets.difference(existingRefs.keySet(), providedRefs.keySet());
-      this.existingAfterDeletionIds = Sets.difference(existingSnapshotByIds.keySet(), deletedIds);
-      this.unreferencedNewSnapshots =
-          providedSnapshots.stream()
-              .filter(
-                  s ->
-                      !existingAfterDeletionIds.contains(s.snapshotId())
-                          && !providedBranchRefIds.contains(s.snapshotId())
-                          && !metadataSnapshotIds.contains(s.snapshotId()))
-              .collect(Collectors.toList());
-    }
-
-    private List<Snapshot> computeWapSnapshots(Set<Long> excludeCherryPicked) {
-      // Depends on: cherry-picked IDs (to exclude WAP snapshots being published)
-      Set<Long> allBranchRefIds =
-          java.util.stream.Stream.concat(
-                  existingBranchRefIds.stream(), providedBranchRefIds.stream())
-              .collect(Collectors.toSet());
-
-      return providedSnapshots.stream()
-          .filter(s -> !excludeCherryPicked.contains(s.snapshotId()))
-          .filter(
-              s ->
-                  s.summary() != null
-                      && s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP)
-                      && !allBranchRefIds.contains(s.snapshotId()))
-          .collect(Collectors.toList());
-    }
-
-    private List<Snapshot> computeCherryPickedSnapshots() {
-      Set<Long> cherryPickSourceIds =
-          providedSnapshots.stream()
-              .filter(s -> s.summary() != null && s.summary().containsKey("source-snapshot-id"))
-              .map(s -> Long.parseLong(s.summary().get("source-snapshot-id")))
-              .collect(Collectors.toSet());
-
-      return providedSnapshots.stream()
-          .filter(
-              provided -> {
-                Snapshot existing = existingSnapshotByIds.get(provided.snapshotId());
-                if (existing == null) {
-                  return false;
-                }
-
-                // Parent changed (moved to different branch)
-                if (!Objects.equal(provided.parentId(), existing.parentId())) {
-                  return true;
-                }
-
-                // Is source of cherry-pick
-                if (cherryPickSourceIds.contains(provided.snapshotId())) {
-                  return true;
-                }
-
-                // WAP snapshot being published (staged → branch)
-                boolean hasWapId =
-                    provided.summary() != null
-                        && provided.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP);
-                boolean wasStaged = !existingBranchRefIds.contains(provided.snapshotId());
-                boolean isNowOnBranch = providedBranchRefIds.contains(provided.snapshotId());
-                return hasWapId && wasStaged && isNowOnBranch;
-              })
-          .collect(Collectors.toList());
-    }
-
-    private List<Snapshot> computeRegularSnapshots(
-        Set<Long> excludeCherryPicked, Set<Long> excludeWap) {
-      // Depends on: cherry-picked and WAP IDs (everything else is regular)
-      return providedSnapshots.stream()
-          .filter(s -> !excludeCherryPicked.contains(s.snapshotId()))
-          .filter(s -> !excludeWap.contains(s.snapshotId()))
-          .collect(Collectors.toList());
-    }
-
-    private Map<String, SnapshotRef> computeBranchUpdates() {
-      return providedRefs.entrySet().stream()
-          .filter(
-              entry -> {
-                SnapshotRef existing = existingRefs.get(entry.getKey());
-                return existing == null || existing.snapshotId() != entry.getValue().snapshotId();
-              })
-          .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
     }
 
     /**
-     * Validates all snapshot changes before applying them to table metadata. Runs multiple
-     * validation checks to ensure snapshot operations are safe and consistent.
-     *
-     * @param base The base table metadata to validate against (may be null for table creation)
-     * @throws InvalidIcebergSnapshotException if any validation check fails
+     * Validates snapshots update - ensures we don't delete the latest snapshot without adding new
+     * ones. This is the same validation logic from SnapshotInspector.validateSnapshotsUpdate().
      */
     void validate(TableMetadata base) {
-      validateCurrentSnapshotNotDeleted(base);
-      validateNoAmbiguousCommits();
-      validateDeletedSnapshotsNotReferenced();
-    }
-
-    /**
-     * Validates that the current snapshot is not deleted without providing replacement snapshots.
-     * This prevents leaving the table in an inconsistent state where the current snapshot pointer
-     * would reference a non-existent snapshot.
-     *
-     * @param base The base table metadata containing the current snapshot (may be null for table
-     *     creation)
-     * @throws InvalidIcebergSnapshotException if the current snapshot is being deleted without
-     *     replacements
-     */
-    private void validateCurrentSnapshotNotDeleted(TableMetadata base) {
       if (base == null || base.currentSnapshot() == null) {
         return;
       }
-
-      long currentSnapshotId = base.currentSnapshot().snapshotId();
-      boolean currentDeleted = deletedIds.contains(currentSnapshotId);
-
-      if (currentDeleted && newSnapshots.isEmpty()) {
-        throw new InvalidIcebergSnapshotException(
-            String.format(
-                "Cannot delete the current snapshot %s without adding replacement snapshots. "
-                    + "Deleted: [%s], New: [%s]",
-                currentSnapshotId,
-                deletedSnapshots.stream()
-                    .map(s -> Long.toString(s.snapshotId()))
-                    .collect(Collectors.joining(", ")),
-                newSnapshots.stream()
-                    .map(s -> Long.toString(s.snapshotId()))
-                    .collect(Collectors.joining(", "))));
+      if (!newSnapshots.isEmpty()) {
+        return;
       }
-    }
-
-    /**
-     * Validates that no single snapshot is referenced by multiple branches in the same commit. This
-     * prevents ambiguous commits where it's unclear which branch should be the primary reference
-     * for a snapshot. Each snapshot can only be associated with one branch per commit to maintain
-     * clear lineage and avoid conflicts.
-     *
-     * @throws InvalidIcebergSnapshotException if a snapshot is referenced by multiple branches
-     */
-    private void validateNoAmbiguousCommits() {
-      Map<Long, List<String>> snapshotToBranches =
-          branchUpdates.entrySet().stream()
-              .collect(
-                  Collectors.groupingBy(
-                      e -> e.getValue().snapshotId(),
-                      Collectors.mapping(Map.Entry::getKey, Collectors.toList())));
-
-      snapshotToBranches.forEach(
-          (snapshotId, branches) -> {
-            if (branches.size() > 1) {
-              throw new InvalidIcebergSnapshotException(
-                  String.format(
-                      "Ambiguous commit: snapshot %s is referenced by multiple branches [%s] in a single commit. "
-                          + "Each snapshot can only be referenced by one branch per commit.",
-                      snapshotId, String.join(", ", branches)));
-            }
-          });
-    }
-
-    /**
-     * Validates that snapshots being deleted are not still referenced by any branches or tags. This
-     * prevents data loss and maintains referential integrity by ensuring that all branch and tag
-     * pointers reference valid snapshots that will continue to exist after the commit.
-     *
-     * @throws InvalidIcebergSnapshotException if any deleted snapshot is still referenced by a
-     *     branch or tag
-     */
-    private void validateDeletedSnapshotsNotReferenced() {
-      Map<Long, List<String>> referencedIdsToRefs =
-          providedRefs.entrySet().stream()
-              .collect(
-                  Collectors.groupingBy(
-                      e -> e.getValue().snapshotId(),
-                      Collectors.mapping(Map.Entry::getKey, Collectors.toList())));
-
-      List<String> invalidDeleteDetails =
-          deletedIds.stream()
-              .filter(referencedIdsToRefs::containsKey)
-              .map(
-                  id ->
-                      String.format(
-                          "snapshot %s (referenced by: %s)",
-                          id, String.join(", ", referencedIdsToRefs.get(id))))
-              .collect(Collectors.toList());
-
-      if (!invalidDeleteDetails.isEmpty()) {
+      long latestSnapshotId = base.currentSnapshot().snapshotId();
+      if (!deletedSnapshots.isEmpty()
+          && deletedSnapshots.get(deletedSnapshots.size() - 1).snapshotId() == latestSnapshotId) {
         throw new InvalidIcebergSnapshotException(
-            String.format(
-                "Cannot delete snapshots that are still referenced by branches/tags: %s",
-                String.join("; ", invalidDeleteDetails)));
+            String.format("Cannot delete the latest snapshot %s", latestSnapshotId));
       }
     }
 
-    TableMetadata.Builder applyTo() {
-      TableMetadata.Builder builder = TableMetadata.buildFrom(metadata);
-
-      // Remove deleted snapshots
-      if (!deletedSnapshots.isEmpty()) {
-        builder.removeSnapshots(deletedIds);
+    TableMetadata.Builder applyTo(TableMetadata metadata) {
+      TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(metadata);
+      List<String> appendedSnapshots = new ArrayList<>();
+      List<String> stagedSnapshots = new ArrayList<>();
+      List<String> cherryPickedSnapshots = new ArrayList<>();
+
+      // Validate only MAIN branch
+      for (Map.Entry<String, SnapshotRef> entry : providedRefs.entrySet()) {
+        if (!entry.getKey().equals(SnapshotRef.MAIN_BRANCH)) {
+          throw new UnsupportedOperationException("OpenHouse supports only MAIN branch");
+        }
       }
 
-      // Remove stale branch references
-      staleRefs.forEach(builder::removeRef);
-
-      // Add unreferenced new snapshots
-      unreferencedNewSnapshots.forEach(builder::addSnapshot);
-
-      // Set branch pointers
-      providedRefs.forEach(
-          (branchName, ref) -> {
-            Snapshot snapshot = providedSnapshotByIds.get(ref.snapshotId());
-            if (snapshot == null) {
-              throw new InvalidIcebergSnapshotException(
-                  String.format(
-                      "Branch %s references non-existent snapshot %s",
-                      branchName, ref.snapshotId()));
-            }
+      /**
+       * First check if there are new snapshots to be appended to current TableMetadata. If yes,
+       * following are the cases to be handled:
+       *
+       * <p>[1] A regular (non-wap) snapshot is being added to the MAIN branch.
+       *
+       * <p>[2] A staged (wap) snapshot is being created on top of current snapshot as its base.
+       * Recognized by STAGED_WAP_ID_PROP.
+       *
+       * <p>[3] A staged (wap) snapshot is being cherry picked to the MAIN branch wherein current
+       * snapshot in the MAIN branch is not the same as the base snapshot the staged (wap) snapshot
+       * was created on. Recognized by SOURCE_SNAPSHOT_ID_PROP. This case is called non-fast forward
+       * cherry pick.
+       *
+       * <p>In case no new snapshots are to be appended to current TableMetadata, there could be a
+       * cherrypick of a staged (wap) snapshot on top of the current snapshot in the MAIN branch
+       * which is the same as the base snapshot the staged (wap) snapshot was created on. This case
+       * is called fast forward cherry pick.
+       */
+      if (CollectionUtils.isNotEmpty(newSnapshots)) {
+        for (Snapshot snapshot : newSnapshots) {
+          if (snapshot.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP)) {
+            // a stage only snapshot using wap.id
+            metadataBuilder.addSnapshot(snapshot);
+            stagedSnapshots.add(String.valueOf(snapshot.snapshotId()));
+          } else if (snapshot.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)) {
+            // a snapshot created on a non fast-forward cherry-pick snapshot
+            metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
+            appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
+            cherryPickedSnapshots.add(
+                String.valueOf(snapshot.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)));
+          } else {
+            // a regular snapshot
+            metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
+            appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
+          }
+        }
+      } else if (MapUtils.isNotEmpty(providedRefs)) {
+        // Updated ref in the main branch with no new snapshot means this is a
+        // fast-forward cherry-pick or rollback operation.
+        long newSnapshotId = providedRefs.get(SnapshotRef.MAIN_BRANCH).snapshotId();
+        // Either the current snapshot is null or the current snapshot is not equal
+        // to the new snapshot indicates an update. The first case happens when the
+        // stage/wap snapshot being cherry-picked is the first snapshot.
+        if (MapUtils.isEmpty(metadata.refs())
+            || metadata.refs().get(SnapshotRef.MAIN_BRANCH).snapshotId() != newSnapshotId) {
+          metadataBuilder.setBranchSnapshot(newSnapshotId, SnapshotRef.MAIN_BRANCH);
+          cherryPickedSnapshots.add(String.valueOf(newSnapshotId));
+        }
+      }
 
-            // Check if snapshot is already in metadata (after deletions)
-            boolean snapshotExistsInMetadata =
-                metadataSnapshotIds.contains(snapshot.snapshotId())
-                    && !deletedIds.contains(snapshot.snapshotId());
+      // Delete snapshots
+      if (CollectionUtils.isNotEmpty(deletedSnapshots)) {
+        Set<Long> snapshotIds =
+            deletedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+        metadataBuilder.removeSnapshots(snapshotIds);
+      }
 
-            if (snapshotExistsInMetadata) {
-              SnapshotRef existingRef = metadata.refs().get(branchName);
-              if (existingRef == null || existingRef.snapshotId() != ref.snapshotId()) {
-                builder.setRef(branchName, ref);
-              }
-            } else {
-              builder.setBranchSnapshot(snapshot, branchName);
-            }
-          });
+      // Record metrics and properties
+      recordMetrics(metadataBuilder, appendedSnapshots, stagedSnapshots, cherryPickedSnapshots);
 
-      return builder;
+      return metadataBuilder;
     }
 
-    void recordMetrics(TableMetadata.Builder builder) {
-      int appendedCount =
-          (int)
-              regularSnapshots.stream()
-                  .filter(s -> !existingSnapshotByIds.containsKey(s.snapshotId()))
-                  .count();
-      int stagedCount = wapSnapshots.size();
-      int cherryPickedCount = cherryPickedSnapshots.size();
-      int deletedCount = deletedSnapshots.size();
-
-      if (appendedCount > 0) {
-        metricsReporter.count(InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedCount);
-      }
-      if (stagedCount > 0) {
-        metricsReporter.count(InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, stagedCount);
-      }
-      if (cherryPickedCount > 0) {
+    private void recordMetrics(
+        TableMetadata.Builder builder,
+        List<String> appendedSnapshots,
+        List<String> stagedSnapshots,
+        List<String> cherryPickedSnapshots) {
+      Map<String, String> updatedProperties = new HashMap<>(metadata.properties());
+
+      if (CollectionUtils.isNotEmpty(appendedSnapshots)) {
+        updatedProperties.put(
+            getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
+            String.join(",", appendedSnapshots));
         metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR, cherryPickedCount);
-      }
-      if (deletedCount > 0) {
-        metricsReporter.count(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, deletedCount);
-      }
-
-      // Record snapshot IDs in properties
-      if (!newRegularSnapshots.isEmpty()) {
-        builder.setProperties(
-            Collections.singletonMap(
-                getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
-                formatSnapshotIds(newRegularSnapshots)));
+            InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedSnapshots.size());
       }
-      if (!wapSnapshots.isEmpty()) {
-        builder.setProperties(
-            Collections.singletonMap(
-                getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
-                formatSnapshotIds(wapSnapshots)));
+      if (CollectionUtils.isNotEmpty(stagedSnapshots)) {
+        updatedProperties.put(
+            getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
+            String.join(",", stagedSnapshots));
+        metricsReporter.count(
+            InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, stagedSnapshots.size());
       }
-      if (!cherryPickedSnapshots.isEmpty()) {
-        builder.setProperties(
-            Collections.singletonMap(
-                getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
-                formatSnapshotIds(cherryPickedSnapshots)));
+      if (CollectionUtils.isNotEmpty(cherryPickedSnapshots)) {
+        updatedProperties.put(
+            getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
+            String.join(",", cherryPickedSnapshots));
+        metricsReporter.count(
+            InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR,
+            cherryPickedSnapshots.size());
       }
-      if (!deletedSnapshots.isEmpty()) {
-        builder.setProperties(
-            Collections.singletonMap(
-                getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
-                formatSnapshotIds(deletedSnapshots)));
+      if (CollectionUtils.isNotEmpty(deletedSnapshots)) {
+        updatedProperties.put(
+            getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
+            deletedSnapshots.stream()
+                .map(s -> Long.toString(s.snapshotId()))
+                .collect(Collectors.joining(",")));
+        metricsReporter.count(
+            InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, deletedSnapshots.size());
       }
 
-      builder.removeProperties(
-          Sets.newHashSet(
-              CatalogConstants.SNAPSHOTS_JSON_KEY, CatalogConstants.SNAPSHOTS_REFS_KEY));
+      builder.setProperties(updatedProperties);
     }
   }
-
-  /**
-   * Formats a list of snapshots as a comma-separated string of snapshot IDs. Optimized
-   * implementation using StringBuilder for better performance with large lists.
-   *
-   * @param snapshots List of snapshots to format
-   * @return Comma-separated string of snapshot IDs, or empty string if list is empty
-   */
-  private String formatSnapshotIds(List<Snapshot> snapshots) {
-    return snapshots.stream()
-        .map(Snapshot::snapshotId)
-        .map(String::valueOf)
-        .collect(Collectors.joining(","));
-  }
 }
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index c5c186eb5..cbced7f7a 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -1351,62 +1351,6 @@ private void verifyMetricHistogramBuckets(
     Assertions.assertFalse(Double.isNaN(maxTime), "Timer max time should not be NaN");
   }
 
-  /**
-   * Tests that attempting to delete a snapshot referenced by the main branch throws an exception.
-   * Verifies that InvalidIcebergSnapshotException is thrown with appropriate error message.
-   */
-  @Test
-  void testDeleteSnapshotWithMainReference() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-
-    // Create base metadata with multiple snapshots
-    TableMetadata baseMetadata =
-        TableMetadata.buildFrom(BASE_TABLE_METADATA)
-            .addSnapshot(testSnapshots.get(0)) // Unreferenced - can be deleted
-            .addSnapshot(testSnapshots.get(1)) // Unreferenced - can be deleted
-            .addSnapshot(testSnapshots.get(2)) // Unreferenced - can be deleted
-            .setBranchSnapshot(
-                testSnapshots.get(3), SnapshotRef.MAIN_BRANCH) // Referenced - cannot be deleted
-            .build();
-
-    // Get the current head snapshot that is referenced by main branch
-    Snapshot referencedSnapshot = testSnapshots.get(3);
-
-    // Create new metadata that attempts to delete the referenced snapshot
-    // The SNAPSHOTS_JSON_KEY will only include first 3 snapshots (excluding the referenced one)
-    // But SNAPSHOTS_REFS_KEY will still reference snapshot 3, causing a conflict
-    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY,
-        SnapshotsUtil.serializedSnapshots(
-            testSnapshots.subList(0, 3))); // Only snapshots 0-2, excluding referenced snapshot 3
-    properties.put(
-        CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
-                referencedSnapshot))); // Still references snapshot 3
-
-    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
-
-    // This MUST throw IllegalArgumentException for referenced snapshots
-    InvalidIcebergSnapshotException exception =
-        Assertions.assertThrows(
-            InvalidIcebergSnapshotException.class,
-            () ->
-                openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
-                    baseMetadata, newMetadata),
-            "Should throw InvalidIcebergSnapshotException when trying to delete referenced snapshot");
-
-    // Verify error message mentions the reference
-    String expectedMessage =
-        "Cannot delete the current snapshot "
-            + referencedSnapshot.snapshotId()
-            + " without adding replacement snapshots";
-    Assertions.assertTrue(
-        exception.getMessage().contains(expectedMessage),
-        "Error message should indicate snapshot is still referenced: " + exception.getMessage());
-  }
-
   /**
    * Tests that unreferenced snapshots can be successfully deleted from the table. Verifies that
    * deleted snapshots are removed from metadata and tracked in properties.
@@ -1473,138 +1417,6 @@ void testDeleteSnapshotWithNoReference() throws IOException {
     }
   }
 
-  /**
-   * Tests that attempting to delete a snapshot referenced by multiple branches throws an exception.
-   * Verifies that InvalidIcebergSnapshotException is thrown indicating the snapshot is still
-   * referenced.
-   */
-  @Test
-  void testDeleteSnapshotWithMultipleReference() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-
-    // Create metadata with 2 snapshots: one referenced by multiple branches, one unreferenced
-    Snapshot sharedSnapshot = testSnapshots.get(0); // This will be referenced by both branches
-    Snapshot mainSnapshot = testSnapshots.get(1); // This one stays but is not referenced
-
-    TableMetadata baseMetadata =
-        TableMetadata.buildFrom(BASE_TABLE_METADATA)
-            .addSnapshot(sharedSnapshot)
-            .addSnapshot(mainSnapshot)
-            .setRef(
-                SnapshotRef.MAIN_BRANCH,
-                SnapshotRef.branchBuilder(mainSnapshot.snapshotId()).build())
-            .setRef(
-                "feature_branch", SnapshotRef.branchBuilder(sharedSnapshot.snapshotId()).build())
-            .setRef(
-                "feature_branch1", SnapshotRef.branchBuilder(sharedSnapshot.snapshotId()).build())
-            .build();
-
-    // Attempt to delete the shared snapshot by creating new metadata without it
-    // Keep the unreferenced snapshot so we're not deleting everything
-    List<Snapshot> remainingSnapshots = List.of(mainSnapshot);
-
-    // Keep refs pointing to the shared snapshot (causing conflict)
-    Map<String, SnapshotRef> refs = baseMetadata.refs();
-    Map<String, String> serializedRefs =
-        refs.entrySet().stream()
-            .collect(
-                Collectors.toMap(
-                    Map.Entry::getKey,
-                    e -> org.apache.iceberg.SnapshotRefParser.toJson(e.getValue())));
-
-    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(remainingSnapshots));
-    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(serializedRefs));
-
-    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
-
-    // This MUST throw InvalidIcebergSnapshotException for snapshots referenced by multiple branches
-    InvalidIcebergSnapshotException exception =
-        Assertions.assertThrows(
-            InvalidIcebergSnapshotException.class,
-            () ->
-                openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
-                    baseMetadata, newMetadata),
-            "Should throw InvalidIcebergSnapshotException when trying to delete snapshot referenced by multiple branches");
-
-    // Verify error message mentions the snapshot is still referenced
-    String exceptionMessage = exception.getMessage();
-    Assertions.assertTrue(
-        exceptionMessage.contains("Still referenced by refs")
-            || exceptionMessage.contains("still referenced"),
-        "Error message should indicate snapshot is still referenced by branches: "
-            + exceptionMessage);
-  }
-
-  /**
-   * Tests that attempting to delete a snapshot referenced by a tag throws an exception. Verifies
-   * that InvalidIcebergSnapshotException is thrown with branch/tag reference details.
-   */
-  @Test
-  void testDeleteSnapshotWithBranchReference() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-
-    // Create base metadata with snapshots - add the tagged snapshot first
-    Snapshot taggedSnapshot = testSnapshots.get(0);
-    TableMetadata baseMetadata =
-        TableMetadata.buildFrom(BASE_TABLE_METADATA)
-            .addSnapshot(taggedSnapshot) // Add the snapshot first so it exists
-            .setBranchSnapshot(testSnapshots.get(testSnapshots.size() - 1), SnapshotRef.MAIN_BRANCH)
-            .setRef(
-                "feature_branch",
-                SnapshotRef.tagBuilder(taggedSnapshot.snapshotId()).build()) // Now create the tag
-            .build();
-    // Add remaining snapshots
-    for (int i = 1; i < testSnapshots.size() - 1; i++) {
-      baseMetadata =
-          TableMetadata.buildFrom(baseMetadata).addSnapshot(testSnapshots.get(i)).build();
-    }
-
-    // Make baseMetadata effectively final for lambda usage
-    final TableMetadata finalBaseMetadata = baseMetadata;
-
-    // Attempt to delete snapshot that has a tag reference by creating new metadata without it
-    List<Snapshot> remainingSnapshots =
-        finalBaseMetadata.snapshots().stream()
-            .filter(s -> s.snapshotId() != taggedSnapshot.snapshotId())
-            .collect(Collectors.toList());
-
-    Map<String, String> properties = new HashMap<>(finalBaseMetadata.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(remainingSnapshots));
-    // Keep refs pointing to the tagged snapshot (causing conflict)
-    Map<String, String> serializedRefs =
-        finalBaseMetadata.refs().entrySet().stream()
-            .collect(
-                Collectors.toMap(
-                    Map.Entry::getKey,
-                    e -> org.apache.iceberg.SnapshotRefParser.toJson(e.getValue())));
-    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(serializedRefs));
-
-    TableMetadata newMetadata = finalBaseMetadata.replaceProperties(properties);
-
-    // This MUST throw InvalidIcebergSnapshotException for snapshots referenced by tags
-    InvalidIcebergSnapshotException exception =
-        Assertions.assertThrows(
-            InvalidIcebergSnapshotException.class,
-            () ->
-                openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
-                    finalBaseMetadata, newMetadata),
-            "Should throw InvalidIcebergSnapshotException when trying to delete snapshot referenced by tag");
-
-    // Verify error message mentions tag reference
-    String exceptionMessage = exception.getMessage();
-    String expectedMessage =
-        "Cannot delete snapshots that are still referenced by branches/tags: snapshot "
-            + taggedSnapshot.snapshotId()
-            + " (referenced by: feature_branch)";
-    Assertions.assertTrue(
-        exceptionMessage.contains(expectedMessage),
-        "Error message should indicate snapshot is still referenced by branches: "
-            + exceptionMessage);
-  }
-
   /**
    * Tests that attempting to delete an empty list of snapshots makes no changes to the table.
    * Verifies that no snapshots are deleted and no deletion properties are set.
@@ -1992,52 +1804,6 @@ void testDeleteAllUnreferencedSnapshotsSucceeds() throws IOException {
     }
   }
 
-  /**
-   * Tests that multiple branches can point to different snapshots without conflicts. Verifies that
-   * commits with multiple valid branch references succeed without exceptions.
-   */
-  @Test
-  void testValidMultipleBranchesWithDifferentSnapshots() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-
-    // Create base metadata
-    TableMetadata baseMetadata =
-        TableMetadata.buildFrom(BASE_TABLE_METADATA)
-            .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
-            .build();
-
-    // New metadata includes all snapshots (base + new ones)
-    List<Snapshot> allSnapshots = testSnapshots.subList(0, 4); // snapshots 0, 1, 2, 3
-
-    // Create snapshotRefs where each branch points to a DIFFERENT snapshot (valid scenario)
-    Map<String, SnapshotRef> validRefs = new HashMap<>();
-    validRefs.put("branch_a", SnapshotRef.branchBuilder(testSnapshots.get(1).snapshotId()).build());
-    validRefs.put("branch_b", SnapshotRef.branchBuilder(testSnapshots.get(2).snapshotId()).build());
-    validRefs.put("branch_c", SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId()).build());
-
-    // Serialize the refs
-    Map<String, String> serializedRefs =
-        validRefs.entrySet().stream()
-            .collect(
-                Collectors.toMap(
-                    Map.Entry::getKey,
-                    e -> org.apache.iceberg.SnapshotRefParser.toJson(e.getValue())));
-
-    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
-    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(serializedRefs));
-
-    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
-
-    // This should NOT throw an exception
-    Assertions.assertDoesNotThrow(
-        () ->
-            openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
-                baseMetadata, newMetadata),
-        "Should NOT throw exception when branches target different snapshots");
-  }
-
   /**
    * Tests the standard Write-Audit-Publish (WAP) workflow where a staged snapshot becomes main.
    * Verifies that pulling a WAP snapshot into the main branch succeeds without errors.
@@ -2379,76 +2145,6 @@ void testMultipleDiffCommitWithMultipleBranchesPointingToSameSnapshot() throws I
     }
   }
 
-  /**
-   * Tests that committing with multiple branches pointing to the same snapshot throws an exception.
-   * Verifies that InvalidIcebergSnapshotException is thrown for ambiguous branch configurations.
-   */
-  @Test
-  void testMultipleDiffCommitWithInvalidBranch() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-
-    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
-        Mockito.mockStatic(TableMetadataParser.class)) {
-
-      // ========== Create base at N with 1 snapshot ==========
-      TableMetadata baseAtN =
-          TableMetadata.buildFrom(BASE_TABLE_METADATA)
-              .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
-              .build();
-
-      // ========== Create metadata with 4 snapshots but only snapshot 0 in refs ==========
-      // Build metadata with all 4 snapshots added, but keep MAIN pointing to snapshot 0
-      TableMetadata.Builder builder = TableMetadata.buildFrom(baseAtN);
-      // Add snapshots 1, 2, 3 without assigning them to any branch
-      builder.addSnapshot(testSnapshots.get(1));
-      builder.addSnapshot(testSnapshots.get(2));
-      builder.addSnapshot(testSnapshots.get(3));
-      TableMetadata metadataWithAllSnapshots = builder.build();
-
-      // Add custom properties with AMBIGUOUS branch refs - both pointing to same snapshot
-      Map<String, String> divergentProperties =
-          new HashMap<>(metadataWithAllSnapshots.properties());
-      List<Snapshot> snapshots4 = testSnapshots.subList(0, 4);
-      divergentProperties.put(
-          CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots4));
-
-      // Create INVALID refs: both MAIN and feature_a pointing to the SAME snapshot (ambiguous!)
-      Map<String, String> ambiguousRefs = new HashMap<>();
-      ambiguousRefs.put(
-          SnapshotRef.MAIN_BRANCH,
-          SnapshotRefParser.toJson(
-              SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId()).build()));
-      ambiguousRefs.put(
-          "feature_a",
-          SnapshotRefParser.toJson(
-              SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId())
-                  .build())); // Same snapshot!
-
-      divergentProperties.put(
-          CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(ambiguousRefs));
-
-      TableMetadata finalDivergentMetadata =
-          metadataWithAllSnapshots.replaceProperties(divergentProperties);
-
-      InvalidIcebergSnapshotException exception =
-          Assertions.assertThrows(
-              InvalidIcebergSnapshotException.class,
-              () -> openHouseInternalTableOperations.doCommit(baseAtN, finalDivergentMetadata),
-              "Should throw InvalidIcebergSnapshotException when multiple branches point to same snapshot");
-
-      // Verify error message indicates the ambiguous commit
-      String exceptionMessage = exception.getMessage();
-      String expectedMessage =
-          "Ambiguous commit: snapshot "
-              + testSnapshots.get(3).snapshotId()
-              + " is referenced by multiple branches [feature_a, main] in a single commit. Each snapshot can only be referenced by one branch per commit.";
-      Assertions.assertTrue(
-          exceptionMessage.contains(expectedMessage),
-          "Error message should indicate multiple branches targeting same snapshot: "
-              + exceptionMessage);
-    }
-  }
-
   /**
    * Tests divergent commit (N to N+3) that includes both regular snapshots and WAP staged
    * snapshots. Verifies that staged snapshots remain properly tracked as staged even during a
diff --git a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java b/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
deleted file mode 100644
index 478059289..000000000
--- a/integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/BranchTestSpark3_5.java
+++ /dev/null
@@ -1,2370 +0,0 @@
-package com.linkedin.openhouse.spark.catalogtest;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-import com.linkedin.openhouse.tablestest.OpenHouseSparkITest;
-import java.util.List;
-import java.util.Set;
-import java.util.stream.Collectors;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.SparkSession;
-import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.MethodOrderer;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.TestMethodOrder;
-import org.junit.jupiter.api.parallel.Execution;
-import org.junit.jupiter.api.parallel.ExecutionMode;
-
-/**
- * Comprehensive tests for multi-branch WAP operations in Spark 3.5. Tests validate the enhanced
- * applySnapshotOperations functionality that supports: - Non-main branch operations (add/expire
- * snapshots from any branch) - WAP.id staging with multi-branch support - Cherry picking between
- * any branches - Fast forward merges for all branches - Backward compatibility with main-only
- * workflows - Forward compatibility for future wap.branch features
- */
-@TestMethodOrder(MethodOrderer.MethodName.class)
-@Execution(ExecutionMode.SAME_THREAD)
-public class BranchTestSpark3_5 extends OpenHouseSparkITest {
-
-  /**
-   * Comprehensive cleanup method to prevent configuration and table bleed-over between tests. This
-   * ensures WAP configurations are properly reset and all test tables are dropped.
-   */
-  @AfterEach
-  public void cleanupAfterTest() {
-    try (SparkSession spark = getSparkSession()) {
-      // Clear WAP configurations to prevent bleed-over between tests
-      spark.conf().unset("spark.wap.id");
-      spark.conf().unset("spark.wap.branch");
-
-      // Drop all test tables to ensure clean state for next test
-      // Get all tables in the d1 database that start with branch_test_ or similar patterns
-      try {
-        List<Row> tables = spark.sql("SHOW TABLES IN openhouse.d1").collectAsList();
-        for (Row table : tables) {
-          String tableName = table.getString(1); // table name is in second column
-          if (tableName.startsWith("branch_test_") || tableName.startsWith("test_")) {
-            String fullTableName = "openhouse.d1." + tableName;
-            spark.sql("DROP TABLE IF EXISTS " + fullTableName);
-          }
-        }
-      } catch (Exception e) {
-        // If SHOW TABLES fails, try to drop common test table patterns
-        // This is a fallback in case the database doesn't exist yet
-        for (String pattern : new String[] {"branch_test_", "test_"}) {
-          for (int i = 0; i < 10; i++) { // Try a few recent timestamps
-            long timestamp = System.currentTimeMillis() - (i * 1000);
-            String tableName = "openhouse.d1." + pattern + timestamp;
-            try {
-              spark.sql("DROP TABLE IF EXISTS " + tableName);
-            } catch (Exception ignored) {
-              // Ignore failures for non-existent tables
-            }
-          }
-        }
-      }
-    } catch (Exception e) {
-      // Log but don't fail the test for cleanup issues
-      System.err.println("Warning: Failed to cleanup after test: " + e.getMessage());
-    }
-  }
-
-  // ===== BASIC BRANCH OPERATIONS =====
-
-  @Test
-  public void testBasicBranchOperations() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-
-      // Add initial data to main
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main.initial')");
-
-      // Create feature branch
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-
-      // Write to feature branch
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature-a.data1')");
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature-a.data2')");
-
-      // Verify branch isolation
-      assertEquals(
-          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main has 1 row
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size()); // feature-a has 3 rows
-
-      // Verify refs exist for both branches
-      List<Row> refs =
-          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
-      assertEquals(2, refs.size());
-      Set<String> refNames = refs.stream().map(row -> row.getString(0)).collect(Collectors.toSet());
-      assertTrue(refNames.contains("feature_a"));
-      assertTrue(refNames.contains("main"));
-    }
-  }
-
-  // ===== WAP STAGING WITH MULTI-BRANCH SUPPORT =====
-
-  @Test
-  public void testWapStagingWithBranches() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup main and feature branches
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main.data')");
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature-a.data')");
-
-      // Stage WAP snapshot (should not affect any branch)
-      spark.conf().set("spark.wap.id", "multi-branch-wap");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('wap.staged.data')");
-
-      // Verify WAP staging doesn't affect branch visibility
-      assertEquals(
-          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main unchanged
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size()); // feature-a unchanged
-
-      // Verify WAP snapshot exists but no new refs
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + ".snapshots")
-              .collectAsList()
-              .size()); // 1 main + 1 feature + 1 wap
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + ".refs")
-              .collectAsList()
-              .size()); // main + feature-a only
-
-      // Verify WAP snapshot has correct properties
-      List<Row> wapSnapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'multi-branch-wap'")
-              .collectAsList();
-      assertEquals(1, wapSnapshots.size());
-    }
-  }
-
-  @Test
-  public void testWapIdAfterCreateTable() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "wap_id_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      // Create table without any data (no snapshots exist)
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-
-      // Enable WAP on the table
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Verify no snapshots exist yet
-      List<Row> initialSnapshots =
-          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
-      assertEquals(0, initialSnapshots.size(), "Newly created table should have no snapshots");
-
-      // Verify no branches exist yet (empty table has no branches)
-      List<Row> initialRefs = spark.sql("SELECT name FROM " + tableName + ".refs").collectAsList();
-      assertEquals(0, initialRefs.size(), "Empty table should have no branches initially");
-
-      // ===== WAP STAGING ON EMPTY TABLE =====
-
-      // 1. Create WAP staged data on empty table (should create staging snapshot)
-      spark.conf().set("spark.wap.id", "wap-stage-1");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('wap_staged_data_1')");
-      spark.conf().unset("spark.wap.id");
-
-      // Verify WAP snapshot was created
-      List<Row> wapSnapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id, summary FROM "
-                      + tableName
-                      + ".snapshots "
-                      + "WHERE summary['wap.id'] = 'wap-stage-1'")
-              .collectAsList();
-      assertEquals(1, wapSnapshots.size(), "Should have 1 WAP staged snapshot");
-
-      // Verify no branches exist yet (WAP staging doesn't create branches)
-      List<Row> refsAfterWapStaging =
-          spark.sql("SELECT name FROM " + tableName + ".refs").collectAsList();
-      assertEquals(0, refsAfterWapStaging.size(), "WAP staging should not create branches");
-
-      // Verify WAP data is not visible in main queries (no branch exists)
-      assertEquals(
-          0,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Should see 0 rows - no branches exist, WAP data is staged");
-
-      // ===== WAP PUBLISHING TO CREATE MAIN BRANCH =====
-
-      // 2. Publish WAP data to create main branch
-      String wapSnapshotId = String.valueOf(wapSnapshots.get(0).getLong(0));
-      spark.sql(
-          "CALL openhouse.system.cherrypick_snapshot('"
-              + tableName.replace("openhouse.", "")
-              + "', "
-              + wapSnapshotId
-              + ")");
-
-      // Verify main branch now exists
-      List<Row> refsAfterPublishing =
-          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
-      assertEquals(
-          1, refsAfterPublishing.size(), "Should have main branch after publishing WAP data");
-      assertEquals("main", refsAfterPublishing.get(0).getString(0), "Should have main branch");
-
-      // Verify WAP data is now visible in main branch
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should have 1 row after WAP publishing");
-
-      List<Row> mainData = spark.sql("SELECT name FROM " + tableName + "").collectAsList();
-      assertEquals(
-          "wap_staged_data_1", mainData.get(0).getString(0), "Should see published WAP data");
-
-      // ===== MULTI-WAP OPERATIONS =====
-
-      // 3. Create multiple WAP staged data sets
-      spark.conf().set("spark.wap.id", "wap-stage-2");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('wap_staged_data_2')");
-      spark.conf().unset("spark.wap.id");
-
-      spark.conf().set("spark.wap.id", "wap-stage-3");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('wap_staged_data_3')");
-      spark.conf().unset("spark.wap.id");
-
-      // Verify multiple WAP snapshots exist
-      List<Row> allWapSnapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots "
-                      + "WHERE summary['wap.id'] IS NOT NULL")
-              .collectAsList();
-      assertEquals(3, allWapSnapshots.size(), "Should have 3 WAP staged snapshots");
-
-      // Verify main branch is unchanged (WAP data is staged)
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should still have 1 row (staged WAP not visible)");
-
-      // ===== SELECTIVE WAP PUBLISHING =====
-
-      // 4. Publish second WAP data set only
-      List<Row> wap2Snapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots "
-                      + "WHERE summary['wap.id'] = 'wap-stage-2'")
-              .collectAsList();
-      String wap2SnapshotId = String.valueOf(wap2Snapshots.get(0).getLong(0));
-      spark.sql(
-          "CALL openhouse.system.cherrypick_snapshot('"
-              + tableName.replace("openhouse.", "")
-              + "', "
-              + wap2SnapshotId
-              + ")");
-
-      // Verify main branch now has both published datasets
-      assertEquals(
-          2,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should have 2 rows after second WAP publishing");
-
-      List<Row> publishedData =
-          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
-      assertEquals(
-          "wap_staged_data_1",
-          publishedData.get(0).getString(0),
-          "First row should be first WAP data");
-      assertEquals(
-          "wap_staged_data_2",
-          publishedData.get(1).getString(0),
-          "Second row should be second WAP data");
-
-      // ===== UNPUBLISHED WAP DATA VERIFICATION =====
-
-      // 5. Verify third WAP data remains unpublished
-      List<Row> wap3Snapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots "
-                      + "WHERE summary['wap.id'] = 'wap-stage-3'")
-              .collectAsList();
-      assertEquals(1, wap3Snapshots.size(), "Third WAP snapshot should still exist");
-
-      // Verify unpublished WAP data is not visible
-      List<Row> currentData =
-          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
-      assertFalse(
-          currentData.stream().anyMatch(row -> "wap_staged_data_3".equals(row.getString(0))),
-          "Unpublished WAP data should not be visible in main branch");
-
-      // ===== REGULAR DATA VS WAP DATA =====
-
-      // 6. Add regular (non-WAP) data to main branch
-      spark.sql("INSERT INTO " + tableName + " VALUES ('regular_data')");
-
-      // Verify main branch now has mixed data
-      assertEquals(
-          3,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should have 3 rows (2 published WAP + 1 regular)");
-
-      List<Row> finalData =
-          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
-      assertEquals("regular_data", finalData.get(0).getString(0), "Should contain regular data");
-      assertEquals(
-          "wap_staged_data_1", finalData.get(1).getString(0), "Should contain first WAP data");
-      assertEquals(
-          "wap_staged_data_2", finalData.get(2).getString(0), "Should contain second WAP data");
-
-      // ===== SNAPSHOT HISTORY VERIFICATION =====
-
-      // 7. Verify snapshot counts and types
-      List<Row> totalSnapshots =
-          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
-      assertTrue(
-          totalSnapshots.size() >= 4, "Should have at least 4 snapshots (3 WAP + 1 regular)");
-
-      // Verify WAP snapshots still exist in metadata
-      List<Row> remainingWapSnapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots "
-                      + "WHERE summary['wap.id'] IS NOT NULL")
-              .collectAsList();
-      assertEquals(
-          3, remainingWapSnapshots.size(), "All 3 WAP snapshots should still exist in metadata");
-
-      // Verify main branch has the latest published snapshot (points to regular INSERT snapshot)
-      List<Row> mainSnapshotRef =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
-              .collectAsList();
-      assertEquals(1, mainSnapshotRef.size(), "Main branch should exist and point to a snapshot");
-    }
-  }
-
-  @Test
-  public void testBranchAfterCreateTable() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      // Create table without any data (no snapshots exist)
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-
-      // Verify no snapshots exist yet
-      List<Row> initialSnapshots =
-          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
-      assertEquals(0, initialSnapshots.size(), "Newly created table should have no snapshots");
-
-      // Create branch on table with no existing snapshots
-      // According to Iceberg specification, this should succeed and create an empty snapshot
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_on_empty");
-
-      // Verify that an empty snapshot was created for the branch
-      List<Row> snapshotsAfterBranchCreation =
-          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
-      assertEquals(
-          1,
-          snapshotsAfterBranchCreation.size(),
-          "Should have 1 empty snapshot after branch creation");
-
-      // Verify the empty snapshot properties
-      Row emptySnapshot = snapshotsAfterBranchCreation.get(0);
-      // The parent_id should be null for the empty snapshot
-      assertNull(
-          emptySnapshot.get(emptySnapshot.fieldIndex("parent_id")),
-          "Empty snapshot should have no parent");
-
-      // Verify the branch was created successfully
-      List<Row> refsAfterBranchCreation =
-          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
-      assertEquals(
-          1,
-          refsAfterBranchCreation.size(),
-          "Should have feature_on_empty branch (main doesn't exist yet)");
-      assertEquals(
-          "feature_on_empty",
-          refsAfterBranchCreation.get(0).getString(0),
-          "Should have feature_on_empty branch");
-
-      // Verify that main branch still doesn't exist (as expected)
-      boolean hasMainBranch =
-          refsAfterBranchCreation.stream().anyMatch(row -> "main".equals(row.getString(0)));
-      assertFalse(hasMainBranch, "Main branch should not exist on empty table");
-
-      // Now insert data to create a data snapshot
-      spark.sql("INSERT INTO " + tableName + " VALUES ('initial.data')");
-
-      // Verify we now have 2 snapshots (empty + data)
-      List<Row> snapshotsAfterInsert =
-          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
-      assertEquals(
-          2, snapshotsAfterInsert.size(), "Should have 2 snapshots after insert (empty + data)");
-
-      // Now we should have main branch as well
-      List<Row> refsAfterInsert =
-          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
-      assertEquals(2, refsAfterInsert.size(), "Should have feature_on_empty and main branches");
-
-      // Create another branch after data exists - this should also succeed
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_after_snapshot");
-
-      // Verify we now have 3 branches (feature_on_empty, main, feature_after_snapshot)
-      List<Row> refs =
-          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
-      assertEquals(3, refs.size(), "Should have 3 branches total");
-
-      // Verify all expected branches exist
-      Set<String> branchNames =
-          refs.stream().map(row -> row.getString(0)).collect(Collectors.toSet());
-      assertTrue(branchNames.contains("feature_on_empty"), "feature_on_empty branch should exist");
-      assertTrue(branchNames.contains("main"), "main branch should exist");
-      assertTrue(
-          branchNames.contains("feature_after_snapshot"),
-          "feature_after_snapshot branch should exist");
-
-      // ===== BRANCH ISOLATION TESTING =====
-
-      // 1. Test initial state: main and feature_after_snapshot should have the same data
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should have 1 row");
-      assertEquals(
-          1,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_after_snapshot'")
-              .collectAsList()
-              .size(),
-          "feature_after_snapshot branch should have 1 row");
-
-      // 2. Test feature_on_empty branch should be empty (points to empty snapshot)
-      assertEquals(
-          0,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_on_empty'")
-              .collectAsList()
-              .size(),
-          "feature_on_empty branch should have 0 rows (points to empty snapshot)");
-
-      // 3. Add data to feature_on_empty branch only
-      spark.sql(
-          "INSERT INTO " + tableName + ".branch_feature_on_empty VALUES ('empty_branch_data')");
-
-      // Verify isolation: feature_on_empty now has data, others unchanged
-      assertEquals(
-          1,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_on_empty'")
-              .collectAsList()
-              .size(),
-          "feature_on_empty branch should now have 1 row");
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should still have 1 row (unchanged)");
-      assertEquals(
-          1,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_after_snapshot'")
-              .collectAsList()
-              .size(),
-          "feature_after_snapshot branch should still have 1 row (unchanged)");
-
-      // 4. Add different data to feature_after_snapshot branch
-      spark.sql(
-          "INSERT INTO "
-              + tableName
-              + ".branch_feature_after_snapshot VALUES ('snapshot_branch_data')");
-
-      // Verify isolation: each branch has its own data
-      assertEquals(
-          1,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_on_empty'")
-              .collectAsList()
-              .size(),
-          "feature_on_empty branch should still have 1 row");
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should still have 1 row (unchanged)");
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_after_snapshot'")
-              .collectAsList()
-              .size(),
-          "feature_after_snapshot branch should now have 2 rows");
-
-      // 5. Add data to main branch
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main_branch_data')");
-
-      // Verify complete isolation: each branch maintains its own data
-      assertEquals(
-          1,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_on_empty'")
-              .collectAsList()
-              .size(),
-          "feature_on_empty branch should still have 1 row");
-      assertEquals(
-          2,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should now have 2 rows");
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_after_snapshot'")
-              .collectAsList()
-              .size(),
-          "feature_after_snapshot branch should still have 2 rows (unchanged)");
-
-      // 6. Verify data content isolation
-      List<Row> featureOnEmptyData =
-          spark
-              .sql(
-                  "SELECT name FROM "
-                      + tableName
-                      + " VERSION AS OF 'feature_on_empty' ORDER BY name")
-              .collectAsList();
-      assertEquals(
-          "empty_branch_data",
-          featureOnEmptyData.get(0).getString(0),
-          "feature_on_empty should contain its specific data");
-
-      List<Row> mainData =
-          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
-      assertEquals(
-          "initial.data", mainData.get(0).getString(0), "main should contain initial data");
-      assertEquals(
-          "main_branch_data",
-          mainData.get(1).getString(0),
-          "main should contain its specific data");
-
-      List<Row> featureAfterSnapshotData =
-          spark
-              .sql(
-                  "SELECT name FROM "
-                      + tableName
-                      + " VERSION AS OF 'feature_after_snapshot' ORDER BY name")
-              .collectAsList();
-      assertEquals(
-          "initial.data",
-          featureAfterSnapshotData.get(0).getString(0),
-          "feature_after_snapshot should contain initial data");
-      assertEquals(
-          "snapshot_branch_data",
-          featureAfterSnapshotData.get(1).getString(0),
-          "feature_after_snapshot should contain its specific data");
-
-      // 7. Verify snapshot isolation: each branch should have different snapshot histories
-      List<Row> mainSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
-              .collectAsList();
-      List<Row> featureOnEmptySnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_on_empty'")
-              .collectAsList();
-      List<Row> featureAfterSnapshotSnapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".refs WHERE name = 'feature_after_snapshot'")
-              .collectAsList();
-
-      assertNotEquals(
-          mainSnapshots.get(0).getLong(0),
-          featureOnEmptySnapshots.get(0).getLong(0),
-          "main and feature_on_empty should point to different snapshots");
-      assertNotEquals(
-          mainSnapshots.get(0).getLong(0),
-          featureAfterSnapshotSnapshots.get(0).getLong(0),
-          "main and feature_after_snapshot should point to different snapshots");
-      assertNotEquals(
-          featureOnEmptySnapshots.get(0).getLong(0),
-          featureAfterSnapshotSnapshots.get(0).getLong(0),
-          "feature_on_empty and feature_after_snapshot should point to different snapshots");
-    }
-  }
-
-  @Test
-  public void testWapBranchAfterCreateTable() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "wap_branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      // Create table without any data (no snapshots exist)
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-
-      // Enable WAP on the table
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Verify no snapshots exist yet
-      List<Row> initialSnapshots =
-          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
-      assertEquals(0, initialSnapshots.size(), "Newly created table should have no snapshots");
-
-      // Create branch on table with no existing snapshots
-      // According to Iceberg specification, this should succeed and create an empty snapshot
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_empty");
-
-      // Verify that an empty snapshot was created for the branch
-      List<Row> snapshotsAfterBranchCreation =
-          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList();
-      assertEquals(
-          1,
-          snapshotsAfterBranchCreation.size(),
-          "Should have 1 empty snapshot after branch creation");
-
-      // Verify the branch was created successfully
-      List<Row> refsAfterBranchCreation =
-          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
-      assertEquals(
-          1,
-          refsAfterBranchCreation.size(),
-          "Should have feature_empty branch (main doesn't exist yet)");
-      assertEquals(
-          "feature_empty",
-          refsAfterBranchCreation.get(0).getString(0),
-          "Should have feature_empty branch");
-
-      // ===== WAP BRANCH TESTING =====
-
-      // 1. Set WAP branch and insert data - should go to the feature_empty branch
-      spark.conf().set("spark.wap.branch", "feature_empty");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('wap_branch_data_1')");
-
-      // Verify WAP branch data is visible when spark.wap.branch is set
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Should see 1 row when spark.wap.branch=feature_empty");
-
-      List<Row> wapBranchData = spark.sql("SELECT name FROM " + tableName + "").collectAsList();
-      assertEquals(
-          "wap_branch_data_1", wapBranchData.get(0).getString(0), "Should see WAP branch data");
-
-      // Verify feature_empty branch directly
-      assertEquals(
-          1,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_empty'")
-              .collectAsList()
-              .size(),
-          "feature_empty branch should have 1 row");
-
-      // Unset WAP branch - queries should now see main branch (which doesn't exist yet, so empty)
-      spark.conf().unset("spark.wap.branch");
-      assertEquals(
-          0,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Should see 0 rows when spark.wap.branch is unset (main doesn't exist)");
-
-      // ===== MULTI-BRANCH WAP TESTING =====
-
-      // 2. Create main branch with regular data
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main_data')");
-
-      // Now we should have main branch
-      List<Row> refs =
-          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
-      assertEquals(2, refs.size(), "Should have feature_empty and main branches");
-
-      // Verify main branch data when spark.wap.branch is unset
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should have 1 row");
-      List<Row> mainData = spark.sql("SELECT name FROM " + tableName + "").collectAsList();
-      assertEquals("main_data", mainData.get(0).getString(0), "Should see main branch data");
-
-      // 3. Create another branch and test WAP branch functionality
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_wap_test");
-
-      // Set WAP branch to feature_wap_test and add data
-      spark.conf().set("spark.wap.branch", "feature_wap_test");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('wap_branch_data_2')");
-
-      // Verify WAP branch data is visible when spark.wap.branch=feature_wap_test
-      assertEquals(
-          2,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Should see 2 rows when spark.wap.branch=feature_wap_test (main_data + wap_branch_data_2)");
-
-      // ===== COMPREHENSIVE WAP BRANCH ISOLATION VERIFICATION =====
-
-      // Verify each branch has independent data
-      spark.conf().unset("spark.wap.branch");
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should have 1 row when WAP branch is unset");
-
-      assertEquals(
-          1,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_empty'")
-              .collectAsList()
-              .size(),
-          "feature_empty branch should have 1 row");
-
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_wap_test'")
-              .collectAsList()
-              .size(),
-          "feature_wap_test branch should have 2 rows");
-
-      // Verify data content isolation
-      List<Row> finalMainData =
-          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
-      assertEquals("main_data", finalMainData.get(0).getString(0), "main should contain main_data");
-
-      List<Row> finalFeatureEmptyData =
-          spark
-              .sql("SELECT name FROM " + tableName + " VERSION AS OF 'feature_empty' ORDER BY name")
-              .collectAsList();
-      assertEquals(
-          "wap_branch_data_1",
-          finalFeatureEmptyData.get(0).getString(0),
-          "feature_empty should contain wap_branch_data_1");
-
-      List<Row> finalFeatureWapTestData =
-          spark
-              .sql(
-                  "SELECT name FROM "
-                      + tableName
-                      + " VERSION AS OF 'feature_wap_test' ORDER BY name")
-              .collectAsList();
-      assertEquals(
-          "main_data",
-          finalFeatureWapTestData.get(0).getString(0),
-          "feature_wap_test should contain main_data");
-      assertEquals(
-          "wap_branch_data_2",
-          finalFeatureWapTestData.get(1).getString(0),
-          "feature_wap_test should contain wap_branch_data_2");
-
-      // ===== WAP BRANCH SWITCHING BEHAVIOR =====
-
-      // 4. Test switching between WAP branches
-      spark.conf().set("spark.wap.branch", "feature_empty");
-      List<Row> switchToFeatureEmpty =
-          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
-      assertEquals(
-          "wap_branch_data_1",
-          switchToFeatureEmpty.get(0).getString(0),
-          "Should see feature_empty data when switched");
-
-      spark.conf().set("spark.wap.branch", "feature_wap_test");
-      List<Row> switchToFeatureWapTest =
-          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
-      assertEquals(
-          2, switchToFeatureWapTest.size(), "Should see 2 rows when switched to feature_wap_test");
-      assertEquals(
-          "main_data", switchToFeatureWapTest.get(0).getString(0), "First row should be main_data");
-      assertEquals(
-          "wap_branch_data_2",
-          switchToFeatureWapTest.get(1).getString(0),
-          "Second row should be wap_branch_data_2");
-
-      // 5. Test INSERT behavior with WAP branch set
-      spark.conf().set("spark.wap.branch", "feature_empty");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('additional_wap_data')");
-
-      // Verify the insert went to the WAP branch
-      assertEquals(
-          2,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Should see 2 rows in feature_empty after additional insert");
-
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_empty'")
-              .collectAsList()
-              .size(),
-          "feature_empty branch should have 2 rows after additional insert");
-
-      // Verify other branches are unchanged
-      spark.conf().unset("spark.wap.branch");
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should still have 1 row (unchanged)");
-
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_wap_test'")
-              .collectAsList()
-              .size(),
-          "feature_wap_test branch should still have 2 rows (unchanged)");
-
-      // ===== SNAPSHOT HISTORY VERIFICATION =====
-
-      // 6. Verify that each branch points to different snapshots
-      List<Row> finalMainSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
-              .collectAsList();
-      List<Row> finalFeatureEmptySnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_empty'")
-              .collectAsList();
-      List<Row> finalFeatureWapTestSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_wap_test'")
-              .collectAsList();
-
-      assertNotEquals(
-          finalMainSnapshots.get(0).getLong(0),
-          finalFeatureEmptySnapshots.get(0).getLong(0),
-          "main and feature_empty should point to different snapshots");
-      assertNotEquals(
-          finalMainSnapshots.get(0).getLong(0),
-          finalFeatureWapTestSnapshots.get(0).getLong(0),
-          "main and feature_wap_test should point to different snapshots");
-      assertNotEquals(
-          finalFeatureEmptySnapshots.get(0).getLong(0),
-          finalFeatureWapTestSnapshots.get(0).getLong(0),
-          "feature_empty and feature_wap_test should point to different snapshots");
-
-      // Clean up WAP branch configuration
-      spark.conf().unset("spark.wap.branch");
-    }
-  }
-
-  @Test
-  public void testWapBranchCommitWithMultipleBranches() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "wap_multi_branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      // Create table and enable WAP
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Step 1: Start with main at snapshotX
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main_data')");
-
-      // Verify main branch exists and get its snapshot
-      List<Row> mainSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
-              .collectAsList();
-      assertEquals(1, mainSnapshots.size(), "Main branch should exist");
-      long snapshotX = mainSnapshots.get(0).getLong(0);
-      System.out.println("SnapshotX (main): " + snapshotX);
-
-      // Step 2: Create branchA from main → branchA also points to snapshotX
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH branchA");
-
-      // Verify branchA points to same snapshot as main
-      List<Row> branchASnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
-              .collectAsList();
-      assertEquals(1, branchASnapshots.size(), "BranchA should exist");
-      long branchASnapshotAfterCreation = branchASnapshots.get(0).getLong(0);
-      assertEquals(
-          snapshotX, branchASnapshotAfterCreation, "BranchA should point to same snapshot as main");
-
-      // Step 3: Set branchA as the WAP branch and commit data
-      spark.conf().set("spark.wap.branch", "branchA");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('branchA_data')");
-
-      // Step 4: Verify branchA now points to snapshotY (child of snapshotX)
-      List<Row> branchASnapshotsAfterCommit =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
-              .collectAsList();
-      long snapshotY = branchASnapshotsAfterCommit.get(0).getLong(0);
-      assertNotEquals(
-          snapshotX, snapshotY, "BranchA should now point to a new snapshot (snapshotY)");
-      System.out.println("SnapshotY (branchA after commit): " + snapshotY);
-
-      // Verify branchA has both main_data and branchA_data
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchA'")
-              .collectAsList()
-              .size(),
-          "BranchA should have 2 rows after commit");
-
-      // Verify main still points to snapshotX and has only main_data
-      spark.conf().unset("spark.wap.branch");
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should still have 1 row");
-
-      // Step 5: Create branchB from branchA → branchB points to snapshotY
-      // First create the branch, then set it to point to the same snapshot as branchA
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH branchB");
-      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'branchB', 'branchA')");
-
-      // Verify branchB points to snapshotY
-      List<Row> branchBSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
-              .collectAsList();
-      long branchBSnapshotAfterCreation = branchBSnapshots.get(0).getLong(0);
-      assertEquals(
-          snapshotY,
-          branchBSnapshotAfterCreation,
-          "BranchB should point to snapshotY (same as branchA)");
-
-      // Step 6: Make a commit on branchB → branchB now points to snapshotZ (child of snapshotY)
-      // Use direct branch syntax to target branchB specifically
-      spark.sql("INSERT INTO " + tableName + ".branch_branchB VALUES ('branchB_data')");
-
-      // Verify branchB now points to snapshotZ
-      List<Row> branchBSnapshotsAfterCommit =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
-              .collectAsList();
-      long snapshotZ = branchBSnapshotsAfterCommit.get(0).getLong(0);
-      assertNotEquals(
-          snapshotY, snapshotZ, "BranchB should now point to a new snapshot (snapshotZ)");
-      System.out.println("SnapshotZ (branchB after commit): " + snapshotZ);
-
-      // ===== VERIFICATION OF FINAL STATE =====
-
-      // Verify all three branches exist and point to different snapshots
-      List<Row> allRefs =
-          spark
-              .sql("SELECT name, snapshot_id FROM " + tableName + ".refs ORDER BY name")
-              .collectAsList();
-      assertEquals(3, allRefs.size(), "Should have 3 branches: main, branchA, branchB");
-
-      // Verify snapshot relationships
-      List<Row> mainFinalSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
-              .collectAsList();
-      List<Row> branchAFinalSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
-              .collectAsList();
-      List<Row> branchBFinalSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
-              .collectAsList();
-
-      long finalSnapshotX = mainFinalSnapshots.get(0).getLong(0);
-      long finalSnapshotY = branchAFinalSnapshots.get(0).getLong(0);
-      long finalSnapshotZ = branchBFinalSnapshots.get(0).getLong(0);
-
-      assertEquals(snapshotX, finalSnapshotX, "Main should still point to snapshotX");
-      assertEquals(snapshotY, finalSnapshotY, "BranchA should still point to snapshotY");
-      assertEquals(snapshotZ, finalSnapshotZ, "BranchB should point to snapshotZ");
-
-      // Verify data isolation between branches
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should have 1 row");
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchA'")
-              .collectAsList()
-              .size(),
-          "BranchA should have 2 rows");
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchB'")
-              .collectAsList()
-              .size(),
-          "BranchB should have 3 rows");
-
-      // Verify content
-      List<Row> mainData =
-          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
-      assertEquals("main_data", mainData.get(0).getString(0), "Main should contain main_data");
-
-      List<Row> branchAData =
-          spark
-              .sql("SELECT name FROM " + tableName + " VERSION AS OF 'branchA' ORDER BY name")
-              .collectAsList();
-      assertEquals(
-          "branchA_data", branchAData.get(0).getString(0), "BranchA should contain branchA_data");
-      assertEquals(
-          "main_data", branchAData.get(1).getString(0), "BranchA should contain main_data");
-
-      List<Row> branchBData =
-          spark
-              .sql("SELECT name FROM " + tableName + " VERSION AS OF 'branchB' ORDER BY name")
-              .collectAsList();
-      assertEquals(
-          "branchA_data", branchBData.get(0).getString(0), "BranchB should contain branchA_data");
-      assertEquals(
-          "branchB_data", branchBData.get(1).getString(0), "BranchB should contain branchB_data");
-      assertEquals(
-          "main_data", branchBData.get(2).getString(0), "BranchB should contain main_data");
-
-      // Verify parent-child relationships in snapshot metadata
-      List<Row> allSnapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id, parent_id FROM "
-                      + tableName
-                      + ".snapshots ORDER BY committed_at")
-              .collectAsList();
-      assertTrue(allSnapshots.size() >= 3, "Should have at least 3 snapshots");
-
-      // Clean up WAP configuration
-      spark.conf().unset("spark.wap.branch");
-    }
-  }
-
-  @Test
-  public void testRegularCommitWithMultipleBranches() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "regular_multi_branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      // Create table (no WAP needed for this test)
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-
-      // Step 1: Start with main at snapshotX
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main_data')");
-
-      // Verify main branch exists and get its snapshot
-      List<Row> mainSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
-              .collectAsList();
-      assertEquals(1, mainSnapshots.size(), "Main branch should exist");
-      long snapshotX = mainSnapshots.get(0).getLong(0);
-      System.out.println("SnapshotX (main): " + snapshotX);
-
-      // Step 2: Create branchA from main → branchA also points to snapshotX
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH branchA");
-
-      // Verify branchA points to same snapshot as main
-      List<Row> branchASnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
-              .collectAsList();
-      assertEquals(1, branchASnapshots.size(), "BranchA should exist");
-      long branchASnapshotAfterCreation = branchASnapshots.get(0).getLong(0);
-      assertEquals(
-          snapshotX, branchASnapshotAfterCreation, "BranchA should point to same snapshot as main");
-
-      // Step 3: Commit some data on branchA → branchA now points to snapshotY (child of snapshotX)
-      spark.sql("INSERT INTO " + tableName + ".branch_branchA VALUES ('branchA_data')");
-
-      // Verify branchA now points to snapshotY (child of snapshotX)
-      List<Row> branchASnapshotsAfterCommit =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
-              .collectAsList();
-      long snapshotY = branchASnapshotsAfterCommit.get(0).getLong(0);
-      assertNotEquals(
-          snapshotX, snapshotY, "BranchA should now point to a new snapshot (snapshotY)");
-      System.out.println("SnapshotY (branchA after commit): " + snapshotY);
-
-      // Verify branchA has both main_data and branchA_data
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchA'")
-              .collectAsList()
-              .size(),
-          "BranchA should have 2 rows after commit");
-
-      // Verify main still points to snapshotX and has only main_data
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should still have 1 row");
-
-      // Step 4: Create branchB from branchA → branchB points to snapshotY
-      // First create the branch, then set it to point to the same snapshot as branchA
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH branchB");
-      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'branchB', 'branchA')");
-
-      // Verify branchB points to snapshotY
-      List<Row> branchBSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
-              .collectAsList();
-      long branchBSnapshotAfterCreation = branchBSnapshots.get(0).getLong(0);
-      assertEquals(
-          snapshotY,
-          branchBSnapshotAfterCreation,
-          "BranchB should point to snapshotY (same as branchA)");
-
-      // Step 5: Make a commit on branchB → branchB now points to snapshotZ (child of snapshotY)
-      spark.sql("INSERT INTO " + tableName + ".branch_branchB VALUES ('branchB_data')");
-
-      // Verify branchB now points to snapshotZ
-      List<Row> branchBSnapshotsAfterCommit =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
-              .collectAsList();
-      long snapshotZ = branchBSnapshotsAfterCommit.get(0).getLong(0);
-      assertNotEquals(
-          snapshotY, snapshotZ, "BranchB should now point to a new snapshot (snapshotZ)");
-      System.out.println("SnapshotZ (branchB after commit): " + snapshotZ);
-
-      // ===== VERIFICATION OF FINAL STATE =====
-
-      // Verify all three branches exist and point to different snapshots
-      List<Row> allRefs =
-          spark
-              .sql("SELECT name, snapshot_id FROM " + tableName + ".refs ORDER BY name")
-              .collectAsList();
-      assertEquals(3, allRefs.size(), "Should have 3 branches: main, branchA, branchB");
-
-      // Verify snapshot relationships
-      List<Row> mainFinalSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
-              .collectAsList();
-      List<Row> branchAFinalSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
-              .collectAsList();
-      List<Row> branchBFinalSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
-              .collectAsList();
-
-      long finalSnapshotX = mainFinalSnapshots.get(0).getLong(0);
-      long finalSnapshotY = branchAFinalSnapshots.get(0).getLong(0);
-      long finalSnapshotZ = branchBFinalSnapshots.get(0).getLong(0);
-
-      assertEquals(snapshotX, finalSnapshotX, "Main should still point to snapshotX");
-      assertEquals(snapshotY, finalSnapshotY, "BranchA should still point to snapshotY");
-      assertEquals(snapshotZ, finalSnapshotZ, "BranchB should point to snapshotZ");
-
-      // Verify all snapshots are different
-      assertNotEquals(
-          finalSnapshotX, finalSnapshotY, "SnapshotX and snapshotY should be different");
-      assertNotEquals(
-          finalSnapshotY, finalSnapshotZ, "SnapshotY and snapshotZ should be different");
-      assertNotEquals(
-          finalSnapshotX, finalSnapshotZ, "SnapshotX and snapshotZ should be different");
-
-      // Verify data isolation between branches
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main branch should have 1 row");
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchA'")
-              .collectAsList()
-              .size(),
-          "BranchA should have 2 rows");
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchB'")
-              .collectAsList()
-              .size(),
-          "BranchB should have 3 rows");
-
-      // Verify content
-      List<Row> mainData =
-          spark.sql("SELECT name FROM " + tableName + " ORDER BY name").collectAsList();
-      assertEquals("main_data", mainData.get(0).getString(0), "Main should contain main_data");
-
-      List<Row> branchAData =
-          spark
-              .sql("SELECT name FROM " + tableName + " VERSION AS OF 'branchA' ORDER BY name")
-              .collectAsList();
-      assertEquals(
-          "branchA_data", branchAData.get(0).getString(0), "BranchA should contain branchA_data");
-      assertEquals(
-          "main_data", branchAData.get(1).getString(0), "BranchA should contain main_data");
-
-      List<Row> branchBData =
-          spark
-              .sql("SELECT name FROM " + tableName + " VERSION AS OF 'branchB' ORDER BY name")
-              .collectAsList();
-      assertEquals(
-          "branchA_data", branchBData.get(0).getString(0), "BranchB should contain branchA_data");
-      assertEquals(
-          "branchB_data", branchBData.get(1).getString(0), "BranchB should contain branchB_data");
-      assertEquals(
-          "main_data", branchBData.get(2).getString(0), "BranchB should contain main_data");
-
-      // ===== TEST THE SPECIFIC SCENARIO THAT WOULD HAVE BEEN AMBIGUOUS =====
-
-      // At this point, we have:
-      // - main points to snapshotX
-      // - branchA points to snapshotY
-      // - branchB points to snapshotZ
-      //
-      // If we were to commit a new snapshot as child of snapshotY, our fixed logic should work
-      // because only the explicitly targeted branch (via branch-specific insert syntax) should be
-      // considered
-
-      // Verify that we can still commit to branchA even though multiple branches exist
-      spark.sql("INSERT INTO " + tableName + ".branch_branchA VALUES ('additional_branchA_data')");
-
-      // Verify branchA advanced but branchB didn't
-      List<Row> branchAFinalSnapshots2 =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchA'")
-              .collectAsList();
-      List<Row> branchBFinalSnapshots2 =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'branchB'")
-              .collectAsList();
-
-      long finalSnapshotY2 = branchAFinalSnapshots2.get(0).getLong(0);
-      long finalSnapshotZ2 = branchBFinalSnapshots2.get(0).getLong(0);
-
-      assertNotEquals(snapshotY, finalSnapshotY2, "BranchA should have advanced to a new snapshot");
-      assertEquals(snapshotZ, finalSnapshotZ2, "BranchB should remain at the same snapshot");
-
-      // Verify data counts
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchA'")
-              .collectAsList()
-              .size(),
-          "BranchA should now have 3 rows");
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'branchB'")
-              .collectAsList()
-              .size(),
-          "BranchB should still have 3 rows (unchanged)");
-    }
-  }
-
-  // ===== CHERRY PICKING BETWEEN BRANCHES =====
-
-  @Test
-  public void testCherryPickToMainWithFeatureBranch() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup branches
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main.base')");
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-
-      // Create WAP snapshot
-      spark.conf().set("spark.wap.id", "feature-target-wap");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('wap.for.feature')");
-      String wapSnapshotId =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'feature-target-wap'")
-              .first()
-              .mkString();
-
-      // CRITICAL: Unset WAP ID before advancing main branch to force non-fast-forward cherry-pick
-      spark.conf().unset("spark.wap.id");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main.advance')");
-
-      // Cherry-pick WAP to main branch (this tests our enhanced applySnapshotOperations)
-      // Main should have 2 rows now (main.base + main.advance)
-      assertEquals(2, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
-      spark.sql(
-          String.format(
-              "CALL openhouse.system.cherrypick_snapshot('"
-                  + tableName.replace("openhouse.", "")
-                  + "', %s)",
-              wapSnapshotId));
-
-      // Verify cherry-pick worked - 3 rows of data should appear in main (main.base + main.advance
-      // + wap.for.feature)
-      assertEquals(3, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
-      assertEquals(
-          1,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size());
-
-      // Verify published WAP snapshot properties
-      List<Row> publishedSnapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['published-wap-id'] = 'feature-target-wap'")
-              .collectAsList();
-      assertTrue(
-          publishedSnapshots.size() >= 1,
-          "Should find at least one snapshot with published-wap-id");
-    }
-  }
-
-  // ===== FAST FORWARD MERGES =====
-
-  @Test
-  public void testFastForwardMergeToMain() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-
-      // Setup base data
-      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
-
-      // Create feature branch from main
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-
-      // Advance feature branch
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data1')");
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data2')");
-
-      // Verify initial state
-      assertEquals(
-          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main has 1 row
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size()); // feature has 3 rows
-
-      // Fast-forward main to feature_a
-      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'main', 'feature_a')");
-
-      // Verify fast-forward worked - main should now have same data as feature_a
-      assertEquals(3, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size());
-
-      // Verify both branches point to same snapshot
-      String mainSnapshot =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
-              .first()
-              .mkString();
-      String featureSnapshot =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_a'")
-              .first()
-              .mkString();
-      assertEquals(mainSnapshot, featureSnapshot);
-    }
-  }
-
-  @Test
-  public void testFastForwardMergeToFeature() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-
-      // Setup base data
-      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
-
-      // Create feature branch from main
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-
-      // Advance main branch (feature_a stays at base)
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main.data1')");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main.data2')");
-
-      // Verify initial state
-      assertEquals(
-          3,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main has 3 rows
-      assertEquals(
-          1,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size()); // feature has 1 row
-
-      // Fast-forward feature_a to main
-      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'feature_a', 'main')");
-
-      // Verify fast-forward worked - feature_a should now have same data as main
-      assertEquals(3, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size());
-
-      // Verify both branches point to same snapshot
-      String mainSnapshot =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
-              .first()
-              .mkString();
-      String featureSnapshot =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_a'")
-              .first()
-              .mkString();
-      assertEquals(mainSnapshot, featureSnapshot);
-    }
-  }
-
-  @Test
-  public void testFastForwardFeatureToMainAndWapId() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup base data
-      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
-
-      // Create feature branch
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-
-      // Create WAP snapshot
-      spark.conf().set("spark.wap.id", "test-wap");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('wap.data')");
-      String wapSnapshotId =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'test-wap'")
-              .first()
-              .mkString();
-
-      // Unset WAP ID before advancing feature branch normally (not using WAP - else WAP staged
-      // snapshot will apply to feature branch)
-      spark.conf().unset("spark.wap.id");
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data')");
-
-      // Verify WAP snapshot doesn't interfere with fast-forward
-      assertEquals(
-          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main unchanged
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size()); // feature advanced
-
-      // Fast-forward main to feature_a should work despite WAP presence
-      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'main', 'feature_a')");
-
-      // Verify fast-forward worked
-      assertEquals(2, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size());
-
-      // Verify WAP snapshot is still available for cherry-pick
-      List<Row> wapSnapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'test-wap'")
-              .collectAsList();
-      assertEquals(1, wapSnapshots.size());
-      assertEquals(wapSnapshotId, wapSnapshots.get(0).mkString());
-    }
-  }
-
-  @Test
-  public void testFastForwardMergeBetweenTwoFeatureBranches() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-
-      // Setup base data
-      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
-
-      // Create two feature branches from main
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_b");
-
-      // Advance feature_a
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature_a.data1')");
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature_a.data2')");
-
-      // Verify initial state
-      assertEquals(
-          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main has 1 row
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size()); // feature_a has 3 rows
-      assertEquals(
-          1,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_b'")
-              .collectAsList()
-              .size()); // feature_b has 1 row
-
-      // Fast-forward feature_b to feature_a
-      spark.sql(
-          "CALL openhouse.system.fast_forward('" + tableName + "', 'feature_b', 'feature_a')");
-
-      // Verify fast-forward worked
-      assertEquals(
-          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main unchanged
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size()); // feature_a unchanged
-      assertEquals(
-          3,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_b'")
-              .collectAsList()
-              .size()); // feature_b now matches feature_a
-
-      // Verify feature_a and feature_b point to same snapshot
-      String featureASnapshot =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_a'")
-              .first()
-              .mkString();
-      String featureBSnapshot =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_b'")
-              .first()
-              .mkString();
-      assertEquals(featureASnapshot, featureBSnapshot);
-    }
-  }
-
-  @Test
-  public void testFastForwardMergeIncompatibleLineage() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-
-      // Setup base data
-      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
-
-      // Create feature branch
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-
-      // Advance both branches independently (creating divergent history)
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main.divergent')");
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.divergent')");
-
-      // Verify divergent state
-      assertEquals(
-          2,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main has 2 rows
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size()); // feature_a has 2 rows (different)
-
-      // Attempt fast-forward should fail due to incompatible lineage
-      assertThrows(
-          Exception.class,
-          () ->
-              spark.sql(
-                  "CALL openhouse.system.fast_forward('" + tableName + "', 'main', 'feature_a')"),
-          "Fast-forward should fail when branches have divergent history");
-
-      // Verify branches remain unchanged after failed fast-forward
-      assertEquals(2, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size());
-
-      // Verify snapshots are still different
-      String mainSnapshot =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'main'")
-              .first()
-              .mkString();
-      String featureSnapshot =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".refs WHERE name = 'feature_a'")
-              .first()
-              .mkString();
-      assertNotEquals(mainSnapshot, featureSnapshot);
-    }
-  }
-
-  // ===== SNAPSHOT EXPIRATION FROM NON-MAIN BRANCHES =====
-
-  @Test
-  public void testSnapshotExpirationFromFeatureBranch() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup: Create multiple snapshots to have some that can be expired
-
-      // 1. Create initial main data
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main.initial')");
-
-      // 2. Create feature branch from main
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-
-      // 3. Add multiple snapshots to feature branch
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data1')");
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data2')");
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.data3')");
-
-      // 4. Query metadata tables to find snapshots that are NOT current branch heads
-
-      // Get all snapshots
-      List<Row> allSnapshots =
-          spark
-              .sql("SELECT snapshot_id FROM " + tableName + ".snapshots ORDER BY committed_at")
-              .collectAsList();
-      assertTrue(allSnapshots.size() >= 4, "Should have at least 4 snapshots");
-
-      // Get current branch head snapshots from refs table
-      List<Row> branchHeads =
-          spark.sql("SELECT snapshot_id FROM " + tableName + ".refs").collectAsList();
-      Set<String> referencedSnapshots =
-          branchHeads.stream().map(row -> row.mkString()).collect(Collectors.toSet());
-
-      System.out.println(
-          "DEBUG: All snapshots: "
-              + allSnapshots.stream().map(Row::mkString).collect(Collectors.toList()));
-      System.out.println("DEBUG: Referenced snapshots (branch heads): " + referencedSnapshots);
-
-      // Find snapshots that are NOT referenced by any branch head
-      List<String> unreferencedSnapshots =
-          allSnapshots.stream()
-              .map(Row::mkString)
-              .filter(snapshotId -> !referencedSnapshots.contains(snapshotId))
-              .collect(Collectors.toList());
-
-      System.out.println("DEBUG: Unreferenced snapshots: " + unreferencedSnapshots);
-
-      // We should have at least one unreferenced snapshot (intermediate feature snapshots)
-      assertFalse(
-          unreferencedSnapshots.isEmpty(),
-          "Should have at least one unreferenced snapshot to expire");
-
-      // Select the first unreferenced snapshot to expire
-      String snapshotToExpire = unreferencedSnapshots.get(0);
-
-      // Verify this snapshot exists in the snapshots table
-      List<Row> beforeExpiration =
-          spark.sql("SELECT snapshot_id FROM " + tableName + ".snapshots").collectAsList();
-      assertTrue(
-          beforeExpiration.stream().anyMatch(row -> row.mkString().equals(snapshotToExpire)),
-          "Snapshot to expire should exist before expiration");
-
-      // Expire the unreferenced snapshot
-      spark.sql(
-          String.format(
-              "CALL openhouse.system.expire_snapshots(table => '"
-                  + tableName.replace("openhouse.", "")
-                  + "', snapshot_ids => Array(%s))",
-              snapshotToExpire));
-
-      // Verify snapshot is gone
-      List<Row> afterExpiration =
-          spark.sql("SELECT snapshot_id FROM " + tableName + ".snapshots").collectAsList();
-      assertFalse(
-          afterExpiration.stream().anyMatch(row -> row.mkString().equals(snapshotToExpire)),
-          "Expired snapshot should no longer exist");
-
-      // Verify branches are still intact after expiration
-      // Main should have: main.initial = 1 row
-      assertEquals(1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
-
-      // Feature_a should have: main.initial + feature.data1 + feature.data2 + feature.data3 = 4
-      // rows
-      assertEquals(
-          4,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size());
-    }
-  }
-
-  @Test
-  public void testWapSnapshotExpirationWithMultipleBranches() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup multi-branch environment
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main.base')");
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('feature.base')");
-
-      // Create multiple WAP snapshots
-      spark.conf().set("spark.wap.id", "wap-to-keep");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('wap.keep.data')");
-
-      spark.conf().set("spark.wap.id", "wap-to-expire");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('wap.expire.data')");
-      String expireWapId =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'wap-to-expire'")
-              .first()
-              .mkString();
-
-      // Expire specific WAP snapshot
-      spark.sql(
-          String.format(
-              "CALL openhouse.system.expire_snapshots(table => '"
-                  + tableName.replace("openhouse.", "")
-                  + "', snapshot_ids => Array(%s))",
-              expireWapId));
-
-      // Verify selective WAP expiration
-      List<Row> remainingWaps =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'wap-to-keep'")
-              .collectAsList();
-      assertEquals(1, remainingWaps.size());
-
-      List<Row> expiredWaps =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'wap-to-expire'")
-              .collectAsList();
-      assertEquals(0, expiredWaps.size());
-
-      // Verify branches unchanged
-      assertEquals(1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size());
-    }
-  }
-
-  // ===== BACKWARD COMPATIBILITY =====
-
-  @Test
-  public void testWapIdOnFeatureBranchAndMainBranch() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup base data in main branch
-      spark.sql("INSERT INTO " + tableName + " VALUES (0, 'main_base')");
-
-      // Create feature branch and add base data to it
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES (10, 'feature_base')");
-
-      // Verify initial state - main has 1 row, feature has 2 rows
-      assertEquals(1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
-      assertEquals(
-          2, spark.sql("SELECT * FROM " + tableName + ".branch_feature_a").collectAsList().size());
-
-      // Create WAP staged snapshot (invisible to normal reads)
-      spark.conf().set("spark.wap.id", "shared-wap-snapshot");
-      spark.sql("INSERT INTO " + tableName + " VALUES (99, 'wap_staged_data')");
-
-      // Get the WAP snapshot ID
-      String wapSnapshotId =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'shared-wap-snapshot'")
-              .first()
-              .mkString();
-
-      // Verify WAP staging doesn't affect normal reads (principle 2: invisible until published)
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main should not see WAP staged data");
-      assertEquals(
-          2,
-          spark.sql("SELECT * FROM " + tableName + ".branch_feature_a").collectAsList().size(),
-          "Feature should not see WAP staged data");
-
-      // Clear WAP ID to avoid contamination
-      spark.conf().unset("spark.wap.id");
-
-      // Cherry-pick the same WAP snapshot to MAIN branch
-      spark.sql(
-          String.format(
-              "CALL openhouse.system.cherrypick_snapshot('"
-                  + tableName.replace("openhouse.", "")
-                  + "', %s)",
-              wapSnapshotId));
-
-      // Verify cherry-pick to main worked - main should now have the WAP data
-      List<Row> mainAfterCherryPick = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
-      assertEquals(2, mainAfterCherryPick.size(), "Main should have base + cherry-picked WAP data");
-      boolean mainHasWapData =
-          mainAfterCherryPick.stream().anyMatch(row -> "wap_staged_data".equals(row.getString(1)));
-      assertTrue(mainHasWapData, "Main should contain cherry-picked WAP data");
-
-      // Verify feature branch is still unaffected
-      assertEquals(
-          2,
-          spark.sql("SELECT * FROM " + tableName + ".branch_feature_a").collectAsList().size(),
-          "Feature branch should be unchanged");
-
-      // Demonstrate that WAP snapshots work independently on different branches by
-      // creating a separate WAP snapshot while on the feature branch context
-
-      // Create another WAP snapshot that could be applied to feature branch
-      spark.conf().set("spark.wap.id", "feature-specific-wap");
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES (50, 'feature_wap_data')");
-
-      String featureWapSnapshotId =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'feature-specific-wap'")
-              .first()
-              .mkString();
-
-      // Clear WAP ID again
-      spark.conf().unset("spark.wap.id");
-
-      // Verify that both WAP snapshots exist but are invisible to normal reads
-      assertEquals(
-          2,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "Main should still only show cherry-picked data");
-      assertEquals(
-          2,
-          spark.sql("SELECT * FROM " + tableName + ".branch_feature_a").collectAsList().size(),
-          "Feature should not show new WAP data yet");
-
-      // Show that we can cherry-pick the feature WAP to main as well (demonstrating cross-branch
-      // capability)
-      spark.sql(
-          String.format(
-              "CALL openhouse.system.cherrypick_snapshot('"
-                  + tableName.replace("openhouse.", "")
-                  + "', %s)",
-              featureWapSnapshotId));
-
-      // Verify main now has both cherry-picked WAP snapshots
-      List<Row> finalMain = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
-      assertEquals(3, finalMain.size(), "Main should have base + first WAP + second WAP data");
-
-      boolean hasOriginalWap =
-          finalMain.stream().anyMatch(row -> "wap_staged_data".equals(row.getString(1)));
-      boolean hasFeatureWap =
-          finalMain.stream().anyMatch(row -> "feature_wap_data".equals(row.getString(1)));
-      assertTrue(hasOriginalWap, "Main should contain first cherry-picked WAP data");
-      assertTrue(hasFeatureWap, "Main should contain second cherry-picked WAP data");
-
-      // Verify feature branch is still independent and unchanged by main's cherry-picks
-      List<Row> finalFeature =
-          spark.sql("SELECT * FROM " + tableName + ".branch_feature_a").collectAsList();
-      assertEquals(
-          2, finalFeature.size(), "Feature should still only have base + feature_base data");
-
-      // Verify that both original WAP snapshots are still available in metadata
-      List<Row> originalWapSnapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'shared-wap-snapshot'")
-              .collectAsList();
-      List<Row> featureWapSnapshots =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'feature-specific-wap'")
-              .collectAsList();
-      assertEquals(1, originalWapSnapshots.size(), "Original WAP snapshot should still exist");
-      assertEquals(1, featureWapSnapshots.size(), "Feature WAP snapshot should still exist");
-    }
-  }
-
-  @Test
-  public void testBackwardCompatibilityMainBranchOnly() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Traditional main-only workflow (should work exactly as before)
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main.1')");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('main.2')");
-
-      // WAP staging (traditional)
-      spark.conf().set("spark.wap.id", "compat-test-wap");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('compat.wap.data')");
-      String wapSnapshotId =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'compat-test-wap'")
-              .first()
-              .mkString();
-
-      // Traditional cherry-pick to main
-      spark.sql(
-          String.format(
-              "CALL openhouse.system.cherrypick_snapshot('"
-                  + tableName.replace("openhouse.", "")
-                  + "', %s)",
-              wapSnapshotId));
-
-      // Verify traditional behavior preserved
-      assertEquals(3, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
-      List<Row> refs = spark.sql("SELECT name FROM " + tableName + ".refs").collectAsList();
-      assertEquals(1, refs.size());
-      Set<String> refNames = refs.stream().map(row -> row.getString(0)).collect(Collectors.toSet());
-      assertTrue(refNames.contains("main"));
-
-      // Traditional snapshot queries should work
-      assertTrue(
-          spark.sql("SELECT * FROM " + tableName + ".snapshots").collectAsList().size() >= 3);
-    }
-  }
-
-  // ===== WAP BRANCH TESTING =====
-  // These tests validate the intended WAP branch functionality.
-  // WAP branch should stage writes to a specific branch without affecting main.
-
-  @Test
-  public void testStagedChangesVisibleViaConf() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "wap_branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup base data
-      spark.sql("INSERT INTO " + tableName + " VALUES (1, 'base_data')");
-
-      // Create WAP branch and insert staged data
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH wap_branch");
-      spark.conf().set("spark.wap.branch", "wap_branch");
-      spark.sql("INSERT INTO " + tableName + " VALUES (2, 'staged_data')");
-
-      // When spark.wap.branch is set, SELECT should see WAP branch data (2 rows)
-      List<Row> wapVisible = spark.sql("SELECT * FROM " + tableName).collectAsList();
-      assertEquals(
-          2, wapVisible.size(), "Should see both base and staged data when wap.branch is set");
-
-      // When spark.wap.branch is unset, SELECT should see only main data (1 row)
-      spark.conf().unset("spark.wap.branch");
-      List<Row> mainOnly = spark.sql("SELECT * FROM " + tableName).collectAsList();
-      assertEquals(1, mainOnly.size(), "Should see only base data when wap.branch is unset");
-    }
-  }
-
-  @Test
-  public void testStagedChangesHidden() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "wap_branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup base data
-      spark.sql("INSERT INTO " + tableName + " VALUES (0, 'base')");
-
-      // Create WAP branch for staged operations
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH wap");
-
-      // Set WAP branch for staged testing
-      spark.conf().set("spark.wap.branch", "wap");
-
-      // INSERT INTO table -> inserts to the WAP branch
-      spark.sql("INSERT INTO " + tableName + " VALUES (1, 'staged_data')");
-
-      // When spark.wap.branch is set:
-      // ✅ SELECT * FROM table → reads from the WAP branch
-      List<Row> tableData = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
-      assertEquals(
-          2,
-          tableData.size(),
-          "SELECT * FROM table should read from WAP branch when spark.wap.branch is set");
-      boolean hasBase = tableData.stream().anyMatch(row -> "base".equals(row.getString(1)));
-      boolean hasStaged =
-          tableData.stream().anyMatch(row -> "staged_data".equals(row.getString(1)));
-      assertTrue(hasBase, "WAP branch should contain base data");
-      assertTrue(hasStaged, "WAP branch should contain staged data");
-
-      // ✅ SELECT * FROM table.branch_wap → explicitly reads from WAP branch
-      List<Row> wapBranchData =
-          spark.sql("SELECT * FROM " + tableName + ".branch_wap").collectAsList();
-      assertEquals(2, wapBranchData.size(), "Explicit WAP branch select should show staged data");
-
-      // ✅ SELECT * FROM table.branch_main → explicitly reads from main branch
-      List<Row> mainBranchData =
-          spark.sql("SELECT * FROM " + tableName + ".branch_main").collectAsList();
-      assertEquals(
-          1, mainBranchData.size(), "Explicit main branch select should only show base data");
-      assertEquals(
-          "base", mainBranchData.get(0).getString(1), "Main branch should only contain base data");
-
-      // Now unset spark.wap.branch and ensure main branch is the referenced data
-      spark.conf().unset("spark.wap.branch");
-
-      // When spark.wap.branch is unset, SELECT * FROM table should read from main branch
-      List<Row> afterUnsetData = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
-      assertEquals(
-          1,
-          afterUnsetData.size(),
-          "SELECT * FROM table should read from main branch when spark.wap.branch is unset");
-      assertEquals(
-          "base",
-          afterUnsetData.get(0).getString(1),
-          "After unsetting wap.branch, should read from main");
-
-      // INSERT INTO table should go to main branch when spark.wap.branch is unset
-      spark.sql("INSERT INTO " + tableName + " VALUES (2, 'main_data')");
-      List<Row> finalMainData = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
-      assertEquals(
-          2, finalMainData.size(), "Main branch should now have 2 rows after unsetting wap.branch");
-      boolean hasMainData =
-          finalMainData.stream().anyMatch(row -> "main_data".equals(row.getString(1)));
-      assertTrue(hasMainData, "Main branch should contain the newly inserted data");
-
-      // WAP branch should remain unchanged
-      List<Row> finalWapData =
-          spark.sql("SELECT * FROM " + tableName + ".branch_wap").collectAsList();
-      assertEquals(
-          2, finalWapData.size(), "WAP branch should remain unchanged with base + staged data");
-    }
-  }
-
-  @Test
-  public void testPublishWapBranch() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "wap_branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup base data
-      spark.sql("INSERT INTO " + tableName + " VALUES (0, 'base')");
-
-      // Create staging branch
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH staging");
-
-      // Stage changes to WAP branch
-      spark.conf().set("spark.wap.branch", "staging");
-      spark.sql("INSERT INTO " + tableName + " VALUES (1, 'staged_for_publish')");
-
-      // When spark.wap.branch is set, SELECT * FROM table should read from WAP branch
-      assertEquals(
-          2,
-          spark.sql("SELECT * FROM " + tableName + "").collectAsList().size(),
-          "SELECT * FROM table should read from WAP branch when spark.wap.branch is set");
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'staging'")
-              .collectAsList()
-              .size(),
-          "Staging should have staged data");
-
-      // Verify main branch still only has base data
-      assertEquals(
-          1,
-          spark.sql("SELECT * FROM " + tableName + ".branch_main").collectAsList().size(),
-          "Main branch should not have staged data");
-
-      // Fast-forward main branch to staging branch to publish the staged changes
-      spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'main', 'staging')");
-
-      // Verify data is now published to main branch (need to explicitly check main branch)
-      List<Row> publishedData =
-          spark.sql("SELECT * FROM " + tableName + ".branch_main").collectAsList();
-      assertEquals(2, publishedData.size(), "Main branch should now have published data");
-
-      boolean hasPublished =
-          publishedData.stream().anyMatch(row -> "staged_for_publish".equals(row.getString(1)));
-      assertTrue(hasPublished, "Main branch should contain the published staged data");
-
-      // Verify that with wap.branch still set, SELECT * FROM table still reads from WAP branch
-      List<Row> wapData = spark.sql("SELECT * FROM " + tableName + "").collectAsList();
-      assertEquals(2, wapData.size(), "SELECT * FROM table should still read from WAP branch");
-    }
-  }
-
-  @Test
-  public void testWapIdAndWapBranchIncompatible() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "wap_branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup base data
-      spark.sql("INSERT INTO " + tableName + " VALUES (0, 'base')");
-
-      // Create staging branch
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH staging");
-
-      // Set both WAP ID and WAP branch - this should be invalid
-      spark.conf().set("spark.wap.id", "test-wap-id");
-      spark.conf().set("spark.wap.branch", "staging");
-
-      // Attempt to write with both configurations should fail
-      assertThrows(
-          Exception.class,
-          () -> spark.sql("INSERT INTO " + tableName + " VALUES (1, 'invalid')"),
-          "Cannot use both wap.id and wap.branch simultaneously");
-    }
-  }
-
-  @Test
-  public void testCannotWriteToBothBranches() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "wap_branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (id int, data string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup base data
-      spark.sql("INSERT INTO " + tableName + " VALUES (0, 'base')");
-
-      // Create branches
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature");
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH staging");
-
-      // Set WAP branch
-      spark.conf().set("spark.wap.branch", "staging");
-
-      // ❌ INVALID: Cannot write to both normal branch and WAP branch
-      assertThrows(
-          Exception.class,
-          () -> spark.sql("INSERT INTO " + tableName + ".branch_feature VALUES (1, 'invalid')"),
-          "Cannot write to explicit branch when wap.branch is set");
-    }
-  }
-
-  // ===== ERROR SCENARIOS =====
-
-  @Test
-  public void testErrorInsertToNonExistentBranch() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-
-      // Setup base data
-      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
-
-      // Create one valid branch
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-
-      // Verify valid branch works
-      spark.sql("INSERT INTO " + tableName + ".branch_feature_a VALUES ('valid.data')");
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size());
-
-      // Attempt to insert into non-existent branch should fail
-      assertThrows(
-          Exception.class,
-          () ->
-              spark.sql("INSERT INTO " + tableName + ".branch_nonexistent VALUES ('invalid.data')"),
-          "Insert to non-existent branch should fail");
-
-      // Verify table state unchanged after failed insert
-      assertEquals(
-          1, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main unchanged
-      assertEquals(
-          2,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size()); // feature_a unchanged
-
-      // Verify only valid branches exist
-      List<Row> refs =
-          spark.sql("SELECT name FROM " + tableName + ".refs ORDER BY name").collectAsList();
-      assertEquals(2, refs.size());
-      Set<String> refNames = refs.stream().map(row -> row.getString(0)).collect(Collectors.toSet());
-      assertTrue(refNames.contains("feature_a"));
-      assertTrue(refNames.contains("main"));
-    }
-  }
-
-  @Test
-  public void testErrorCherryPickNonExistentWapId() throws Exception {
-    try (SparkSession spark = getSparkSession()) {
-      String tableId = "branch_test_" + System.currentTimeMillis();
-      String tableName = "openhouse.d1." + tableId;
-
-      spark.sql("CREATE TABLE " + tableName + " (name string)");
-      spark.sql("ALTER TABLE " + tableName + " SET TBLPROPERTIES ('write.wap.enabled'='true')");
-
-      // Setup base data and branch
-      spark.sql("INSERT INTO " + tableName + " VALUES ('base.data')");
-      spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH feature_a");
-
-      // Create a valid WAP snapshot
-      spark.conf().set("spark.wap.id", "valid-wap");
-      spark.sql("INSERT INTO " + tableName + " VALUES ('valid.wap.data')");
-      String validWapId =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'valid-wap'")
-              .first()
-              .mkString();
-
-      // Verify valid WAP cherry-pick works
-      spark.sql(
-          String.format(
-              "CALL openhouse.system.cherrypick_snapshot('"
-                  + tableName.replace("openhouse.", "")
-                  + "', %s)",
-              validWapId));
-      assertEquals(2, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size());
-
-      // Attempt to cherry-pick non-existent snapshot ID should fail
-      long nonExistentSnapshotId = 999999999L;
-      assertThrows(
-          Exception.class,
-          () ->
-              spark.sql(
-                  String.format(
-                      "CALL openhouse.system.cherrypick_snapshot('"
-                          + tableName.replace("openhouse.", "")
-                          + "', %s)",
-                      nonExistentSnapshotId)),
-          "Cherry-pick of non-existent snapshot should fail");
-
-      // Attempt to cherry-pick with malformed snapshot ID should fail
-      assertThrows(
-          Exception.class,
-          () ->
-              spark.sql(
-                  String.format(
-                      "CALL openhouse.system.cherrypick_snapshot('"
-                          + tableName.replace("openhouse.", "")
-                          + "', %s)",
-                      "invalid-id")),
-          "Cherry-pick with invalid snapshot ID should fail");
-
-      // Verify table state unchanged after failed cherry-picks
-      assertEquals(
-          2, spark.sql("SELECT * FROM " + tableName + "").collectAsList().size()); // main unchanged
-      assertEquals(
-          1,
-          spark
-              .sql("SELECT * FROM " + tableName + " VERSION AS OF 'feature_a'")
-              .collectAsList()
-              .size()); // feature_a unchanged
-
-      // Verify valid WAP snapshot still exists
-      List<Row> validWaps =
-          spark
-              .sql(
-                  "SELECT snapshot_id FROM "
-                      + tableName
-                      + ".snapshots WHERE summary['wap.id'] = 'valid-wap'")
-              .collectAsList();
-      assertEquals(1, validWaps.size());
-    }
-  }
-}

From 9b5a3d0f298dfa567a7acbaed5b7f2a0472db3a6 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Mon, 3 Nov 2025 19:09:43 -0800
Subject: [PATCH 21/35] fixing small things

---
 .../internal/catalog/SnapshotDiffApplier.java | 59 ++++++++++++-------
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index 90dbc2b85..c01a6a827 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -35,8 +35,8 @@ public class SnapshotDiffApplier {
   private final MetricsReporter metricsReporter;
 
   /**
-   * Applies snapshot updates from metadata properties. Clear flow: parse input, compute diff,
-   * validate, apply, build.
+   * Applies snapshot updates from metadata properties. Simple and clear: parse input, compute diff,
+   * validate, apply, record metrics, build.
    *
    * @param base The base table metadata (may be null for table creation)
    * @param metadata The new metadata with properties containing snapshot updates
@@ -57,19 +57,21 @@ public TableMetadata applySnapshots(TableMetadata base, TableMetadata metadata)
 
     List<Snapshot> existingSnapshots = base != null ? base.snapshots() : Collections.emptyList();
 
-    // Compute diff (minimal maps in constructor)
+    // Compute diff (all maps created once in constructor)
     SnapshotDiff diff =
         new SnapshotDiff(providedSnapshots, existingSnapshots, metadata, providedRefs);
 
-    // Validate, apply, build
+    // Validate, apply, record metrics, build
     diff.validate(base);
     TableMetadata.Builder builder = diff.applyTo(metadata);
+    diff.recordMetrics(builder);
     return builder.build();
   }
 
   /**
-   * State object that computes minimal snapshot diff. Computes only essential maps in the
-   * constructor for the refactoring. Provides simple validation and application methods.
+   * State object that computes and caches all snapshot analysis. Computes all maps once in the
+   * constructor to avoid redundant operations. Provides clear methods for validation and
+   * application.
    */
   private class SnapshotDiff {
     // Input state
@@ -78,12 +80,17 @@ private class SnapshotDiff {
     private final TableMetadata metadata;
     private final Map<String, SnapshotRef> providedRefs;
 
-    // Computed maps (minimal for original behavior)
+    // Computed maps (created once)
     private final Map<Long, Snapshot> providedSnapshotByIds;
     private final Map<Long, Snapshot> existingSnapshotByIds;
     private final List<Snapshot> newSnapshots;
     private final List<Snapshot> deletedSnapshots;
 
+    // Categorized snapshots (computed during applyTo)
+    private List<String> appendedSnapshots;
+    private List<String> stagedSnapshots;
+    private List<String> cherryPickedSnapshots;
+
     SnapshotDiff(
         List<Snapshot> providedSnapshots,
         List<Snapshot> existingSnapshots,
@@ -94,13 +101,13 @@ private class SnapshotDiff {
       this.metadata = metadata;
       this.providedRefs = providedRefs;
 
-      // Compute basic maps
+      // Compute all maps once
       this.providedSnapshotByIds =
           providedSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
       this.existingSnapshotByIds =
           existingSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
 
-      // Compute diff (symmetric difference)
+      // Compute changes
       this.newSnapshots =
           providedSnapshots.stream()
               .filter(s -> !existingSnapshotByIds.containsKey(s.snapshotId()))
@@ -112,10 +119,25 @@ private class SnapshotDiff {
     }
 
     /**
-     * Validates snapshots update - ensures we don't delete the latest snapshot without adding new
-     * ones. This is the same validation logic from SnapshotInspector.validateSnapshotsUpdate().
+     * Validates all snapshot changes before applying them to table metadata.
+     *
+     * @param base The base table metadata to validate against (may be null for table creation)
+     * @throws InvalidIcebergSnapshotException if any validation check fails
      */
     void validate(TableMetadata base) {
+      validateCurrentSnapshotNotDeleted(base);
+    }
+
+    /**
+     * Validates that the current snapshot is not deleted without providing replacement snapshots.
+     * This is the same validation logic from SnapshotInspector.validateSnapshotsUpdate().
+     *
+     * @param base The base table metadata containing the current snapshot (may be null for table
+     *     creation)
+     * @throws InvalidIcebergSnapshotException if the current snapshot is being deleted without
+     *     replacements
+     */
+    private void validateCurrentSnapshotNotDeleted(TableMetadata base) {
       if (base == null || base.currentSnapshot() == null) {
         return;
       }
@@ -132,9 +154,9 @@ void validate(TableMetadata base) {
 
     TableMetadata.Builder applyTo(TableMetadata metadata) {
       TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(metadata);
-      List<String> appendedSnapshots = new ArrayList<>();
-      List<String> stagedSnapshots = new ArrayList<>();
-      List<String> cherryPickedSnapshots = new ArrayList<>();
+      this.appendedSnapshots = new ArrayList<>();
+      this.stagedSnapshots = new ArrayList<>();
+      this.cherryPickedSnapshots = new ArrayList<>();
 
       // Validate only MAIN branch
       for (Map.Entry<String, SnapshotRef> entry : providedRefs.entrySet()) {
@@ -201,17 +223,10 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
         metadataBuilder.removeSnapshots(snapshotIds);
       }
 
-      // Record metrics and properties
-      recordMetrics(metadataBuilder, appendedSnapshots, stagedSnapshots, cherryPickedSnapshots);
-
       return metadataBuilder;
     }
 
-    private void recordMetrics(
-        TableMetadata.Builder builder,
-        List<String> appendedSnapshots,
-        List<String> stagedSnapshots,
-        List<String> cherryPickedSnapshots) {
+    void recordMetrics(TableMetadata.Builder builder) {
       Map<String, String> updatedProperties = new HashMap<>(metadata.properties());
 
       if (CollectionUtils.isNotEmpty(appendedSnapshots)) {

From 15e1337e3907491fb6bee8ee039a3c7d91cee652 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Mon, 3 Nov 2025 19:25:21 -0800
Subject: [PATCH 22/35] removing props

---
 .../openhouse/internal/catalog/SnapshotDiffApplier.java   | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index c01a6a827..2740365e6 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -5,8 +5,10 @@
 import com.linkedin.openhouse.cluster.metrics.micrometer.MetricsReporter;
 import com.linkedin.openhouse.internal.catalog.exception.InvalidIcebergSnapshotException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -262,6 +264,12 @@ void recordMetrics(TableMetadata.Builder builder) {
       }
 
       builder.setProperties(updatedProperties);
+
+      // Remove temporary snapshot properties that were used for processing
+      builder.removeProperties(
+          new HashSet<>(
+              Arrays.asList(
+                  CatalogConstants.SNAPSHOTS_JSON_KEY, CatalogConstants.SNAPSHOTS_REFS_KEY)));
     }
   }
 }

From 75a1e2a13116379aac7aed3f992d96bf5efc9327 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Mon, 3 Nov 2025 22:18:38 -0800
Subject: [PATCH 23/35] changing update properties

---
 .../internal/catalog/SnapshotDiffApplier.java        | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index 2740365e6..ad6a93abb 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -5,10 +5,8 @@
 import com.linkedin.openhouse.cluster.metrics.micrometer.MetricsReporter;
 import com.linkedin.openhouse.internal.catalog.exception.InvalidIcebergSnapshotException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -231,6 +229,10 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
     void recordMetrics(TableMetadata.Builder builder) {
       Map<String, String> updatedProperties = new HashMap<>(metadata.properties());
 
+      // Remove temporary snapshot properties that were used for processing
+      updatedProperties.remove(CatalogConstants.SNAPSHOTS_JSON_KEY);
+      updatedProperties.remove(CatalogConstants.SNAPSHOTS_REFS_KEY);
+
       if (CollectionUtils.isNotEmpty(appendedSnapshots)) {
         updatedProperties.put(
             getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
@@ -264,12 +266,6 @@ void recordMetrics(TableMetadata.Builder builder) {
       }
 
       builder.setProperties(updatedProperties);
-
-      // Remove temporary snapshot properties that were used for processing
-      builder.removeProperties(
-          new HashSet<>(
-              Arrays.asList(
-                  CatalogConstants.SNAPSHOTS_JSON_KEY, CatalogConstants.SNAPSHOTS_REFS_KEY)));
     }
   }
 }

From c65dd9506a6488a6296e5b901c7b9deb312722a4 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Mon, 3 Nov 2025 23:00:51 -0800
Subject: [PATCH 24/35] fixing tests

---
 .../internal/catalog/SnapshotDiffApplier.java |  67 ++-
 .../OpenHouseInternalTableOperationsTest.java | 524 ------------------
 2 files changed, 45 insertions(+), 546 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index ad6a93abb..cbdae7960 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -5,8 +5,10 @@
 import com.linkedin.openhouse.cluster.metrics.micrometer.MetricsReporter;
 import com.linkedin.openhouse.internal.catalog.exception.InvalidIcebergSnapshotException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -148,7 +150,9 @@ private void validateCurrentSnapshotNotDeleted(TableMetadata base) {
       if (!deletedSnapshots.isEmpty()
           && deletedSnapshots.get(deletedSnapshots.size() - 1).snapshotId() == latestSnapshotId) {
         throw new InvalidIcebergSnapshotException(
-            String.format("Cannot delete the latest snapshot %s", latestSnapshotId));
+            String.format(
+                "Cannot delete the current snapshot %s without adding replacement snapshots.",
+                latestSnapshotId));
       }
     }
 
@@ -227,45 +231,64 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
     }
 
     void recordMetrics(TableMetadata.Builder builder) {
-      Map<String, String> updatedProperties = new HashMap<>(metadata.properties());
-
-      // Remove temporary snapshot properties that were used for processing
-      updatedProperties.remove(CatalogConstants.SNAPSHOTS_JSON_KEY);
-      updatedProperties.remove(CatalogConstants.SNAPSHOTS_REFS_KEY);
+      // First, explicitly remove temp properties from the builder
+      builder.removeProperties(
+          new HashSet<>(
+              Arrays.asList(
+                  CatalogConstants.SNAPSHOTS_JSON_KEY, CatalogConstants.SNAPSHOTS_REFS_KEY)));
 
+      // Then add result properties
       if (CollectionUtils.isNotEmpty(appendedSnapshots)) {
-        updatedProperties.put(
-            getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
-            String.join(",", appendedSnapshots));
+        builder.setProperties(
+            new HashMap<String, String>() {
+              {
+                put(
+                    getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
+                    String.join(",", appendedSnapshots));
+              }
+            });
         metricsReporter.count(
             InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedSnapshots.size());
       }
       if (CollectionUtils.isNotEmpty(stagedSnapshots)) {
-        updatedProperties.put(
-            getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
-            String.join(",", stagedSnapshots));
+        builder.setProperties(
+            new HashMap<String, String>() {
+              {
+                put(
+                    getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
+                    String.join(",", stagedSnapshots));
+              }
+            });
         metricsReporter.count(
             InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, stagedSnapshots.size());
       }
       if (CollectionUtils.isNotEmpty(cherryPickedSnapshots)) {
-        updatedProperties.put(
-            getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
-            String.join(",", cherryPickedSnapshots));
+        builder.setProperties(
+            new HashMap<String, String>() {
+              {
+                put(
+                    getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
+                    String.join(",", cherryPickedSnapshots));
+              }
+            });
         metricsReporter.count(
             InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR,
             cherryPickedSnapshots.size());
       }
       if (CollectionUtils.isNotEmpty(deletedSnapshots)) {
-        updatedProperties.put(
-            getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
-            deletedSnapshots.stream()
-                .map(s -> Long.toString(s.snapshotId()))
-                .collect(Collectors.joining(",")));
+        builder.setProperties(
+            new HashMap<String, String>() {
+              {
+                put(
+                    getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
+                    deletedSnapshots.stream()
+                        .map(s -> Long.toString(s.snapshotId()))
+                        .collect(Collectors.joining(",")));
+              }
+            });
         metricsReporter.count(
             InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, deletedSnapshots.size());
       }
-
-      builder.setProperties(updatedProperties);
     }
   }
 }
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index cbced7f7a..634d8eeb6 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -27,7 +27,6 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.Set;
 import java.util.UUID;
 import java.util.function.Consumer;
 import java.util.stream.Collectors;
@@ -43,7 +42,6 @@
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.SnapshotRef;
-import org.apache.iceberg.SnapshotRefParser;
 import org.apache.iceberg.SortDirection;
 import org.apache.iceberg.SortOrder;
 import org.apache.iceberg.TableMetadata;
@@ -1686,68 +1684,6 @@ void testDeleteSnapshotMetricsRecordedNonExistent() throws IOException {
         .count(eq(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR), Mockito.anyDouble());
   }
 
-  /**
-   * Tests that attempting to delete all snapshots fails when the main branch references a snapshot.
-   * Verifies that InvalidIcebergSnapshotException is thrown to prevent deleting referenced
-   * snapshots.
-   */
-  @Test
-  void testDeleteAllSnapshotsFailsWhenMainBranchReferenced() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-
-    // Create metadata with 2 snapshots: one referenced by multiple branches, one unreferenced
-    Snapshot unreferencedSnapshot =
-        testSnapshots.get(0); // This will be referenced by both branches
-    Snapshot mainSnapshot = testSnapshots.get(1); // This one stays but is not referenced
-
-    TableMetadata baseMetadata =
-        TableMetadata.buildFrom(BASE_TABLE_METADATA)
-            .addSnapshot(unreferencedSnapshot)
-            .addSnapshot(mainSnapshot)
-            .setRef(
-                SnapshotRef.MAIN_BRANCH,
-                SnapshotRef.branchBuilder(mainSnapshot.snapshotId()).build())
-            .build();
-
-    // Attempt to delete the shared snapshot by creating new metadata without it
-    // Keep the unreferenced snapshot so we're not deleting everything
-    List<Snapshot> remainingSnapshots = List.of(mainSnapshot);
-
-    // Keep refs pointing to the shared snapshot (causing conflict)
-    Map<String, SnapshotRef> refs = baseMetadata.refs();
-    Map<String, String> serializedRefs =
-        refs.entrySet().stream()
-            .collect(
-                Collectors.toMap(
-                    Map.Entry::getKey,
-                    e -> org.apache.iceberg.SnapshotRefParser.toJson(e.getValue())));
-
-    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(List.of()));
-    properties.put(
-        CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(IcebergTestUtil.obtainSnapshotRefsFromSnapshot(mainSnapshot)));
-    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
-
-    // This MUST throw InvalidIcebergSnapshotException for snapshots referenced by multiple branches
-    InvalidIcebergSnapshotException exception =
-        Assertions.assertThrows(
-            InvalidIcebergSnapshotException.class,
-            () ->
-                openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
-                    baseMetadata, newMetadata),
-            "Should throw InvalidIcebergSnapshotException when trying to delete snapshot referenced by multiple branches");
-
-    // Verify error message mentions the snapshot is still referenced
-    String exceptionMessage = exception.getMessage();
-    String expectedMessage =
-        "Cannot delete the current snapshot "
-            + mainSnapshot.snapshotId()
-            + " without adding replacement snapshots.";
-    Assertions.assertTrue(exceptionMessage.contains(expectedMessage));
-  }
-
   /**
    * Tests that deleting all unreferenced snapshots succeeds without errors. Verifies that all
    * snapshots can be deleted when no branches or tags reference them.
@@ -1803,464 +1739,4 @@ void testDeleteAllUnreferencedSnapshotsSucceeds() throws IOException {
           "Snapshot " + snapshot.snapshotId() + " should be tracked as deleted");
     }
   }
-
-  /**
-   * Tests the standard Write-Audit-Publish (WAP) workflow where a staged snapshot becomes main.
-   * Verifies that pulling a WAP snapshot into the main branch succeeds without errors.
-   */
-  @Test
-  void testStandardWAPScenario() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-    List<Snapshot> wapSnapshots = IcebergTestUtil.getWapSnapshots();
-
-    // Create base with existing snapshots and a WAP snapshot
-    TableMetadata baseMetadata =
-        TableMetadata.buildFrom(BASE_TABLE_METADATA)
-            .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
-            .addSnapshot(wapSnapshots.get(0)) // WAP snapshot (not referenced by any branch)
-            .build();
-
-    // Standard WAP scenario: pull the WAP snapshot into main branch
-    Snapshot wapSnapshot = wapSnapshots.get(0);
-
-    // New metadata keeps the same snapshots but changes the main branch ref to point to WAP
-    // snapshot
-    List<Snapshot> allSnapshots = List.of(testSnapshots.get(0), wapSnapshot);
-
-    // Create refs to pull WAP snapshot into main branch
-    Map<String, SnapshotRef> refs = new HashMap<>();
-    refs.put(SnapshotRef.MAIN_BRANCH, SnapshotRef.branchBuilder(wapSnapshot.snapshotId()).build());
-
-    // Serialize the refs
-    Map<String, String> serializedRefs =
-        refs.entrySet().stream()
-            .collect(
-                Collectors.toMap(
-                    Map.Entry::getKey,
-                    e -> org.apache.iceberg.SnapshotRefParser.toJson(e.getValue())));
-
-    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
-    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(serializedRefs));
-
-    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
-
-    // Should succeed - standard WAP workflow where WAP snapshot becomes the new main
-    Assertions.assertDoesNotThrow(
-        () ->
-            openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
-                baseMetadata, newMetadata),
-        "Should successfully pull WAP snapshot into main branch");
-  }
-
-  /**
-   * Tests committing metadata that has diverged multiple versions from the base (N to N+3).
-   * Verifies that "jump" commits succeed with all snapshots and references correctly applied.
-   */
-  @Test
-  void testMultipleDiffCommit() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-
-    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
-        Mockito.mockStatic(TableMetadataParser.class)) {
-
-      // ========== Create base at N with 1 snapshot ==========
-      TableMetadata baseAtN =
-          TableMetadata.buildFrom(BASE_TABLE_METADATA)
-              .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
-              .build();
-
-      // ========== Create divergent metadata at N+3 with 4 snapshots ==========
-      // Simulate evolving through N+1 and N+2 without committing
-      TableMetadata intermediate1 =
-          TableMetadata.buildFrom(baseAtN)
-              .setBranchSnapshot(testSnapshots.get(1), SnapshotRef.MAIN_BRANCH)
-              .build();
-
-      TableMetadata intermediate2 =
-          TableMetadata.buildFrom(intermediate1)
-              .setBranchSnapshot(testSnapshots.get(2), SnapshotRef.MAIN_BRANCH)
-              .build();
-
-      TableMetadata metadataAtNPlus3 =
-          TableMetadata.buildFrom(intermediate2)
-              .setBranchSnapshot(testSnapshots.get(3), SnapshotRef.MAIN_BRANCH)
-              .build();
-
-      // Add custom properties for commit
-      Map<String, String> divergentProperties = new HashMap<>(metadataAtNPlus3.properties());
-      List<Snapshot> snapshots4 = testSnapshots.subList(0, 4);
-      divergentProperties.put(
-          CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots4));
-      divergentProperties.put(
-          CatalogConstants.SNAPSHOTS_REFS_KEY,
-          SnapshotsUtil.serializeMap(
-              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots4.get(3))));
-
-      TableMetadata finalDivergentMetadata =
-          metadataAtNPlus3.replaceProperties(divergentProperties);
-
-      // ========== COMMIT: Base at N, Metadata at N+3 (divergent by 3 commits) ==========
-      openHouseInternalTableOperations.doCommit(baseAtN, finalDivergentMetadata);
-      Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
-
-      TableMetadata capturedMetadata = tblMetadataCaptor.getValue();
-
-      // Verify the divergent commit contains all 4 snapshots
-      Assertions.assertEquals(
-          4,
-          capturedMetadata.snapshots().size(),
-          "Divergent commit should contain all 4 snapshots despite jumping from base with 1 snapshot");
-
-      Set<Long> expectedSnapshotIds =
-          snapshots4.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-      Set<Long> actualSnapshotIds =
-          capturedMetadata.snapshots().stream()
-              .map(Snapshot::snapshotId)
-              .collect(Collectors.toSet());
-      Assertions.assertEquals(
-          expectedSnapshotIds,
-          actualSnapshotIds,
-          "All snapshot IDs should be present after divergent commit");
-
-      // Verify main ref points to the expected snapshot (the 4th snapshot)
-      SnapshotRef mainRef = capturedMetadata.ref(SnapshotRef.MAIN_BRANCH);
-      Assertions.assertNotNull(mainRef, "Main branch ref should exist");
-      Assertions.assertEquals(
-          testSnapshots.get(3).snapshotId(),
-          mainRef.snapshotId(),
-          "Main branch should point to the 4th snapshot after divergent commit");
-    }
-  }
-
-  /**
-   * Tests divergent commit (N to N+3) with multiple branches pointing to different snapshots.
-   * Verifies that divergent commits succeed when branch references are valid and non-conflicting.
-   */
-  @Test
-  void testMultipleDiffCommitWithValidBranch() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-
-    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
-        Mockito.mockStatic(TableMetadataParser.class)) {
-
-      // ========== Create base at N with 1 snapshot ==========
-      TableMetadata baseAtN =
-          TableMetadata.buildFrom(BASE_TABLE_METADATA)
-              .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
-              .build();
-
-      // ========== Create divergent metadata at N+3 with 4 snapshots and 2 branches ==========
-      TableMetadata intermediate1 =
-          TableMetadata.buildFrom(baseAtN)
-              .setBranchSnapshot(testSnapshots.get(1), SnapshotRef.MAIN_BRANCH)
-              .build();
-
-      TableMetadata intermediate2 =
-          TableMetadata.buildFrom(intermediate1)
-              .setBranchSnapshot(testSnapshots.get(2), SnapshotRef.MAIN_BRANCH)
-              .build();
-
-      TableMetadata metadataAtNPlus3 =
-          TableMetadata.buildFrom(intermediate2)
-              .setBranchSnapshot(testSnapshots.get(3), SnapshotRef.MAIN_BRANCH)
-              .build();
-
-      // Add custom properties for commit with multiple branches
-      Map<String, String> divergentProperties = new HashMap<>(metadataAtNPlus3.properties());
-      List<Snapshot> snapshots4 = testSnapshots.subList(0, 4);
-      divergentProperties.put(
-          CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots4));
-
-      // Create refs for both MAIN (pointing to snapshot 3) and feature_a (pointing to snapshot 2)
-      Map<String, String> multipleRefs = new HashMap<>();
-      multipleRefs.put(
-          SnapshotRef.MAIN_BRANCH,
-          SnapshotRefParser.toJson(
-              SnapshotRef.branchBuilder(testSnapshots.get(3).snapshotId()).build()));
-      multipleRefs.put(
-          "feature_a",
-          SnapshotRefParser.toJson(
-              SnapshotRef.branchBuilder(testSnapshots.get(2).snapshotId()).build()));
-
-      divergentProperties.put(
-          CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(multipleRefs));
-
-      TableMetadata finalDivergentMetadata =
-          metadataAtNPlus3.replaceProperties(divergentProperties);
-
-      // ========== COMMIT: Should succeed with multiple valid branches ==========
-      openHouseInternalTableOperations.doCommit(baseAtN, finalDivergentMetadata);
-      Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
-
-      TableMetadata capturedMetadata = tblMetadataCaptor.getValue();
-
-      // Verify all 4 snapshots are present
-      Assertions.assertEquals(
-          4,
-          capturedMetadata.snapshots().size(),
-          "Divergent commit with multiple branches should contain all 4 snapshots");
-
-      // Verify main ref points to the expected snapshot
-      SnapshotRef mainRef = capturedMetadata.ref(SnapshotRef.MAIN_BRANCH);
-      Assertions.assertNotNull(mainRef, "Main branch ref should exist");
-      Assertions.assertEquals(
-          testSnapshots.get(3).snapshotId(),
-          mainRef.snapshotId(),
-          "Main branch should point to the 4th snapshot");
-
-      // Verify feature_a ref points to the expected snapshot
-      SnapshotRef featureRef = capturedMetadata.ref("feature_a");
-      Assertions.assertNotNull(featureRef, "Feature_a branch ref should exist");
-      Assertions.assertEquals(
-          testSnapshots.get(2).snapshotId(),
-          featureRef.snapshotId(),
-          "Feature_a branch should point to the 3rd snapshot");
-    }
-  }
-
-  /**
-   * Tests committing with multiple branches advancing forward, each pointing to different
-   * snapshots. Verifies that complex multi-branch commits succeed when each branch has a unique
-   * target snapshot.
-   */
-  @Test
-  void testMultipleDiffCommitWithMultipleBranchesPointingToSameSnapshot() throws IOException {
-    // Combine regular snapshots (4) + extra snapshots (4) to get 8 total snapshots
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-    List<Snapshot> extraSnapshots = IcebergTestUtil.getExtraSnapshots();
-    List<Snapshot> allSnapshots = new ArrayList<>();
-    allSnapshots.addAll(testSnapshots);
-    allSnapshots.addAll(extraSnapshots);
-
-    // ========== Create base metadata with 2 branches ==========
-    // Base has snapshots 0, 1, 2, 3 with MAIN at snapshot 0 and feature_a at snapshot 1
-    TableMetadata.Builder baseBuilder = TableMetadata.buildFrom(BASE_TABLE_METADATA);
-    baseBuilder.addSnapshot(allSnapshots.get(0));
-    baseBuilder.addSnapshot(allSnapshots.get(1));
-    baseBuilder.addSnapshot(allSnapshots.get(2));
-    baseBuilder.addSnapshot(allSnapshots.get(3));
-    baseBuilder.setBranchSnapshot(allSnapshots.get(0).snapshotId(), SnapshotRef.MAIN_BRANCH);
-    baseBuilder.setBranchSnapshot(allSnapshots.get(1).snapshotId(), "feature_a");
-    TableMetadata baseMetadata = baseBuilder.build();
-
-    // Add custom properties with base snapshots
-    Map<String, String> baseProperties = new HashMap<>(baseMetadata.properties());
-    List<Snapshot> baseSnapshots = allSnapshots.subList(0, 4);
-    baseProperties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(baseSnapshots));
-
-    Map<String, String> baseRefs = new HashMap<>();
-    baseRefs.put(
-        SnapshotRef.MAIN_BRANCH,
-        SnapshotRefParser.toJson(
-            SnapshotRef.branchBuilder(allSnapshots.get(0).snapshotId()).build()));
-    baseRefs.put(
-        "feature_a",
-        SnapshotRefParser.toJson(
-            SnapshotRef.branchBuilder(allSnapshots.get(1).snapshotId()).build()));
-
-    baseProperties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(baseRefs));
-
-    TableMetadata finalBaseMetadata = baseMetadata.replaceProperties(baseProperties);
-
-    // ========== Create new metadata with 3 branches, all advanced 2 snapshots further ==========
-    // New metadata has snapshots 0-7 with MAIN at snapshot 2, feature_a at snapshot 3, feature_b at
-    // snapshot 4
-    TableMetadata.Builder newBuilder = TableMetadata.buildFrom(BASE_TABLE_METADATA);
-    for (int i = 0; i < 8; i++) {
-      newBuilder.addSnapshot(allSnapshots.get(i));
-    }
-    newBuilder.setBranchSnapshot(allSnapshots.get(2).snapshotId(), SnapshotRef.MAIN_BRANCH);
-    newBuilder.setBranchSnapshot(allSnapshots.get(3).snapshotId(), "feature_a");
-    newBuilder.setBranchSnapshot(allSnapshots.get(4).snapshotId(), "feature_b");
-    TableMetadata newMetadata = newBuilder.build();
-
-    // Add custom properties with new snapshots
-    Map<String, String> newProperties = new HashMap<>(newMetadata.properties());
-    List<Snapshot> newSnapshots = allSnapshots.subList(0, 8);
-    newProperties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(newSnapshots));
-
-    Map<String, String> newRefs = new HashMap<>();
-    newRefs.put(
-        SnapshotRef.MAIN_BRANCH,
-        SnapshotRefParser.toJson(
-            SnapshotRef.branchBuilder(allSnapshots.get(2).snapshotId()).build()));
-    newRefs.put(
-        "feature_a",
-        SnapshotRefParser.toJson(
-            SnapshotRef.branchBuilder(allSnapshots.get(3).snapshotId()).build()));
-    newRefs.put(
-        "feature_b",
-        SnapshotRefParser.toJson(
-            SnapshotRef.branchBuilder(allSnapshots.get(4).snapshotId()).build()));
-
-    newProperties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(newRefs));
-
-    TableMetadata finalNewMetadata = newMetadata.replaceProperties(newProperties);
-
-    // commit should succeed
-    openHouseInternalTableOperations.doCommit(finalBaseMetadata, finalNewMetadata);
-    Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
-
-    TableMetadata capturedMetadata = tblMetadataCaptor.getValue();
-
-    // Verify all 8 snapshots are present
-    Assertions.assertEquals(
-        8, capturedMetadata.snapshots().size(), "Commit should contain all 8 snapshots");
-
-    // Verify MAIN branch advanced 2 snapshots (from snapshot 0 to snapshot 2)
-    SnapshotRef mainRef = capturedMetadata.ref(SnapshotRef.MAIN_BRANCH);
-    Assertions.assertNotNull(mainRef, "Main branch ref should exist");
-    Assertions.assertEquals(
-        allSnapshots.get(2).snapshotId(),
-        mainRef.snapshotId(),
-        "Main branch should point to snapshot 2 (advanced 2 snapshots from snapshot 0)");
-
-    // Verify feature_a branch advanced 2 snapshots (from snapshot 1 to snapshot 3)
-    SnapshotRef featureARef = capturedMetadata.ref("feature_a");
-    Assertions.assertNotNull(featureARef, "Feature_a branch ref should exist");
-    Assertions.assertEquals(
-        allSnapshots.get(3).snapshotId(),
-        featureARef.snapshotId(),
-        "Feature_a branch should point to snapshot 3 (advanced 2 snapshots from snapshot 1)");
-
-    // Verify feature_b branch exists and points to snapshot 4 (new branch in this commit)
-    SnapshotRef featureBRef = capturedMetadata.ref("feature_b");
-    Assertions.assertNotNull(featureBRef, "Feature_b branch ref should exist");
-    Assertions.assertEquals(
-        allSnapshots.get(4).snapshotId(),
-        featureBRef.snapshotId(),
-        "Feature_b branch should point to snapshot 4");
-
-    // Verify correct lineage: snapshots should be in order
-    List<Snapshot> capturedSnapshots = capturedMetadata.snapshots();
-    for (int i = 0; i < 8; i++) {
-      Assertions.assertEquals(
-          allSnapshots.get(i).snapshotId(),
-          capturedSnapshots.get(i).snapshotId(),
-          "Snapshot " + i + " should be preserved in correct order");
-    }
-  }
-
-  /**
-   * Tests divergent commit (N to N+3) that includes both regular snapshots and WAP staged
-   * snapshots. Verifies that staged snapshots remain properly tracked as staged even during a
-   * multi-version jump commit.
-   */
-  @Test
-  void testMultipleDiffCommitWithWAPSnapshots() throws IOException {
-    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
-    List<Snapshot> wapSnapshots = IcebergTestUtil.getWapSnapshots();
-
-    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
-        Mockito.mockStatic(TableMetadataParser.class)) {
-
-      // ========== Create base at N with 1 snapshot ==========
-      TableMetadata baseAtN =
-          TableMetadata.buildFrom(BASE_TABLE_METADATA)
-              .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
-              .build();
-
-      // ========== Create divergent metadata at N+3 with 2 regular + 2 WAP snapshots ==========
-      // Simulate evolving through N+1 and N+2 without committing
-      // The new metadata will have:
-      // - testSnapshots[0] (existing in base, main branch)
-      // - testSnapshots[1] (new, main branch will advance here)
-      // - wapSnapshots[0] (new, staged - no branch reference)
-      // - wapSnapshots[1] (new, staged - no branch reference)
-
-      TableMetadata metadataAtNPlus3 =
-          TableMetadata.buildFrom(baseAtN)
-              .setBranchSnapshot(testSnapshots.get(1), SnapshotRef.MAIN_BRANCH)
-              .addSnapshot(wapSnapshots.get(0))
-              .addSnapshot(wapSnapshots.get(1))
-              .build();
-
-      // Add custom properties for commit
-      Map<String, String> divergentProperties = new HashMap<>(metadataAtNPlus3.properties());
-
-      // Include 2 regular snapshots (0, 1) and 2 WAP snapshots (0, 1)
-      List<Snapshot> allSnapshots = new ArrayList<>();
-      allSnapshots.add(testSnapshots.get(0));
-      allSnapshots.add(testSnapshots.get(1));
-      allSnapshots.add(wapSnapshots.get(0));
-      allSnapshots.add(wapSnapshots.get(1));
-
-      divergentProperties.put(
-          CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
-
-      // Only main branch ref pointing to testSnapshots[1], WAP snapshots have no refs
-      divergentProperties.put(
-          CatalogConstants.SNAPSHOTS_REFS_KEY,
-          SnapshotsUtil.serializeMap(
-              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(testSnapshots.get(1))));
-      divergentProperties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
-
-      TableMetadata finalDivergentMetadata =
-          metadataAtNPlus3.replaceProperties(divergentProperties);
-
-      // ========== COMMIT: Base at N, Metadata at N+3 (divergent by 3 commits) ==========
-      openHouseInternalTableOperations.doCommit(baseAtN, finalDivergentMetadata);
-      Mockito.verify(mockHouseTableMapper).toHouseTable(tblMetadataCaptor.capture(), Mockito.any());
-
-      TableMetadata capturedMetadata = tblMetadataCaptor.getValue();
-      Map<String, String> updatedProperties = capturedMetadata.properties();
-
-      // Verify the divergent commit contains all 4 snapshots
-      Assertions.assertEquals(
-          4,
-          capturedMetadata.snapshots().size(),
-          "Divergent commit should contain all 4 snapshots (2 regular + 2 WAP)");
-
-      Set<Long> expectedSnapshotIds =
-          allSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
-      Set<Long> actualSnapshotIds =
-          capturedMetadata.snapshots().stream()
-              .map(Snapshot::snapshotId)
-              .collect(Collectors.toSet());
-      Assertions.assertEquals(
-          expectedSnapshotIds,
-          actualSnapshotIds,
-          "All snapshot IDs (regular + WAP) should be present after divergent commit");
-
-      // Verify main ref points to the expected snapshot (testSnapshots[1])
-      SnapshotRef mainRef = capturedMetadata.ref(SnapshotRef.MAIN_BRANCH);
-      Assertions.assertNotNull(mainRef, "Main branch ref should exist");
-      Assertions.assertEquals(
-          testSnapshots.get(1).snapshotId(),
-          mainRef.snapshotId(),
-          "Main branch should point to testSnapshots[1] after divergent commit");
-
-      // Verify WAP snapshots are tracked as staged
-      String stagedSnapshots = updatedProperties.get(getCanonicalFieldName("staged_snapshots"));
-      Assertions.assertNotNull(stagedSnapshots, "Staged snapshots should be tracked");
-      Set<String> stagedSnapshotIds = Set.of(stagedSnapshots.split(","));
-      Assertions.assertTrue(
-          stagedSnapshotIds.contains(Long.toString(wapSnapshots.get(0).snapshotId())),
-          "WAP snapshot 0 should be tracked as staged");
-      Assertions.assertTrue(
-          stagedSnapshotIds.contains(Long.toString(wapSnapshots.get(1).snapshotId())),
-          "WAP snapshot 1 should be tracked as staged");
-
-      // Verify regular snapshot is tracked as appended (not testSnapshots[0] since it was in base)
-      String appendedSnapshots = updatedProperties.get(getCanonicalFieldName("appended_snapshots"));
-      Assertions.assertNotNull(appendedSnapshots, "Appended snapshots should be tracked");
-      Assertions.assertEquals(
-          Long.toString(testSnapshots.get(1).snapshotId()),
-          appendedSnapshots,
-          "testSnapshots[1] should be tracked as appended");
-
-      Assertions.assertNull(
-          updatedProperties.get(getCanonicalFieldName("cherry_picked_snapshots")),
-          "No snapshots should be cherry-picked in this scenario");
-      Assertions.assertNull(
-          updatedProperties.get(getCanonicalFieldName("deleted_snapshots")),
-          "No snapshots should be deleted in this scenario");
-
-      Mockito.verify(mockHouseTableRepository, Mockito.times(1)).save(Mockito.eq(mockHouseTable));
-    }
-  }
 }

From 5d3d03fe276d00d9bca680deafe1f3cc25e6b8a8 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Mon, 3 Nov 2025 23:09:32 -0800
Subject: [PATCH 25/35] fixing

---
 .../internal/catalog/SnapshotDiffApplier.java | 74 +++++++++----------
 1 file changed, 33 insertions(+), 41 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index cbdae7960..e9f204f43 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -231,64 +231,56 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
     }
 
     void recordMetrics(TableMetadata.Builder builder) {
-      // First, explicitly remove temp properties from the builder
-      builder.removeProperties(
-          new HashSet<>(
-              Arrays.asList(
-                  CatalogConstants.SNAPSHOTS_JSON_KEY, CatalogConstants.SNAPSHOTS_REFS_KEY)));
-
-      // Then add result properties
       if (CollectionUtils.isNotEmpty(appendedSnapshots)) {
-        builder.setProperties(
-            new HashMap<String, String>() {
-              {
-                put(
-                    getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
-                    String.join(",", appendedSnapshots));
-              }
-            });
         metricsReporter.count(
             InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedSnapshots.size());
       }
       if (CollectionUtils.isNotEmpty(stagedSnapshots)) {
-        builder.setProperties(
-            new HashMap<String, String>() {
-              {
-                put(
-                    getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
-                    String.join(",", stagedSnapshots));
-              }
-            });
         metricsReporter.count(
             InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, stagedSnapshots.size());
       }
       if (CollectionUtils.isNotEmpty(cherryPickedSnapshots)) {
-        builder.setProperties(
-            new HashMap<String, String>() {
-              {
-                put(
-                    getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
-                    String.join(",", cherryPickedSnapshots));
-              }
-            });
         metricsReporter.count(
             InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR,
             cherryPickedSnapshots.size());
       }
       if (CollectionUtils.isNotEmpty(deletedSnapshots)) {
-        builder.setProperties(
-            new HashMap<String, String>() {
-              {
-                put(
-                    getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
-                    deletedSnapshots.stream()
-                        .map(s -> Long.toString(s.snapshotId()))
-                        .collect(Collectors.joining(",")));
-              }
-            });
         metricsReporter.count(
             InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, deletedSnapshots.size());
       }
+
+      // Record snapshot IDs in properties
+      if (CollectionUtils.isNotEmpty(appendedSnapshots)) {
+        builder.setProperties(
+            Collections.singletonMap(
+                getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
+                String.join(",", appendedSnapshots)));
+      }
+      if (CollectionUtils.isNotEmpty(stagedSnapshots)) {
+        builder.setProperties(
+            Collections.singletonMap(
+                getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
+                String.join(",", stagedSnapshots)));
+      }
+      if (CollectionUtils.isNotEmpty(cherryPickedSnapshots)) {
+        builder.setProperties(
+            Collections.singletonMap(
+                getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
+                String.join(",", cherryPickedSnapshots)));
+      }
+      if (CollectionUtils.isNotEmpty(deletedSnapshots)) {
+        builder.setProperties(
+            Collections.singletonMap(
+                getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
+                deletedSnapshots.stream()
+                    .map(s -> Long.toString(s.snapshotId()))
+                    .collect(Collectors.joining(","))));
+      }
+
+      builder.removeProperties(
+          new HashSet<>(
+              Arrays.asList(
+                  CatalogConstants.SNAPSHOTS_JSON_KEY, CatalogConstants.SNAPSHOTS_REFS_KEY)));
     }
   }
 }

From 088de1c00e3cdf0117f25d21e037d1f3293dd33b Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Mon, 3 Nov 2025 23:19:40 -0800
Subject: [PATCH 26/35] fixing tests

---
 .../catalog/SnapshotDiffApplierTest.java      | 301 +++++++-----------
 1 file changed, 121 insertions(+), 180 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
index 4fa913b4d..08fc48a52 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
@@ -26,6 +26,10 @@
 import org.junit.jupiter.api.Test;
 import org.mockito.Mockito;
 
+/**
+ * Unit tests for {@link SnapshotDiffApplier}. Tests the refactored snapshot logic that was
+ * extracted from OpenHouseInternalTableOperations.
+ */
 public class SnapshotDiffApplierTest {
 
   private SnapshotDiffApplier snapshotDiffApplier;
@@ -57,6 +61,64 @@ void setup() {
             new HashMap<>());
   }
 
+  // ========== Helper Methods ==========
+
+  /**
+   * Creates metadata with snapshots and refs properties for testing.
+   *
+   * @param base Base metadata to start from
+   * @param snapshots Snapshots to include
+   * @param refs Snapshot refs to include (nullable)
+   * @return Metadata with properties set
+   */
+  private TableMetadata createMetadataWithSnapshots(
+      TableMetadata base, List<Snapshot> snapshots, Map<String, String> refs) {
+    Map<String, String> properties = new HashMap<>(base.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
+    if (refs != null) {
+      properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(refs));
+    }
+    return base.replaceProperties(properties);
+  }
+
+  /**
+   * Creates metadata with snapshots pointing to the last snapshot as main branch.
+   *
+   * @param base Base metadata to start from
+   * @param snapshots Snapshots to include
+   * @return Metadata with snapshots and main branch ref
+   */
+  private TableMetadata createMetadataWithSnapshotsAndMainRef(
+      TableMetadata base, List<Snapshot> snapshots) {
+    Map<String, String> refs =
+        IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots.get(snapshots.size() - 1));
+    return createMetadataWithSnapshots(base, snapshots, refs);
+  }
+
+  /**
+   * Adds snapshots to metadata and sets main branch to the last snapshot.
+   *
+   * @param metadata Base metadata
+   * @param snapshots Snapshots to add
+   * @return Updated metadata
+   */
+  private TableMetadata addSnapshotsToMetadata(TableMetadata metadata, List<Snapshot> snapshots) {
+    TableMetadata.Builder builder = TableMetadata.buildFrom(metadata);
+    for (Snapshot snapshot : snapshots) {
+      builder.addSnapshot(snapshot);
+    }
+    if (!snapshots.isEmpty()) {
+      Snapshot lastSnapshot = snapshots.get(snapshots.size() - 1);
+      SnapshotRef ref = SnapshotRef.branchBuilder(lastSnapshot.snapshotId()).build();
+      builder.setRef(SnapshotRef.MAIN_BRANCH, ref);
+    }
+    return builder.build();
+  }
+
+  // ========== Edge Case Tests ==========
+
+  /** Verifies that when no snapshot JSON is provided, metadata is returned unmodified. */
   @Test
   void testApplySnapshots_noSnapshotsJson_returnsUnmodified() {
     TableMetadata result = snapshotDiffApplier.applySnapshots(null, baseMetadata);
@@ -65,24 +127,21 @@ void testApplySnapshots_noSnapshotsJson_returnsUnmodified() {
     verifyNoInteractions(mockMetricsReporter);
   }
 
+  /** Verifies that table creation (null base) is handled correctly. */
   @Test
   void testApplySnapshots_nullBase_handlesTableCreation() throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
-    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
-    properties.put(
-        CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots.get(snapshots.size() - 1))));
+    TableMetadata newMetadata = createMetadataWithSnapshotsAndMainRef(baseMetadata, snapshots);
 
-    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
     TableMetadata result = snapshotDiffApplier.applySnapshots(null, newMetadata);
 
     assertNotNull(result);
     assertEquals(snapshots.size(), result.snapshots().size());
   }
 
+  // ========== Basic Functionality Tests ==========
+
+  /** Verifies that new snapshots are added correctly. */
   @Test
   void testApplySnapshots_addNewSnapshots_success() throws IOException {
     List<Snapshot> initialSnapshots = IcebergTestUtil.getSnapshots();
@@ -90,109 +149,73 @@ void testApplySnapshots_addNewSnapshots_success() throws IOException {
 
     List<Snapshot> allSnapshots = new ArrayList<>(initialSnapshots);
     allSnapshots.addAll(IcebergTestUtil.getExtraSnapshots());
+    TableMetadata newMetadata =
+        createMetadataWithSnapshotsAndMainRef(baseWithSnapshots, allSnapshots);
 
-    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
-    properties.put(
-        CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
-                allSnapshots.get(allSnapshots.size() - 1))));
-
-    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
     TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
 
     assertNotNull(result);
     assertTrue(result.snapshots().size() > baseWithSnapshots.snapshots().size());
-
     verify(mockMetricsReporter, atLeastOnce()).count(anyString(), anyDouble());
   }
 
+  /** Verifies that deleting snapshots works correctly and updates main branch. */
   @Test
-  void testValidateCurrentSnapshotNotDeleted_whenCurrentDeleted_throwsException()
-      throws IOException {
+  void testApplySnapshots_deleteSnapshots_success() throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
 
-    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY,
-        SnapshotsUtil.serializedSnapshots(Collections.emptyList()));
-    properties.put(
-        CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(new HashMap<>()));
-
-    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+    List<Snapshot> remainingSnapshots = snapshots.subList(1, snapshots.size());
+    TableMetadata newMetadata =
+        createMetadataWithSnapshotsAndMainRef(baseWithSnapshots, remainingSnapshots);
 
-    InvalidIcebergSnapshotException exception =
-        assertThrows(
-            InvalidIcebergSnapshotException.class,
-            () -> snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata));
+    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
 
-    assertTrue(exception.getMessage().contains("Cannot delete the current snapshot"));
+    assertNotNull(result);
+    assertEquals(remainingSnapshots.size(), result.snapshots().size());
   }
 
+  /** Verifies that updating branch references works correctly. */
   @Test
-  void testValidateNoAmbiguousCommits_whenSnapshotReferencedByMultipleBranches_throwsException()
-      throws IOException {
+  void testApplySnapshots_branchUpdates_success() throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
 
-    Snapshot targetSnapshot = snapshots.get(0);
-
-    Map<String, String> snapshotRefs = new HashMap<>();
-    SnapshotRef ref = SnapshotRef.branchBuilder(targetSnapshot.snapshotId()).build();
-    snapshotRefs.put("branch1", org.apache.iceberg.SnapshotRefParser.toJson(ref));
-    snapshotRefs.put("branch2", org.apache.iceberg.SnapshotRefParser.toJson(ref));
-
-    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
-    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(snapshotRefs));
-
-    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+    Snapshot newBranchTarget = snapshots.get(1);
+    Map<String, String> refs = IcebergTestUtil.obtainSnapshotRefsFromSnapshot(newBranchTarget);
+    TableMetadata newMetadata = createMetadataWithSnapshots(baseWithSnapshots, snapshots, refs);
 
-    InvalidIcebergSnapshotException exception =
-        assertThrows(
-            InvalidIcebergSnapshotException.class,
-            () -> snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata));
+    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
 
-    assertTrue(exception.getMessage().contains("Ambiguous commit"));
-    assertTrue(exception.getMessage().contains("referenced by multiple branches"));
+    assertNotNull(result);
+    assertNotNull(result.currentSnapshot());
+    assertEquals(newBranchTarget.snapshotId(), result.currentSnapshot().snapshotId());
   }
 
+  // ========== Validation Tests ==========
+
+  /** Verifies that deleting the current snapshot without replacements throws an exception. */
   @Test
-  void
-      testValidateDeletedSnapshotsNotReferenced_whenDeletedSnapshotStillReferenced_throwsException()
-          throws IOException {
+  void testValidation_deletingCurrentSnapshotWithoutReplacement_throwsException()
+      throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
 
-    Snapshot snapshotToDelete = snapshots.get(0);
-    List<Snapshot> remainingSnapshots = snapshots.subList(1, snapshots.size());
-
-    Map<String, String> snapshotRefs = new HashMap<>();
-    SnapshotRef ref = SnapshotRef.branchBuilder(snapshotToDelete.snapshotId()).build();
-    snapshotRefs.put(SnapshotRef.MAIN_BRANCH, org.apache.iceberg.SnapshotRefParser.toJson(ref));
-
-    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(remainingSnapshots));
-    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(snapshotRefs));
-
-    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
+    TableMetadata newMetadata =
+        createMetadataWithSnapshots(baseWithSnapshots, Collections.emptyList(), new HashMap<>());
 
     InvalidIcebergSnapshotException exception =
         assertThrows(
             InvalidIcebergSnapshotException.class,
             () -> snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata));
 
-    assertTrue(exception.getMessage().contains("Cannot delete snapshots"));
-    assertTrue(exception.getMessage().contains("still referenced"));
+    assertTrue(exception.getMessage().contains("Cannot delete the current snapshot"));
   }
+  // ========== Metrics Tests ==========
 
+  /** Verifies that WAP (staged) snapshots trigger the correct metrics. */
   @Test
-  void testApplySnapshots_withWapSnapshots_recordsCorrectMetrics() throws IOException {
+  void testMetrics_wapSnapshots_recordsStagedCounter() throws IOException {
     List<Snapshot> baseSnapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, baseSnapshots);
 
@@ -200,160 +223,78 @@ void testApplySnapshots_withWapSnapshots_recordsCorrectMetrics() throws IOExcept
     List<Snapshot> allSnapshots = new ArrayList<>(baseSnapshots);
     allSnapshots.addAll(wapSnapshots);
 
-    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
-    properties.put(
-        CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
-                baseSnapshots.get(baseSnapshots.size() - 1))));
+    Map<String, String> refs =
+        IcebergTestUtil.obtainSnapshotRefsFromSnapshot(baseSnapshots.get(baseSnapshots.size() - 1));
+    TableMetadata newMetadata = createMetadataWithSnapshots(baseWithSnapshots, allSnapshots, refs);
 
-    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
     TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
 
     assertNotNull(result);
-
     verify(mockMetricsReporter)
         .count(eq(InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR), anyDouble());
   }
 
+  /** Verifies that deleting snapshots triggers the correct metrics. */
   @Test
-  void testApplySnapshots_deleteSnapshots_recordsCorrectMetrics() throws IOException {
+  void testMetrics_deleteSnapshots_recordsDeletedCounter() throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
 
     List<Snapshot> remainingSnapshots = snapshots.subList(1, snapshots.size());
+    TableMetadata newMetadata =
+        createMetadataWithSnapshotsAndMainRef(baseWithSnapshots, remainingSnapshots);
 
-    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(remainingSnapshots));
-    properties.put(
-        CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
-                remainingSnapshots.get(remainingSnapshots.size() - 1))));
-
-    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
     TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
 
     assertNotNull(result);
     assertEquals(remainingSnapshots.size(), result.snapshots().size());
-
     verify(mockMetricsReporter)
         .count(eq(InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR), eq(1.0));
   }
 
+  // ========== Property Management Tests ==========
+
+  /** Verifies that appended snapshot IDs are recorded in properties. */
   @Test
-  void testApplySnapshots_recordsSnapshotIdsInProperties() throws IOException {
+  void testProperties_appendedSnapshots_recordedCorrectly() throws IOException {
     List<Snapshot> baseSnapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, baseSnapshots);
 
     List<Snapshot> newSnapshotsList = IcebergTestUtil.getExtraSnapshots();
     List<Snapshot> allSnapshots = new ArrayList<>(baseSnapshots);
     allSnapshots.addAll(newSnapshotsList);
+    TableMetadata newMetadata =
+        createMetadataWithSnapshotsAndMainRef(baseWithSnapshots, allSnapshots);
 
-    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots));
-    properties.put(
-        CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
-                allSnapshots.get(allSnapshots.size() - 1))));
-
-    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
     TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
 
     assertNotNull(result);
-
     String appendedSnapshots =
         result.properties().get(getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS));
     assertNotNull(appendedSnapshots, "Appended snapshots should be recorded in properties");
 
-    assertTrue(appendedSnapshots.contains(",") || !appendedSnapshots.isEmpty());
+    // Verify actual snapshot IDs are present
+    for (Snapshot newSnapshot : newSnapshotsList) {
+      assertTrue(
+          appendedSnapshots.contains(String.valueOf(newSnapshot.snapshotId())),
+          "Snapshot ID " + newSnapshot.snapshotId() + " should be in appended_snapshots");
+    }
   }
 
+  /** Verifies that temporary snapshot processing keys are removed from final properties. */
   @Test
-  void testApplySnapshots_removesSnapshotKeysFromProperties() throws IOException {
+  void testProperties_tempKeysRemoved_success() throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata newMetadata = createMetadataWithSnapshotsAndMainRef(baseMetadata, snapshots);
 
-    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
-    properties.put(
-        CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots.get(snapshots.size() - 1))));
-
-    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
     TableMetadata result = snapshotDiffApplier.applySnapshots(null, newMetadata);
 
     assertNotNull(result);
-
     assertFalse(
         result.properties().containsKey(CatalogConstants.SNAPSHOTS_JSON_KEY),
-        "Snapshots JSON key should be removed from final properties");
+        "Temp snapshots JSON key should be removed");
     assertFalse(
         result.properties().containsKey(CatalogConstants.SNAPSHOTS_REFS_KEY),
-        "Snapshots refs key should be removed from final properties");
-  }
-
-  @Test
-  void testApplySnapshots_branchUpdates_appliesCorrectly() throws IOException {
-    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
-    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
-
-    Snapshot newBranchTarget = snapshots.get(1);
-    Map<String, String> snapshotRefs =
-        IcebergTestUtil.obtainSnapshotRefsFromSnapshot(newBranchTarget);
-
-    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
-    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(snapshotRefs));
-
-    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
-    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
-
-    assertNotNull(result);
-    assertNotNull(result.currentSnapshot());
-    assertEquals(newBranchTarget.snapshotId(), result.currentSnapshot().snapshotId());
-  }
-
-  @Test
-  void testApplySnapshots_multipleBranchUpdates_success() throws IOException {
-    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
-    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
-
-    Map<String, String> snapshotRefs = new HashMap<>();
-    SnapshotRef mainRef = SnapshotRef.branchBuilder(snapshots.get(0).snapshotId()).build();
-    SnapshotRef devRef = SnapshotRef.branchBuilder(snapshots.get(1).snapshotId()).build();
-    snapshotRefs.put(SnapshotRef.MAIN_BRANCH, org.apache.iceberg.SnapshotRefParser.toJson(mainRef));
-    snapshotRefs.put("dev", org.apache.iceberg.SnapshotRefParser.toJson(devRef));
-
-    Map<String, String> properties = new HashMap<>(baseWithSnapshots.properties());
-    properties.put(
-        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
-    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(snapshotRefs));
-
-    TableMetadata newMetadata = baseWithSnapshots.replaceProperties(properties);
-    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
-
-    assertNotNull(result);
-    assertEquals(2, result.refs().size());
-  }
-
-  private TableMetadata addSnapshotsToMetadata(TableMetadata metadata, List<Snapshot> snapshots) {
-    TableMetadata.Builder builder = TableMetadata.buildFrom(metadata);
-    for (Snapshot snapshot : snapshots) {
-      builder.addSnapshot(snapshot);
-    }
-    if (!snapshots.isEmpty()) {
-      Snapshot lastSnapshot = snapshots.get(snapshots.size() - 1);
-      SnapshotRef ref = SnapshotRef.branchBuilder(lastSnapshot.snapshotId()).build();
-      builder.setRef(SnapshotRef.MAIN_BRANCH, ref);
-    }
-    return builder.build();
+        "Temp snapshots refs key should be removed");
   }
 }

From 3d1a758b0f87dcba7f0ca8ffbbf03b87559c4fa1 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Tue, 4 Nov 2025 12:20:10 -0800
Subject: [PATCH 27/35] small refactor

---
 .../internal/catalog/SnapshotDiffApplier.java | 109 +++++++++---------
 1 file changed, 56 insertions(+), 53 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index e9f204f43..33db2fabb 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -88,10 +88,10 @@ private class SnapshotDiff {
     private final List<Snapshot> newSnapshots;
     private final List<Snapshot> deletedSnapshots;
 
-    // Categorized snapshots (computed during applyTo)
-    private List<String> appendedSnapshots;
-    private List<String> stagedSnapshots;
-    private List<String> cherryPickedSnapshots;
+    // Categorized snapshots
+    private final List<Snapshot> stagedSnapshots;
+    private final List<Snapshot> regularSnapshots;
+    private final List<Snapshot> cherryPickedSnapshots;
 
     SnapshotDiff(
         List<Snapshot> providedSnapshots,
@@ -118,6 +118,23 @@ private class SnapshotDiff {
           existingSnapshots.stream()
               .filter(s -> !providedSnapshotByIds.containsKey(s.snapshotId()))
               .collect(Collectors.toList());
+
+      // Categorize snapshots (simple logic for PR1 - just check summary properties)
+      this.stagedSnapshots =
+          newSnapshots.stream()
+              .filter(s -> s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
+              .collect(Collectors.toList());
+      this.cherryPickedSnapshots =
+          newSnapshots.stream()
+              .filter(s -> s.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP))
+              .collect(Collectors.toList());
+      this.regularSnapshots =
+          newSnapshots.stream()
+              .filter(
+                  s ->
+                      !s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP)
+                          && !s.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP))
+              .collect(Collectors.toList());
     }
 
     /**
@@ -158,9 +175,6 @@ private void validateCurrentSnapshotNotDeleted(TableMetadata base) {
 
     TableMetadata.Builder applyTo(TableMetadata metadata) {
       TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(metadata);
-      this.appendedSnapshots = new ArrayList<>();
-      this.stagedSnapshots = new ArrayList<>();
-      this.cherryPickedSnapshots = new ArrayList<>();
 
       // Validate only MAIN branch
       for (Map.Entry<String, SnapshotRef> entry : providedRefs.entrySet()) {
@@ -170,53 +184,32 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
       }
 
       /**
-       * First check if there are new snapshots to be appended to current TableMetadata. If yes,
-       * following are the cases to be handled:
-       *
-       * <p>[1] A regular (non-wap) snapshot is being added to the MAIN branch.
+       * Apply categorized snapshots to metadata:
        *
-       * <p>[2] A staged (wap) snapshot is being created on top of current snapshot as its base.
-       * Recognized by STAGED_WAP_ID_PROP.
+       * <p>[1] Staged (WAP) snapshots - added without branch reference
        *
-       * <p>[3] A staged (wap) snapshot is being cherry picked to the MAIN branch wherein current
-       * snapshot in the MAIN branch is not the same as the base snapshot the staged (wap) snapshot
-       * was created on. Recognized by SOURCE_SNAPSHOT_ID_PROP. This case is called non-fast forward
-       * cherry pick.
+       * <p>[2] Cherry-picked snapshots - set as main branch snapshot
        *
-       * <p>In case no new snapshots are to be appended to current TableMetadata, there could be a
-       * cherrypick of a staged (wap) snapshot on top of the current snapshot in the MAIN branch
-       * which is the same as the base snapshot the staged (wap) snapshot was created on. This case
-       * is called fast forward cherry pick.
+       * <p>[3] Regular snapshots - set as main branch snapshot
        */
-      if (CollectionUtils.isNotEmpty(newSnapshots)) {
-        for (Snapshot snapshot : newSnapshots) {
-          if (snapshot.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP)) {
-            // a stage only snapshot using wap.id
-            metadataBuilder.addSnapshot(snapshot);
-            stagedSnapshots.add(String.valueOf(snapshot.snapshotId()));
-          } else if (snapshot.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)) {
-            // a snapshot created on a non fast-forward cherry-pick snapshot
-            metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
-            appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
-            cherryPickedSnapshots.add(
-                String.valueOf(snapshot.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)));
-          } else {
-            // a regular snapshot
-            metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
-            appendedSnapshots.add(String.valueOf(snapshot.snapshotId()));
-          }
-        }
-      } else if (MapUtils.isNotEmpty(providedRefs)) {
-        // Updated ref in the main branch with no new snapshot means this is a
-        // fast-forward cherry-pick or rollback operation.
+      for (Snapshot snapshot : stagedSnapshots) {
+        metadataBuilder.addSnapshot(snapshot);
+      }
+
+      for (Snapshot snapshot : cherryPickedSnapshots) {
+        metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
+      }
+
+      for (Snapshot snapshot : regularSnapshots) {
+        metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
+      }
+
+      // Handle fast-forward cherry-pick (ref update without new snapshot)
+      if (newSnapshots.isEmpty() && MapUtils.isNotEmpty(providedRefs)) {
         long newSnapshotId = providedRefs.get(SnapshotRef.MAIN_BRANCH).snapshotId();
-        // Either the current snapshot is null or the current snapshot is not equal
-        // to the new snapshot indicates an update. The first case happens when the
-        // stage/wap snapshot being cherry-picked is the first snapshot.
         if (MapUtils.isEmpty(metadata.refs())
             || metadata.refs().get(SnapshotRef.MAIN_BRANCH).snapshotId() != newSnapshotId) {
           metadataBuilder.setBranchSnapshot(newSnapshotId, SnapshotRef.MAIN_BRANCH);
-          cherryPickedSnapshots.add(String.valueOf(newSnapshotId));
         }
       }
 
@@ -231,9 +224,11 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
     }
 
     void recordMetrics(TableMetadata.Builder builder) {
-      if (CollectionUtils.isNotEmpty(appendedSnapshots)) {
-        metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedSnapshots.size());
+      // Compute appended snapshots (regular + cherry-picked)
+      int appendedCount = regularSnapshots.size() + cherryPickedSnapshots.size();
+
+      if (appendedCount > 0) {
+        metricsReporter.count(InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedCount);
       }
       if (CollectionUtils.isNotEmpty(stagedSnapshots)) {
         metricsReporter.count(
@@ -250,23 +245,31 @@ void recordMetrics(TableMetadata.Builder builder) {
       }
 
       // Record snapshot IDs in properties
-      if (CollectionUtils.isNotEmpty(appendedSnapshots)) {
+      if (appendedCount > 0) {
+        List<Snapshot> appendedSnapshots = new ArrayList<>(regularSnapshots);
+        appendedSnapshots.addAll(cherryPickedSnapshots);
         builder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
-                String.join(",", appendedSnapshots)));
+                appendedSnapshots.stream()
+                    .map(s -> Long.toString(s.snapshotId()))
+                    .collect(Collectors.joining(","))));
       }
       if (CollectionUtils.isNotEmpty(stagedSnapshots)) {
         builder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
-                String.join(",", stagedSnapshots)));
+                stagedSnapshots.stream()
+                    .map(s -> Long.toString(s.snapshotId()))
+                    .collect(Collectors.joining(","))));
       }
       if (CollectionUtils.isNotEmpty(cherryPickedSnapshots)) {
         builder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
-                String.join(",", cherryPickedSnapshots)));
+                cherryPickedSnapshots.stream()
+                    .map(s -> s.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP))
+                    .collect(Collectors.joining(","))));
       }
       if (CollectionUtils.isNotEmpty(deletedSnapshots)) {
         builder.setProperties(

From 65666d23f19f4ab10894649db284380bd4004e92 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Tue, 4 Nov 2025 16:08:37 -0800
Subject: [PATCH 28/35] updating containers

---
 .../internal/catalog/SnapshotDiffApplier.java | 76 ++++++++++++++-----
 1 file changed, 56 insertions(+), 20 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index 33db2fabb..ece059b27 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -16,8 +16,6 @@
 import java.util.stream.Collectors;
 import lombok.AllArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.collections.CollectionUtils;
-import org.apache.commons.collections.MapUtils;
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.SnapshotRef;
 import org.apache.iceberg.SnapshotSummary;
@@ -58,10 +56,12 @@ public TableMetadata applySnapshots(TableMetadata base, TableMetadata metadata)
             .orElse(new HashMap<>());
 
     List<Snapshot> existingSnapshots = base != null ? base.snapshots() : Collections.emptyList();
+    Map<String, SnapshotRef> existingRefs = base != null ? base.refs() : Collections.emptyMap();
 
     // Compute diff (all maps created once in constructor)
     SnapshotDiff diff =
-        new SnapshotDiff(providedSnapshots, existingSnapshots, metadata, providedRefs);
+        new SnapshotDiff(
+            providedSnapshots, existingSnapshots, metadata, providedRefs, existingRefs);
 
     // Validate, apply, record metrics, build
     diff.validate(base);
@@ -81,10 +81,13 @@ private class SnapshotDiff {
     private final List<Snapshot> existingSnapshots;
     private final TableMetadata metadata;
     private final Map<String, SnapshotRef> providedRefs;
+    private final Map<String, SnapshotRef> existingRefs;
 
     // Computed maps (created once)
     private final Map<Long, Snapshot> providedSnapshotByIds;
     private final Map<Long, Snapshot> existingSnapshotByIds;
+    private final Set<Long> existingBranchRefIds;
+    private final Set<Long> providedBranchRefIds;
     private final List<Snapshot> newSnapshots;
     private final List<Snapshot> deletedSnapshots;
 
@@ -97,17 +100,27 @@ private class SnapshotDiff {
         List<Snapshot> providedSnapshots,
         List<Snapshot> existingSnapshots,
         TableMetadata metadata,
-        Map<String, SnapshotRef> providedRefs) {
+        Map<String, SnapshotRef> providedRefs,
+        Map<String, SnapshotRef> existingRefs) {
       this.providedSnapshots = providedSnapshots;
       this.existingSnapshots = existingSnapshots;
       this.metadata = metadata;
       this.providedRefs = providedRefs;
+      this.existingRefs = existingRefs;
 
       // Compute all maps once
       this.providedSnapshotByIds =
           providedSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
       this.existingSnapshotByIds =
           existingSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
+      this.existingBranchRefIds =
+          existingRefs.values().stream()
+              .map(SnapshotRef::snapshotId)
+              .collect(Collectors.toSet());
+      this.providedBranchRefIds =
+          providedRefs.values().stream()
+              .map(SnapshotRef::snapshotId)
+              .collect(Collectors.toSet());
 
       // Compute changes
       this.newSnapshots =
@@ -125,8 +138,24 @@ private class SnapshotDiff {
               .filter(s -> s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
               .collect(Collectors.toList());
       this.cherryPickedSnapshots =
-          newSnapshots.stream()
-              .filter(s -> s.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP))
+          providedSnapshots.stream()
+              .filter(
+                  s -> {
+                    // New snapshot with SOURCE_SNAPSHOT_ID_PROP (actual cherry-pick)
+                    if (!existingSnapshotByIds.containsKey(s.snapshotId())
+                        && s.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)) {
+                      return true;
+                    }
+                    // WAP snapshot being published (staged → branch transition)
+                    // For new snapshots: WAP created and immediately published
+                    // For existing snapshots: existing WAP being published (fast-forward)
+                    boolean hasWapId =
+                        s.summary() != null
+                            && s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP);
+                    boolean wasStaged = !existingBranchRefIds.contains(s.snapshotId());
+                    boolean isNowOnBranch = providedBranchRefIds.contains(s.snapshotId());
+                    return hasWapId && wasStaged && isNowOnBranch;
+                  })
               .collect(Collectors.toList());
       this.regularSnapshots =
           newSnapshots.stream()
@@ -196,8 +225,12 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
         metadataBuilder.addSnapshot(snapshot);
       }
 
+      // Only apply NEW cherry-picked snapshots
+      // Existing cherry-picked snapshots are handled by fast-forward block below
       for (Snapshot snapshot : cherryPickedSnapshots) {
-        metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
+        if (newSnapshots.contains(snapshot)) {
+          metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
+        }
       }
 
       for (Snapshot snapshot : regularSnapshots) {
@@ -205,16 +238,16 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
       }
 
       // Handle fast-forward cherry-pick (ref update without new snapshot)
-      if (newSnapshots.isEmpty() && MapUtils.isNotEmpty(providedRefs)) {
+      if (newSnapshots.isEmpty() && !providedRefs.isEmpty()) {
         long newSnapshotId = providedRefs.get(SnapshotRef.MAIN_BRANCH).snapshotId();
-        if (MapUtils.isEmpty(metadata.refs())
+        if (metadata.refs().isEmpty()
             || metadata.refs().get(SnapshotRef.MAIN_BRANCH).snapshotId() != newSnapshotId) {
           metadataBuilder.setBranchSnapshot(newSnapshotId, SnapshotRef.MAIN_BRANCH);
         }
       }
 
       // Delete snapshots
-      if (CollectionUtils.isNotEmpty(deletedSnapshots)) {
+      if (!deletedSnapshots.isEmpty()) {
         Set<Long> snapshotIds =
             deletedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
         metadataBuilder.removeSnapshots(snapshotIds);
@@ -224,22 +257,25 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
     }
 
     void recordMetrics(TableMetadata.Builder builder) {
-      // Compute appended snapshots (regular + cherry-picked)
-      int appendedCount = regularSnapshots.size() + cherryPickedSnapshots.size();
+      // Compute appended snapshots (regular + NEW cherry-picked only)
+      // Existing cherry-picked snapshots (fast-forward) are not appended
+      List<Snapshot> newCherryPicked =
+          cherryPickedSnapshots.stream().filter(newSnapshots::contains).collect(Collectors.toList());
+      int appendedCount = regularSnapshots.size() + newCherryPicked.size();
 
       if (appendedCount > 0) {
         metricsReporter.count(InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedCount);
       }
-      if (CollectionUtils.isNotEmpty(stagedSnapshots)) {
+      if (!stagedSnapshots.isEmpty()) {
         metricsReporter.count(
             InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, stagedSnapshots.size());
       }
-      if (CollectionUtils.isNotEmpty(cherryPickedSnapshots)) {
+      if (!cherryPickedSnapshots.isEmpty()) {
         metricsReporter.count(
             InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR,
             cherryPickedSnapshots.size());
       }
-      if (CollectionUtils.isNotEmpty(deletedSnapshots)) {
+      if (!deletedSnapshots.isEmpty()) {
         metricsReporter.count(
             InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, deletedSnapshots.size());
       }
@@ -247,7 +283,7 @@ void recordMetrics(TableMetadata.Builder builder) {
       // Record snapshot IDs in properties
       if (appendedCount > 0) {
         List<Snapshot> appendedSnapshots = new ArrayList<>(regularSnapshots);
-        appendedSnapshots.addAll(cherryPickedSnapshots);
+        appendedSnapshots.addAll(newCherryPicked);
         builder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
@@ -255,7 +291,7 @@ void recordMetrics(TableMetadata.Builder builder) {
                     .map(s -> Long.toString(s.snapshotId()))
                     .collect(Collectors.joining(","))));
       }
-      if (CollectionUtils.isNotEmpty(stagedSnapshots)) {
+      if (!stagedSnapshots.isEmpty()) {
         builder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
@@ -263,15 +299,15 @@ void recordMetrics(TableMetadata.Builder builder) {
                     .map(s -> Long.toString(s.snapshotId()))
                     .collect(Collectors.joining(","))));
       }
-      if (CollectionUtils.isNotEmpty(cherryPickedSnapshots)) {
+      if (!cherryPickedSnapshots.isEmpty()) {
         builder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
                 cherryPickedSnapshots.stream()
-                    .map(s -> s.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP))
+                    .map(s -> Long.toString(s.snapshotId()))
                     .collect(Collectors.joining(","))));
       }
-      if (CollectionUtils.isNotEmpty(deletedSnapshots)) {
+      if (!deletedSnapshots.isEmpty()) {
         builder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),

From 062f38628ff8497774f6fa127c193f5dfb72d7f7 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Tue, 4 Nov 2025 16:09:25 -0800
Subject: [PATCH 29/35] updating containers

---
 .../internal/catalog/SnapshotDiffApplier.java        | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index ece059b27..bfd3438d9 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -114,13 +114,9 @@ private class SnapshotDiff {
       this.existingSnapshotByIds =
           existingSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
       this.existingBranchRefIds =
-          existingRefs.values().stream()
-              .map(SnapshotRef::snapshotId)
-              .collect(Collectors.toSet());
+          existingRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
       this.providedBranchRefIds =
-          providedRefs.values().stream()
-              .map(SnapshotRef::snapshotId)
-              .collect(Collectors.toSet());
+          providedRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
 
       // Compute changes
       this.newSnapshots =
@@ -260,7 +256,9 @@ void recordMetrics(TableMetadata.Builder builder) {
       // Compute appended snapshots (regular + NEW cherry-picked only)
       // Existing cherry-picked snapshots (fast-forward) are not appended
       List<Snapshot> newCherryPicked =
-          cherryPickedSnapshots.stream().filter(newSnapshots::contains).collect(Collectors.toList());
+          cherryPickedSnapshots.stream()
+              .filter(newSnapshots::contains)
+              .collect(Collectors.toList());
       int appendedCount = regularSnapshots.size() + newCherryPicked.size();
 
       if (appendedCount > 0) {

From 85d8696b9054b066744581b60aa4aeb391e030bc Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Tue, 4 Nov 2025 22:17:30 -0800
Subject: [PATCH 30/35] fixing tests

---
 .../internal/catalog/SnapshotDiffApplier.java | 67 ++++++++++---------
 1 file changed, 35 insertions(+), 32 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index bfd3438d9..de5112442 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -4,7 +4,6 @@
 
 import com.linkedin.openhouse.cluster.metrics.micrometer.MetricsReporter;
 import com.linkedin.openhouse.internal.catalog.exception.InvalidIcebergSnapshotException;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
@@ -133,32 +132,47 @@ private class SnapshotDiff {
           newSnapshots.stream()
               .filter(s -> s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
               .collect(Collectors.toList());
+
+      // Compute source IDs for cherry-pick operations (from ForReference.java)
+      Set<Long> cherryPickSourceIds =
+          providedSnapshots.stream()
+              .filter(
+                  s ->
+                      s.summary() != null
+                          && s.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP))
+              .map(s -> Long.parseLong(s.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)))
+              .collect(Collectors.toSet());
+
       this.cherryPickedSnapshots =
           providedSnapshots.stream()
               .filter(
-                  s -> {
-                    // New snapshot with SOURCE_SNAPSHOT_ID_PROP (actual cherry-pick)
-                    if (!existingSnapshotByIds.containsKey(s.snapshotId())
-                        && s.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)) {
+                  provided -> {
+                    // Only consider EXISTING snapshots as cherry-picked (from ForReference.java)
+                    Snapshot existing = existingSnapshotByIds.get(provided.snapshotId());
+                    if (existing == null) {
+                      return false;
+                    }
+
+                    // Is source of cherry-pick (from ForReference.java)
+                    if (cherryPickSourceIds.contains(provided.snapshotId())) {
                       return true;
                     }
+
                     // WAP snapshot being published (staged → branch transition)
-                    // For new snapshots: WAP created and immediately published
-                    // For existing snapshots: existing WAP being published (fast-forward)
                     boolean hasWapId =
-                        s.summary() != null
-                            && s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP);
-                    boolean wasStaged = !existingBranchRefIds.contains(s.snapshotId());
-                    boolean isNowOnBranch = providedBranchRefIds.contains(s.snapshotId());
+                        provided.summary() != null
+                            && provided.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP);
+                    boolean wasStaged = !existingBranchRefIds.contains(provided.snapshotId());
+                    boolean isNowOnBranch = providedBranchRefIds.contains(provided.snapshotId());
                     return hasWapId && wasStaged && isNowOnBranch;
                   })
               .collect(Collectors.toList());
+      // Regular snapshots = all new snapshots that are not staged WAP
+      // (From ForReference.java: everything that's not cherry-picked and not WAP)
+      // Note: NEW snapshots with SOURCE_SNAPSHOT_ID_PROP are regular (new commits being appended)
       this.regularSnapshots =
           newSnapshots.stream()
-              .filter(
-                  s ->
-                      !s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP)
-                          && !s.summary().containsKey(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP))
+              .filter(s -> !s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
               .collect(Collectors.toList());
     }
 
@@ -221,13 +235,8 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
         metadataBuilder.addSnapshot(snapshot);
       }
 
-      // Only apply NEW cherry-picked snapshots
-      // Existing cherry-picked snapshots are handled by fast-forward block below
-      for (Snapshot snapshot : cherryPickedSnapshots) {
-        if (newSnapshots.contains(snapshot)) {
-          metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
-        }
-      }
+      // Cherry-picked snapshots are all existing, handled by fast-forward block below
+      // (No need to apply them here)
 
       for (Snapshot snapshot : regularSnapshots) {
         metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
@@ -253,13 +262,9 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
     }
 
     void recordMetrics(TableMetadata.Builder builder) {
-      // Compute appended snapshots (regular + NEW cherry-picked only)
-      // Existing cherry-picked snapshots (fast-forward) are not appended
-      List<Snapshot> newCherryPicked =
-          cherryPickedSnapshots.stream()
-              .filter(newSnapshots::contains)
-              .collect(Collectors.toList());
-      int appendedCount = regularSnapshots.size() + newCherryPicked.size();
+      // Compute appended snapshots (only regular snapshots)
+      // Cherry-picked snapshots are all existing, not appended
+      int appendedCount = regularSnapshots.size();
 
       if (appendedCount > 0) {
         metricsReporter.count(InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedCount);
@@ -280,12 +285,10 @@ void recordMetrics(TableMetadata.Builder builder) {
 
       // Record snapshot IDs in properties
       if (appendedCount > 0) {
-        List<Snapshot> appendedSnapshots = new ArrayList<>(regularSnapshots);
-        appendedSnapshots.addAll(newCherryPicked);
         builder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
-                appendedSnapshots.stream()
+                regularSnapshots.stream()
                     .map(s -> Long.toString(s.snapshotId()))
                     .collect(Collectors.joining(","))));
       }

From d64f57b00f2a9c33d3fa7c84218029cf2fd2723b Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Wed, 5 Nov 2025 15:11:03 -0800
Subject: [PATCH 31/35] cleaning up practices

---
 .../internal/catalog/SnapshotDiffApplier.java | 257 +++++++++++-------
 1 file changed, 166 insertions(+), 91 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index de5112442..16c26d1d1 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -23,9 +23,8 @@
 /**
  * Service responsible for applying snapshot changes to Iceberg table metadata.
  *
- * <p>This class extracts snapshot logic from OpenHouseInternalTableOperations while maintaining the
- * same behavior. The main entry point applySnapshots() has a clear flow: parse input → compute diff
- * → validate → apply.
+ * <p>The main entry point applySnapshots() has a clear flow: parse input → compute diff → validate
+ * → apply.
  */
 @AllArgsConstructor
 @Slf4j
@@ -37,36 +36,44 @@ public class SnapshotDiffApplier {
    * Applies snapshot updates from metadata properties. Simple and clear: parse input, compute diff,
    * validate, apply, record metrics, build.
    *
-   * @param base The base table metadata (may be null for table creation)
-   * @param metadata The new metadata with properties containing snapshot updates
+   * @param existingMetadata The existing table metadata (may be null for table creation)
+   * @param providedMetadata The new metadata with properties containing snapshot updates
    * @return Updated metadata with snapshots applied
    */
-  public TableMetadata applySnapshots(TableMetadata base, TableMetadata metadata) {
-    String snapshotsJson = metadata.properties().get(CatalogConstants.SNAPSHOTS_JSON_KEY);
+  public TableMetadata applySnapshots(
+      TableMetadata existingMetadata, TableMetadata providedMetadata) {
+    String snapshotsJson = providedMetadata.properties().get(CatalogConstants.SNAPSHOTS_JSON_KEY);
     if (snapshotsJson == null) {
-      return metadata;
+      return providedMetadata;
     }
 
     // Parse input
     List<Snapshot> providedSnapshots = SnapshotsUtil.parseSnapshots(null, snapshotsJson);
     Map<String, SnapshotRef> providedRefs =
-        Optional.ofNullable(metadata.properties().get(CatalogConstants.SNAPSHOTS_REFS_KEY))
+        Optional.ofNullable(providedMetadata.properties().get(CatalogConstants.SNAPSHOTS_REFS_KEY))
             .map(SnapshotsUtil::parseSnapshotRefs)
             .orElse(new HashMap<>());
 
-    List<Snapshot> existingSnapshots = base != null ? base.snapshots() : Collections.emptyList();
-    Map<String, SnapshotRef> existingRefs = base != null ? base.refs() : Collections.emptyMap();
+    List<Snapshot> existingSnapshots =
+        existingMetadata != null ? existingMetadata.snapshots() : Collections.emptyList();
+    Map<String, SnapshotRef> existingRefs =
+        existingMetadata != null ? existingMetadata.refs() : Collections.emptyMap();
 
-    // Compute diff (all maps created once in constructor)
+    // Compute diff (all maps created once in factory method)
     SnapshotDiff diff =
-        new SnapshotDiff(
-            providedSnapshots, existingSnapshots, metadata, providedRefs, existingRefs);
-
-    // Validate, apply, record metrics, build
-    diff.validate(base);
-    TableMetadata.Builder builder = diff.applyTo(metadata);
-    diff.recordMetrics(builder);
-    return builder.build();
+        SnapshotDiff.create(
+            metricsReporter,
+            existingMetadata,
+            providedSnapshots,
+            existingSnapshots,
+            providedMetadata,
+            providedRefs,
+            existingRefs);
+
+    // Validate, apply, record metrics
+    diff.validate();
+    diff.recordMetrics();
+    return diff.applyTo();
   }
 
   /**
@@ -74,11 +81,15 @@ public TableMetadata applySnapshots(TableMetadata base, TableMetadata metadata)
    * constructor to avoid redundant operations. Provides clear methods for validation and
    * application.
    */
-  private class SnapshotDiff {
+  private static class SnapshotDiff {
+    // Injected dependency
+    private final MetricsReporter metricsReporter;
+
     // Input state
+    private final TableMetadata existingMetadata;
     private final List<Snapshot> providedSnapshots;
     private final List<Snapshot> existingSnapshots;
-    private final TableMetadata metadata;
+    private final TableMetadata providedMetadata;
     private final Map<String, SnapshotRef> providedRefs;
     private final Map<String, SnapshotRef> existingRefs;
 
@@ -94,46 +105,56 @@ private class SnapshotDiff {
     private final List<Snapshot> stagedSnapshots;
     private final List<Snapshot> regularSnapshots;
     private final List<Snapshot> cherryPickedSnapshots;
+    private final int appendedCount;
 
-    SnapshotDiff(
+    /**
+     * Creates a SnapshotDiff by computing all snapshot analysis from the provided inputs.
+     *
+     * @param metricsReporter Metrics reporter for recording snapshot operations
+     * @param existingMetadata The existing table metadata (may be null for table creation)
+     * @param providedSnapshots Snapshots provided in the update
+     * @param existingSnapshots Snapshots currently in the table
+     * @param providedMetadata The new metadata with properties containing snapshot updates
+     * @param providedRefs Snapshot refs provided in the update
+     * @param existingRefs Snapshot refs currently in the table
+     * @return A new SnapshotDiff with all analysis computed
+     */
+    static SnapshotDiff create(
+        MetricsReporter metricsReporter,
+        TableMetadata existingMetadata,
         List<Snapshot> providedSnapshots,
         List<Snapshot> existingSnapshots,
-        TableMetadata metadata,
+        TableMetadata providedMetadata,
         Map<String, SnapshotRef> providedRefs,
         Map<String, SnapshotRef> existingRefs) {
-      this.providedSnapshots = providedSnapshots;
-      this.existingSnapshots = existingSnapshots;
-      this.metadata = metadata;
-      this.providedRefs = providedRefs;
-      this.existingRefs = existingRefs;
 
-      // Compute all maps once
-      this.providedSnapshotByIds =
+      // Compute all index maps once
+      Map<Long, Snapshot> providedSnapshotByIds =
           providedSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
-      this.existingSnapshotByIds =
+      Map<Long, Snapshot> existingSnapshotByIds =
           existingSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
-      this.existingBranchRefIds =
+      Set<Long> existingBranchRefIds =
           existingRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
-      this.providedBranchRefIds =
+      Set<Long> providedBranchRefIds =
           providedRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
 
       // Compute changes
-      this.newSnapshots =
+      List<Snapshot> newSnapshots =
           providedSnapshots.stream()
               .filter(s -> !existingSnapshotByIds.containsKey(s.snapshotId()))
               .collect(Collectors.toList());
-      this.deletedSnapshots =
+      List<Snapshot> deletedSnapshots =
           existingSnapshots.stream()
               .filter(s -> !providedSnapshotByIds.containsKey(s.snapshotId()))
               .collect(Collectors.toList());
 
-      // Categorize snapshots (simple logic for PR1 - just check summary properties)
-      this.stagedSnapshots =
+      // Categorize snapshots
+      List<Snapshot> stagedSnapshots =
           newSnapshots.stream()
               .filter(s -> s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
               .collect(Collectors.toList());
 
-      // Compute source IDs for cherry-pick operations (from ForReference.java)
+      // Compute source IDs for cherry-pick operations
       Set<Long> cherryPickSourceIds =
           providedSnapshots.stream()
               .filter(
@@ -143,17 +164,17 @@ private class SnapshotDiff {
               .map(s -> Long.parseLong(s.summary().get(SnapshotSummary.SOURCE_SNAPSHOT_ID_PROP)))
               .collect(Collectors.toSet());
 
-      this.cherryPickedSnapshots =
+      List<Snapshot> cherryPickedSnapshots =
           providedSnapshots.stream()
               .filter(
                   provided -> {
-                    // Only consider EXISTING snapshots as cherry-picked (from ForReference.java)
+                    // Only consider EXISTING snapshots as cherry-picked
                     Snapshot existing = existingSnapshotByIds.get(provided.snapshotId());
                     if (existing == null) {
                       return false;
                     }
 
-                    // Is source of cherry-pick (from ForReference.java)
+                    // Is source of cherry-pick
                     if (cherryPickSourceIds.contains(provided.snapshotId())) {
                       return true;
                     }
@@ -167,42 +188,97 @@ private class SnapshotDiff {
                     return hasWapId && wasStaged && isNowOnBranch;
                   })
               .collect(Collectors.toList());
+
       // Regular snapshots = all new snapshots that are not staged WAP
-      // (From ForReference.java: everything that's not cherry-picked and not WAP)
-      // Note: NEW snapshots with SOURCE_SNAPSHOT_ID_PROP are regular (new commits being appended)
-      this.regularSnapshots =
+      List<Snapshot> regularSnapshots =
           newSnapshots.stream()
               .filter(s -> !s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
               .collect(Collectors.toList());
+
+      // Compute appended count (only regular snapshots, not cherry-picked)
+      int appendedCount = regularSnapshots.size();
+
+      return new SnapshotDiff(
+          metricsReporter,
+          existingMetadata,
+          providedSnapshots,
+          existingSnapshots,
+          providedMetadata,
+          providedRefs,
+          existingRefs,
+          providedSnapshotByIds,
+          existingSnapshotByIds,
+          existingBranchRefIds,
+          providedBranchRefIds,
+          newSnapshots,
+          deletedSnapshots,
+          stagedSnapshots,
+          regularSnapshots,
+          cherryPickedSnapshots,
+          appendedCount);
+    }
+
+    /** Private constructor that accepts all pre-computed values. Use {@link #create} instead. */
+    private SnapshotDiff(
+        MetricsReporter metricsReporter,
+        TableMetadata existingMetadata,
+        List<Snapshot> providedSnapshots,
+        List<Snapshot> existingSnapshots,
+        TableMetadata providedMetadata,
+        Map<String, SnapshotRef> providedRefs,
+        Map<String, SnapshotRef> existingRefs,
+        Map<Long, Snapshot> providedSnapshotByIds,
+        Map<Long, Snapshot> existingSnapshotByIds,
+        Set<Long> existingBranchRefIds,
+        Set<Long> providedBranchRefIds,
+        List<Snapshot> newSnapshots,
+        List<Snapshot> deletedSnapshots,
+        List<Snapshot> stagedSnapshots,
+        List<Snapshot> regularSnapshots,
+        List<Snapshot> cherryPickedSnapshots,
+        int appendedCount) {
+      this.metricsReporter = metricsReporter;
+      this.existingMetadata = existingMetadata;
+      this.providedSnapshots = providedSnapshots;
+      this.existingSnapshots = existingSnapshots;
+      this.providedMetadata = providedMetadata;
+      this.providedRefs = providedRefs;
+      this.existingRefs = existingRefs;
+      this.providedSnapshotByIds = providedSnapshotByIds;
+      this.existingSnapshotByIds = existingSnapshotByIds;
+      this.existingBranchRefIds = existingBranchRefIds;
+      this.providedBranchRefIds = providedBranchRefIds;
+      this.newSnapshots = newSnapshots;
+      this.deletedSnapshots = deletedSnapshots;
+      this.stagedSnapshots = stagedSnapshots;
+      this.regularSnapshots = regularSnapshots;
+      this.cherryPickedSnapshots = cherryPickedSnapshots;
+      this.appendedCount = appendedCount;
     }
 
     /**
      * Validates all snapshot changes before applying them to table metadata.
      *
-     * @param base The base table metadata to validate against (may be null for table creation)
      * @throws InvalidIcebergSnapshotException if any validation check fails
      */
-    void validate(TableMetadata base) {
-      validateCurrentSnapshotNotDeleted(base);
+    void validate() {
+      validateCurrentSnapshotNotDeleted();
     }
 
     /**
      * Validates that the current snapshot is not deleted without providing replacement snapshots.
-     * This is the same validation logic from SnapshotInspector.validateSnapshotsUpdate().
      *
-     * @param base The base table metadata containing the current snapshot (may be null for table
-     *     creation)
      * @throws InvalidIcebergSnapshotException if the current snapshot is being deleted without
      *     replacements
      */
-    private void validateCurrentSnapshotNotDeleted(TableMetadata base) {
-      if (base == null || base.currentSnapshot() == null) {
+    private void validateCurrentSnapshotNotDeleted() {
+      if (this.existingMetadata == null || this.existingMetadata.currentSnapshot() == null) {
         return;
       }
       if (!newSnapshots.isEmpty()) {
         return;
       }
-      long latestSnapshotId = base.currentSnapshot().snapshotId();
+      long latestSnapshotId = this.existingMetadata.currentSnapshot().snapshotId();
       if (!deletedSnapshots.isEmpty()
           && deletedSnapshots.get(deletedSnapshots.size() - 1).snapshotId() == latestSnapshotId) {
         throw new InvalidIcebergSnapshotException(
@@ -212,8 +288,8 @@ private void validateCurrentSnapshotNotDeleted(TableMetadata base) {
       }
     }
 
-    TableMetadata.Builder applyTo(TableMetadata metadata) {
-      TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(metadata);
+    TableMetadata applyTo() {
+      TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(this.providedMetadata);
 
       // Validate only MAIN branch
       for (Map.Entry<String, SnapshotRef> entry : providedRefs.entrySet()) {
@@ -245,8 +321,9 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
       // Handle fast-forward cherry-pick (ref update without new snapshot)
       if (newSnapshots.isEmpty() && !providedRefs.isEmpty()) {
         long newSnapshotId = providedRefs.get(SnapshotRef.MAIN_BRANCH).snapshotId();
-        if (metadata.refs().isEmpty()
-            || metadata.refs().get(SnapshotRef.MAIN_BRANCH).snapshotId() != newSnapshotId) {
+        if (this.providedMetadata.refs().isEmpty()
+            || this.providedMetadata.refs().get(SnapshotRef.MAIN_BRANCH).snapshotId()
+                != newSnapshotId) {
           metadataBuilder.setBranchSnapshot(newSnapshotId, SnapshotRef.MAIN_BRANCH);
         }
       }
@@ -258,34 +335,9 @@ TableMetadata.Builder applyTo(TableMetadata metadata) {
         metadataBuilder.removeSnapshots(snapshotIds);
       }
 
-      return metadataBuilder;
-    }
-
-    void recordMetrics(TableMetadata.Builder builder) {
-      // Compute appended snapshots (only regular snapshots)
-      // Cherry-picked snapshots are all existing, not appended
-      int appendedCount = regularSnapshots.size();
-
-      if (appendedCount > 0) {
-        metricsReporter.count(InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, appendedCount);
-      }
-      if (!stagedSnapshots.isEmpty()) {
-        metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, stagedSnapshots.size());
-      }
-      if (!cherryPickedSnapshots.isEmpty()) {
-        metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR,
-            cherryPickedSnapshots.size());
-      }
-      if (!deletedSnapshots.isEmpty()) {
-        metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, deletedSnapshots.size());
-      }
-
-      // Record snapshot IDs in properties
-      if (appendedCount > 0) {
-        builder.setProperties(
+      // Record snapshot IDs in properties and cleanup input properties
+      if (this.appendedCount > 0) {
+        metadataBuilder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
                 regularSnapshots.stream()
@@ -293,7 +345,7 @@ void recordMetrics(TableMetadata.Builder builder) {
                     .collect(Collectors.joining(","))));
       }
       if (!stagedSnapshots.isEmpty()) {
-        builder.setProperties(
+        metadataBuilder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
                 stagedSnapshots.stream()
@@ -301,7 +353,7 @@ void recordMetrics(TableMetadata.Builder builder) {
                     .collect(Collectors.joining(","))));
       }
       if (!cherryPickedSnapshots.isEmpty()) {
-        builder.setProperties(
+        metadataBuilder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
                 cherryPickedSnapshots.stream()
@@ -309,18 +361,41 @@ void recordMetrics(TableMetadata.Builder builder) {
                     .collect(Collectors.joining(","))));
       }
       if (!deletedSnapshots.isEmpty()) {
-        builder.setProperties(
+        metadataBuilder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
                 deletedSnapshots.stream()
                     .map(s -> Long.toString(s.snapshotId()))
                     .collect(Collectors.joining(","))));
       }
-
-      builder.removeProperties(
+      metadataBuilder.removeProperties(
           new HashSet<>(
               Arrays.asList(
                   CatalogConstants.SNAPSHOTS_JSON_KEY, CatalogConstants.SNAPSHOTS_REFS_KEY)));
+
+      return metadataBuilder.build();
+    }
+
+    void recordMetrics() {
+      // Record metrics for appended snapshots (only regular snapshots)
+      // Cherry-picked snapshots are all existing, not appended
+      if (this.appendedCount > 0) {
+        this.metricsReporter.count(
+            InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, this.appendedCount);
+      }
+      if (!this.stagedSnapshots.isEmpty()) {
+        this.metricsReporter.count(
+            InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, this.stagedSnapshots.size());
+      }
+      if (!this.cherryPickedSnapshots.isEmpty()) {
+        this.metricsReporter.count(
+            InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR,
+            this.cherryPickedSnapshots.size());
+      }
+      if (!this.deletedSnapshots.isEmpty()) {
+        this.metricsReporter.count(
+            InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, this.deletedSnapshots.size());
+      }
     }
   }
 }

From 37af32c2f318ed2c4a32414883d45677f2c8f610 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Wed, 5 Nov 2025 15:44:55 -0800
Subject: [PATCH 32/35] small cleanup

---
 .../internal/catalog/SnapshotDiffApplier.java | 81 ++++++++++++-------
 1 file changed, 51 insertions(+), 30 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index 16c26d1d1..ca1bc60b7 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -10,6 +10,7 @@
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
@@ -39,9 +40,13 @@ public class SnapshotDiffApplier {
    * @param existingMetadata The existing table metadata (may be null for table creation)
    * @param providedMetadata The new metadata with properties containing snapshot updates
    * @return Updated metadata with snapshots applied
+   * @throws NullPointerException if providedMetadata is null
    */
   public TableMetadata applySnapshots(
       TableMetadata existingMetadata, TableMetadata providedMetadata) {
+    // Validate at system boundary
+    Objects.requireNonNull(providedMetadata, "providedMetadata cannot be null");
+
     String snapshotsJson = providedMetadata.properties().get(CatalogConstants.SNAPSHOTS_JSON_KEY);
     if (snapshotsJson == null) {
       return providedMetadata;
@@ -70,15 +75,16 @@ public TableMetadata applySnapshots(
             providedRefs,
             existingRefs);
 
-    // Validate, apply, record metrics
+    // Validate, apply, record metrics (in correct order)
     diff.validate();
+    TableMetadata result = diff.applyTo();
     diff.recordMetrics();
-    return diff.applyTo();
+    return result;
   }
 
   /**
    * State object that computes and caches all snapshot analysis. Computes all maps once in the
-   * constructor to avoid redundant operations. Provides clear methods for validation and
+   * factory method to avoid redundant operations. Provides clear methods for validation and
    * application.
    */
   private static class SnapshotDiff {
@@ -110,6 +116,9 @@ private static class SnapshotDiff {
     /**
      * Creates a SnapshotDiff by computing all snapshot analysis from the provided inputs.
      *
+     * <p>Preconditions: All parameters except existingMetadata must be non-null. Collections should
+     * be empty rather than null.
+     *
      * @param metricsReporter Metrics reporter for recording snapshot operations
      * @param existingMetadata The existing table metadata (may be null for table creation)
      * @param providedSnapshots Snapshots provided in the update
@@ -130,9 +139,15 @@ static SnapshotDiff create(
 
       // Compute all index maps once
       Map<Long, Snapshot> providedSnapshotByIds =
-          providedSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
+          providedSnapshots.stream()
+              .collect(
+                  Collectors.toMap(
+                      Snapshot::snapshotId, s -> s, (existing, replacement) -> existing));
       Map<Long, Snapshot> existingSnapshotByIds =
-          existingSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
+          existingSnapshots.stream()
+              .collect(
+                  Collectors.toMap(
+                      Snapshot::snapshotId, s -> s, (existing, replacement) -> existing));
       Set<Long> existingBranchRefIds =
           existingRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
       Set<Long> providedBranchRefIds =
@@ -275,12 +290,14 @@ private void validateCurrentSnapshotNotDeleted() {
       if (this.existingMetadata == null || this.existingMetadata.currentSnapshot() == null) {
         return;
       }
-      if (!newSnapshots.isEmpty()) {
+      if (!this.newSnapshots.isEmpty()) {
         return;
       }
       long latestSnapshotId = this.existingMetadata.currentSnapshot().snapshotId();
-      if (!deletedSnapshots.isEmpty()
-          && deletedSnapshots.get(deletedSnapshots.size() - 1).snapshotId() == latestSnapshotId) {
+      // Check if the last deleted snapshot is the current one (snapshots are ordered by time)
+      if (!this.deletedSnapshots.isEmpty()
+          && this.deletedSnapshots.get(this.deletedSnapshots.size() - 1).snapshotId()
+              == latestSnapshotId) {
         throw new InvalidIcebergSnapshotException(
             String.format(
                 "Cannot delete the current snapshot %s without adding replacement snapshots.",
@@ -292,7 +309,7 @@ TableMetadata applyTo() {
       TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(this.providedMetadata);
 
       // Validate only MAIN branch
-      for (Map.Entry<String, SnapshotRef> entry : providedRefs.entrySet()) {
+      for (Map.Entry<String, SnapshotRef> entry : this.providedRefs.entrySet()) {
         if (!entry.getKey().equals(SnapshotRef.MAIN_BRANCH)) {
           throw new UnsupportedOperationException("OpenHouse supports only MAIN branch");
         }
@@ -307,20 +324,20 @@ TableMetadata applyTo() {
        *
        * <p>[3] Regular snapshots - set as main branch snapshot
        */
-      for (Snapshot snapshot : stagedSnapshots) {
+      for (Snapshot snapshot : this.stagedSnapshots) {
         metadataBuilder.addSnapshot(snapshot);
       }
 
       // Cherry-picked snapshots are all existing, handled by fast-forward block below
       // (No need to apply them here)
 
-      for (Snapshot snapshot : regularSnapshots) {
+      for (Snapshot snapshot : this.regularSnapshots) {
         metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
       }
 
       // Handle fast-forward cherry-pick (ref update without new snapshot)
-      if (newSnapshots.isEmpty() && !providedRefs.isEmpty()) {
-        long newSnapshotId = providedRefs.get(SnapshotRef.MAIN_BRANCH).snapshotId();
+      if (this.newSnapshots.isEmpty() && !this.providedRefs.isEmpty()) {
+        long newSnapshotId = this.providedRefs.get(SnapshotRef.MAIN_BRANCH).snapshotId();
         if (this.providedMetadata.refs().isEmpty()
             || this.providedMetadata.refs().get(SnapshotRef.MAIN_BRANCH).snapshotId()
                 != newSnapshotId) {
@@ -329,9 +346,9 @@ TableMetadata applyTo() {
       }
 
       // Delete snapshots
-      if (!deletedSnapshots.isEmpty()) {
+      if (!this.deletedSnapshots.isEmpty()) {
         Set<Long> snapshotIds =
-            deletedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
+            this.deletedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
         metadataBuilder.removeSnapshots(snapshotIds);
       }
 
@@ -340,33 +357,25 @@ TableMetadata applyTo() {
         metadataBuilder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
-                regularSnapshots.stream()
-                    .map(s -> Long.toString(s.snapshotId()))
-                    .collect(Collectors.joining(","))));
+                formatSnapshotIds(this.regularSnapshots)));
       }
-      if (!stagedSnapshots.isEmpty()) {
+      if (!this.stagedSnapshots.isEmpty()) {
         metadataBuilder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
-                stagedSnapshots.stream()
-                    .map(s -> Long.toString(s.snapshotId()))
-                    .collect(Collectors.joining(","))));
+                formatSnapshotIds(this.stagedSnapshots)));
       }
-      if (!cherryPickedSnapshots.isEmpty()) {
+      if (!this.cherryPickedSnapshots.isEmpty()) {
         metadataBuilder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS),
-                cherryPickedSnapshots.stream()
-                    .map(s -> Long.toString(s.snapshotId()))
-                    .collect(Collectors.joining(","))));
+                formatSnapshotIds(this.cherryPickedSnapshots)));
       }
-      if (!deletedSnapshots.isEmpty()) {
+      if (!this.deletedSnapshots.isEmpty()) {
         metadataBuilder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS),
-                deletedSnapshots.stream()
-                    .map(s -> Long.toString(s.snapshotId()))
-                    .collect(Collectors.joining(","))));
+                formatSnapshotIds(this.deletedSnapshots)));
       }
       metadataBuilder.removeProperties(
           new HashSet<>(
@@ -397,5 +406,17 @@ void recordMetrics() {
             InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, this.deletedSnapshots.size());
       }
     }
+
+    /**
+     * Helper method to format a list of snapshots into a comma-separated string of snapshot IDs.
+     *
+     * @param snapshots List of snapshots to format
+     * @return Comma-separated string of snapshot IDs
+     */
+    private static String formatSnapshotIds(List<Snapshot> snapshots) {
+      return snapshots.stream()
+          .map(s -> Long.toString(s.snapshotId()))
+          .collect(Collectors.joining(","));
+    }
   }
 }

From 71bebbe6e3a13df7f31dfa902a1214db698a1b56 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Sat, 8 Nov 2025 13:00:09 -0800
Subject: [PATCH 33/35] responding to comments

---
 .../internal/catalog/SnapshotDiffApplier.java | 253 ++++++++-----
 .../OpenHouseInternalTableOperationsTest.java |  97 +++++
 .../catalog/SnapshotDiffApplierTest.java      | 337 ++++++++++++++++--
 3 files changed, 571 insertions(+), 116 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index ca1bc60b7..b1055ae3d 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -48,16 +48,24 @@ public TableMetadata applySnapshots(
     Objects.requireNonNull(providedMetadata, "providedMetadata cannot be null");
 
     String snapshotsJson = providedMetadata.properties().get(CatalogConstants.SNAPSHOTS_JSON_KEY);
+    Map<String, SnapshotRef> providedRefs =
+        Optional.ofNullable(providedMetadata.properties().get(CatalogConstants.SNAPSHOTS_REFS_KEY))
+            .map(SnapshotsUtil::parseSnapshotRefs)
+            .orElse(new HashMap<>());
+
+    // Validate MAIN-only restriction early (PR1 limitation)
+    for (Map.Entry<String, SnapshotRef> entry : providedRefs.entrySet()) {
+      if (!entry.getKey().equals(SnapshotRef.MAIN_BRANCH)) {
+        throw new UnsupportedOperationException("OpenHouse supports only MAIN branch");
+      }
+    }
+
     if (snapshotsJson == null) {
       return providedMetadata;
     }
 
     // Parse input
     List<Snapshot> providedSnapshots = SnapshotsUtil.parseSnapshots(null, snapshotsJson);
-    Map<String, SnapshotRef> providedRefs =
-        Optional.ofNullable(providedMetadata.properties().get(CatalogConstants.SNAPSHOTS_REFS_KEY))
-            .map(SnapshotsUtil::parseSnapshotRefs)
-            .orElse(new HashMap<>());
 
     List<Snapshot> existingSnapshots =
         existingMetadata != null ? existingMetadata.snapshots() : Collections.emptyList();
@@ -69,11 +77,11 @@ public TableMetadata applySnapshots(
         SnapshotDiff.create(
             metricsReporter,
             existingMetadata,
-            providedSnapshots,
-            existingSnapshots,
             providedMetadata,
-            providedRefs,
-            existingRefs);
+            existingSnapshots,
+            providedSnapshots,
+            existingRefs,
+            providedRefs);
 
     // Validate, apply, record metrics (in correct order)
     diff.validate();
@@ -93,11 +101,12 @@ private static class SnapshotDiff {
 
     // Input state
     private final TableMetadata existingMetadata;
-    private final List<Snapshot> providedSnapshots;
-    private final List<Snapshot> existingSnapshots;
     private final TableMetadata providedMetadata;
-    private final Map<String, SnapshotRef> providedRefs;
+    private final String databaseId;
+    private final List<Snapshot> existingSnapshots;
+    private final List<Snapshot> providedSnapshots;
     private final Map<String, SnapshotRef> existingRefs;
+    private final Map<String, SnapshotRef> providedRefs;
 
     // Computed maps (created once)
     private final Map<Long, Snapshot> providedSnapshotByIds;
@@ -106,10 +115,11 @@ private static class SnapshotDiff {
     private final Set<Long> providedBranchRefIds;
     private final List<Snapshot> newSnapshots;
     private final List<Snapshot> deletedSnapshots;
+    private final Set<Long> deletedIds;
 
     // Categorized snapshots
-    private final List<Snapshot> stagedSnapshots;
-    private final List<Snapshot> regularSnapshots;
+    private final List<Snapshot> newStagedSnapshots;
+    private final List<Snapshot> newMainBranchSnapshots;
     private final List<Snapshot> cherryPickedSnapshots;
     private final int appendedCount;
 
@@ -121,33 +131,27 @@ private static class SnapshotDiff {
      *
      * @param metricsReporter Metrics reporter for recording snapshot operations
      * @param existingMetadata The existing table metadata (may be null for table creation)
-     * @param providedSnapshots Snapshots provided in the update
-     * @param existingSnapshots Snapshots currently in the table
      * @param providedMetadata The new metadata with properties containing snapshot updates
-     * @param providedRefs Snapshot refs provided in the update
+     * @param existingSnapshots Snapshots currently in the table
+     * @param providedSnapshots Snapshots provided in the update
      * @param existingRefs Snapshot refs currently in the table
+     * @param providedRefs Snapshot refs provided in the update
      * @return A new SnapshotDiff with all analysis computed
      */
     static SnapshotDiff create(
         MetricsReporter metricsReporter,
         TableMetadata existingMetadata,
-        List<Snapshot> providedSnapshots,
-        List<Snapshot> existingSnapshots,
         TableMetadata providedMetadata,
-        Map<String, SnapshotRef> providedRefs,
-        Map<String, SnapshotRef> existingRefs) {
+        List<Snapshot> existingSnapshots,
+        List<Snapshot> providedSnapshots,
+        Map<String, SnapshotRef> existingRefs,
+        Map<String, SnapshotRef> providedRefs) {
 
       // Compute all index maps once
       Map<Long, Snapshot> providedSnapshotByIds =
-          providedSnapshots.stream()
-              .collect(
-                  Collectors.toMap(
-                      Snapshot::snapshotId, s -> s, (existing, replacement) -> existing));
+          providedSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
       Map<Long, Snapshot> existingSnapshotByIds =
-          existingSnapshots.stream()
-              .collect(
-                  Collectors.toMap(
-                      Snapshot::snapshotId, s -> s, (existing, replacement) -> existing));
+          existingSnapshots.stream().collect(Collectors.toMap(Snapshot::snapshotId, s -> s));
       Set<Long> existingBranchRefIds =
           existingRefs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet());
       Set<Long> providedBranchRefIds =
@@ -162,9 +166,11 @@ static SnapshotDiff create(
           existingSnapshots.stream()
               .filter(s -> !providedSnapshotByIds.containsKey(s.snapshotId()))
               .collect(Collectors.toList());
+      Set<Long> deletedIds =
+          deletedSnapshots.stream().map(Snapshot::snapshotId).collect(Collectors.toSet());
 
       // Categorize snapshots
-      List<Snapshot> stagedSnapshots =
+      List<Snapshot> newStagedSnapshots =
           newSnapshots.stream()
               .filter(s -> s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
               .collect(Collectors.toList());
@@ -198,37 +204,50 @@ static SnapshotDiff create(
                     boolean hasWapId =
                         provided.summary() != null
                             && provided.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP);
+                    // TODO: This works for MAIN branch only,  but fails in the branch scenario and
+                    // should be revisited in followup PR
+                    // Snapshot exists on branch-A
+                    // Cherry-pick to branch-B
+                    // Would be classified as NOT wasStaged (because it's in existingBranchRefIds)
+                    // Wouldn't be detected as cherry-picked
                     boolean wasStaged = !existingBranchRefIds.contains(provided.snapshotId());
                     boolean isNowOnBranch = providedBranchRefIds.contains(provided.snapshotId());
                     return hasWapId && wasStaged && isNowOnBranch;
                   })
               .collect(Collectors.toList());
 
-      // Regular snapshots = all new snapshots that are not staged WAP
-      List<Snapshot> regularSnapshots =
+      // New main branch snapshots = all new snapshots that are not staged WAP
+      // (includes both regular commits and cherry-pick result snapshots)
+      List<Snapshot> newMainBranchSnapshots =
           newSnapshots.stream()
               .filter(s -> !s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
               .collect(Collectors.toList());
 
-      // Compute appended count (only regular snapshots, not cherry-picked)
-      int appendedCount = regularSnapshots.size();
+      // Compute appended count
+      int appendedCount = newMainBranchSnapshots.size();
+
+      // Extract database ID from metadata properties
+      String databaseId =
+          providedMetadata.properties().get(CatalogConstants.OPENHOUSE_DATABASEID_KEY);
 
       return new SnapshotDiff(
           metricsReporter,
           existingMetadata,
-          providedSnapshots,
-          existingSnapshots,
           providedMetadata,
-          providedRefs,
+          databaseId,
+          existingSnapshots,
+          providedSnapshots,
           existingRefs,
+          providedRefs,
           providedSnapshotByIds,
           existingSnapshotByIds,
           existingBranchRefIds,
           providedBranchRefIds,
           newSnapshots,
           deletedSnapshots,
-          stagedSnapshots,
-          regularSnapshots,
+          deletedIds,
+          newStagedSnapshots,
+          newMainBranchSnapshots,
           cherryPickedSnapshots,
           appendedCount);
     }
@@ -237,36 +256,40 @@ static SnapshotDiff create(
     private SnapshotDiff(
         MetricsReporter metricsReporter,
         TableMetadata existingMetadata,
-        List<Snapshot> providedSnapshots,
-        List<Snapshot> existingSnapshots,
         TableMetadata providedMetadata,
-        Map<String, SnapshotRef> providedRefs,
+        String databaseId,
+        List<Snapshot> existingSnapshots,
+        List<Snapshot> providedSnapshots,
         Map<String, SnapshotRef> existingRefs,
+        Map<String, SnapshotRef> providedRefs,
         Map<Long, Snapshot> providedSnapshotByIds,
         Map<Long, Snapshot> existingSnapshotByIds,
         Set<Long> existingBranchRefIds,
         Set<Long> providedBranchRefIds,
         List<Snapshot> newSnapshots,
         List<Snapshot> deletedSnapshots,
-        List<Snapshot> stagedSnapshots,
-        List<Snapshot> regularSnapshots,
+        Set<Long> deletedIds,
+        List<Snapshot> newStagedSnapshots,
+        List<Snapshot> newMainBranchSnapshots,
         List<Snapshot> cherryPickedSnapshots,
         int appendedCount) {
       this.metricsReporter = metricsReporter;
       this.existingMetadata = existingMetadata;
-      this.providedSnapshots = providedSnapshots;
-      this.existingSnapshots = existingSnapshots;
       this.providedMetadata = providedMetadata;
-      this.providedRefs = providedRefs;
+      this.databaseId = databaseId;
+      this.existingSnapshots = existingSnapshots;
+      this.providedSnapshots = providedSnapshots;
       this.existingRefs = existingRefs;
+      this.providedRefs = providedRefs;
       this.providedSnapshotByIds = providedSnapshotByIds;
       this.existingSnapshotByIds = existingSnapshotByIds;
       this.existingBranchRefIds = existingBranchRefIds;
       this.providedBranchRefIds = providedBranchRefIds;
       this.newSnapshots = newSnapshots;
       this.deletedSnapshots = deletedSnapshots;
-      this.stagedSnapshots = stagedSnapshots;
-      this.regularSnapshots = regularSnapshots;
+      this.deletedIds = deletedIds;
+      this.newStagedSnapshots = newStagedSnapshots;
+      this.newMainBranchSnapshots = newMainBranchSnapshots;
       this.cherryPickedSnapshots = cherryPickedSnapshots;
       this.appendedCount = appendedCount;
     }
@@ -278,6 +301,7 @@ private SnapshotDiff(
      */
     void validate() {
       validateCurrentSnapshotNotDeleted();
+      validateDeletedSnapshotsNotReferenced();
     }
 
     /**
@@ -293,6 +317,8 @@ private void validateCurrentSnapshotNotDeleted() {
       if (!this.newSnapshots.isEmpty()) {
         return;
       }
+      // TODO -- validate what are the requirements around deleting the latest snapshot on a
+      // "branch".
       long latestSnapshotId = this.existingMetadata.currentSnapshot().snapshotId();
       // Check if the last deleted snapshot is the current one (snapshots are ordered by time)
       if (!this.deletedSnapshots.isEmpty()
@@ -305,44 +331,69 @@ private void validateCurrentSnapshotNotDeleted() {
       }
     }
 
-    TableMetadata applyTo() {
-      TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(this.providedMetadata);
+    /**
+     * Validates that snapshots being deleted are not still referenced by any branches or tags. This
+     * prevents data loss and maintains referential integrity by ensuring that all branch and tag
+     * pointers reference valid snapshots that will continue to exist after the commit.
+     *
+     * @throws InvalidIcebergSnapshotException if any deleted snapshot is still referenced by a
+     *     branch or tag
+     */
+    private void validateDeletedSnapshotsNotReferenced() {
+      Map<Long, List<String>> referencedIdsToRefs =
+          providedRefs.entrySet().stream()
+              .collect(
+                  Collectors.groupingBy(
+                      e -> e.getValue().snapshotId(),
+                      Collectors.mapping(Map.Entry::getKey, Collectors.toList())));
+
+      List<String> invalidDeleteDetails =
+          deletedIds.stream()
+              .filter(referencedIdsToRefs::containsKey)
+              .map(
+                  id ->
+                      String.format(
+                          "snapshot %s (referenced by: %s)",
+                          id, String.join(", ", referencedIdsToRefs.get(id))))
+              .collect(Collectors.toList());
 
-      // Validate only MAIN branch
-      for (Map.Entry<String, SnapshotRef> entry : this.providedRefs.entrySet()) {
-        if (!entry.getKey().equals(SnapshotRef.MAIN_BRANCH)) {
-          throw new UnsupportedOperationException("OpenHouse supports only MAIN branch");
-        }
+      if (!invalidDeleteDetails.isEmpty()) {
+        throw new InvalidIcebergSnapshotException(
+            String.format(
+                "Cannot delete snapshots that are still referenced by branches/tags: %s",
+                String.join("; ", invalidDeleteDetails)));
       }
+    }
+
+    TableMetadata applyTo() {
+      TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(this.providedMetadata);
 
       /**
        * Apply categorized snapshots to metadata:
        *
        * <p>[1] Staged (WAP) snapshots - added without branch reference
        *
-       * <p>[2] Cherry-picked snapshots - set as main branch snapshot
+       * <p>[2] New main branch snapshots - added without branch reference (branch pointer set
+       * below)
        *
-       * <p>[3] Regular snapshots - set as main branch snapshot
+       * <p>[3] Cherry-picked snapshots - existing snapshots, branch pointer set below
        */
-      for (Snapshot snapshot : this.stagedSnapshots) {
-        metadataBuilder.addSnapshot(snapshot);
-      }
-
-      // Cherry-picked snapshots are all existing, handled by fast-forward block below
-      // (No need to apply them here)
-
-      for (Snapshot snapshot : this.regularSnapshots) {
-        metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
-      }
-
-      // Handle fast-forward cherry-pick (ref update without new snapshot)
-      if (this.newSnapshots.isEmpty() && !this.providedRefs.isEmpty()) {
+      // Add staged snapshots in timestamp order (explicit ordering for consistency)
+      this.newStagedSnapshots.stream()
+          .sorted(java.util.Comparator.comparingLong(Snapshot::timestampMillis))
+          .forEach(metadataBuilder::addSnapshot);
+
+      // Add new main branch snapshots in timestamp order (explicit ordering)
+      // Note: While the branch pointer (not list order) determines currentSnapshot(),
+      // other code assumes snapshots are time-ordered (e.g., validation at line 308)
+      this.newMainBranchSnapshots.stream()
+          .sorted(java.util.Comparator.comparingLong(Snapshot::timestampMillis))
+          .forEach(metadataBuilder::addSnapshot);
+
+      // Set branch pointer once using providedRefs (covers both new snapshots and cherry-pick)
+      if (!this.providedRefs.isEmpty()) {
         long newSnapshotId = this.providedRefs.get(SnapshotRef.MAIN_BRANCH).snapshotId();
-        if (this.providedMetadata.refs().isEmpty()
-            || this.providedMetadata.refs().get(SnapshotRef.MAIN_BRANCH).snapshotId()
-                != newSnapshotId) {
-          metadataBuilder.setBranchSnapshot(newSnapshotId, SnapshotRef.MAIN_BRANCH);
-        }
+        metadataBuilder.setBranchSnapshot(newSnapshotId, SnapshotRef.MAIN_BRANCH);
       }
 
       // Delete snapshots
@@ -357,13 +408,13 @@ TableMetadata applyTo() {
         metadataBuilder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS),
-                formatSnapshotIds(this.regularSnapshots)));
+                formatSnapshotIds(this.newMainBranchSnapshots)));
       }
-      if (!this.stagedSnapshots.isEmpty()) {
+      if (!this.newStagedSnapshots.isEmpty()) {
         metadataBuilder.setProperties(
             Collections.singletonMap(
                 getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS),
-                formatSnapshotIds(this.stagedSnapshots)));
+                formatSnapshotIds(this.newStagedSnapshots)));
       }
       if (!this.cherryPickedSnapshots.isEmpty()) {
         metadataBuilder.setProperties(
@@ -386,24 +437,34 @@ TableMetadata applyTo() {
     }
 
     void recordMetrics() {
-      // Record metrics for appended snapshots (only regular snapshots)
-      // Cherry-picked snapshots are all existing, not appended
-      if (this.appendedCount > 0) {
-        this.metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, this.appendedCount);
-      }
-      if (!this.stagedSnapshots.isEmpty()) {
-        this.metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, this.stagedSnapshots.size());
-      }
-      if (!this.cherryPickedSnapshots.isEmpty()) {
-        this.metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR,
-            this.cherryPickedSnapshots.size());
-      }
-      if (!this.deletedSnapshots.isEmpty()) {
-        this.metricsReporter.count(
-            InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, this.deletedSnapshots.size());
+      // Record metrics for appended snapshots (includes regular commits and cherry-pick results)
+      // Note: cherryPickedSnapshots list contains existing source snapshots, not the new results
+      recordMetricWithDatabaseTag(
+          InternalCatalogMetricsConstant.SNAPSHOTS_ADDED_CTR, this.appendedCount);
+      recordMetricWithDatabaseTag(
+          InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR, this.newStagedSnapshots.size());
+      recordMetricWithDatabaseTag(
+          InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR,
+          this.cherryPickedSnapshots.size());
+      recordMetricWithDatabaseTag(
+          InternalCatalogMetricsConstant.SNAPSHOTS_DELETED_CTR, this.deletedSnapshots.size());
+    }
+
+    /**
+     * Helper method to record a metric with database tag if count is greater than zero.
+     *
+     * @param metricName The name of the metric to record
+     * @param count The count value to record
+     */
+    private void recordMetricWithDatabaseTag(String metricName, int count) {
+      if (count > 0) {
+        // Only add database tag if databaseId is present; otherwise record metric without tag
+        if (this.databaseId != null) {
+          this.metricsReporter.count(
+              metricName, count, InternalCatalogMetricsConstant.DATABASE_TAG, this.databaseId);
+        } else {
+          this.metricsReporter.count(metricName, count);
+        }
       }
     }
 
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index 634d8eeb6..476435a61 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -42,6 +42,7 @@
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.SnapshotRef;
+import org.apache.iceberg.SnapshotSummary;
 import org.apache.iceberg.SortDirection;
 import org.apache.iceberg.SortOrder;
 import org.apache.iceberg.TableMetadata;
@@ -489,6 +490,102 @@ void testDoCommitDoesntPersistForStagedTable() {
             .get());
   }
 
+  /**
+   * Tests staged table creation with no snapshots (initial version). Verifies that the table
+   * metadata is set locally but no persistence occurs to the repository.
+   */
+  @Test
+  void testStagedTableCreationWithoutSnapshots() throws IOException {
+    Map<String, String> properties = new HashMap<>(BASE_TABLE_METADATA.properties());
+    properties.put(CatalogConstants.IS_STAGE_CREATE_KEY, "true");
+
+    TableMetadata metadata = BASE_TABLE_METADATA.replaceProperties(properties);
+
+    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
+        Mockito.mockStatic(TableMetadataParser.class, Mockito.CALLS_REAL_METHODS)) {
+      openHouseInternalTableOperations.doCommit(null, metadata);
+
+      // Verify TableMetadata is set locally
+      Assertions.assertNotNull(openHouseInternalTableOperations.currentMetadataLocation());
+      Assertions.assertNotNull(openHouseInternalTableOperations.current());
+
+      // Verify no snapshots were added
+      Assertions.assertEquals(0, openHouseInternalTableOperations.current().snapshots().size());
+
+      // Verify no persistence to repository
+      verify(mockHouseTableRepository, times(0)).save(any());
+
+      // Verify no snapshot properties were set
+      Map<String, String> resultProperties =
+          openHouseInternalTableOperations.current().properties();
+      Assertions.assertNull(resultProperties.get(getCanonicalFieldName("appended_snapshots")));
+      Assertions.assertNull(resultProperties.get(getCanonicalFieldName("staged_snapshots")));
+      Assertions.assertNull(resultProperties.get(getCanonicalFieldName("cherry_picked_snapshots")));
+      Assertions.assertNull(resultProperties.get(getCanonicalFieldName("deleted_snapshots")));
+    }
+  }
+
+  /**
+   * Tests staged table creation with staged (WAP) snapshots. Verifies that staged snapshots are
+   * added to the table but no persistence occurs to the repository.
+   */
+  @Test
+  void testStagedTableCreationWithStagedSnapshots() throws IOException {
+    List<Snapshot> testWapSnapshots = IcebergTestUtil.getWapSnapshots().subList(0, 2);
+    Map<String, String> properties = new HashMap<>(BASE_TABLE_METADATA.properties());
+    properties.put(CatalogConstants.IS_STAGE_CREATE_KEY, "true");
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(testWapSnapshots));
+
+    TableMetadata metadata = BASE_TABLE_METADATA.replaceProperties(properties);
+
+    try (MockedStatic<TableMetadataParser> ignoreWriteMock =
+        Mockito.mockStatic(TableMetadataParser.class, Mockito.CALLS_REAL_METHODS)) {
+      openHouseInternalTableOperations.doCommit(null, metadata);
+
+      // Verify TableMetadata is set locally
+      Assertions.assertNotNull(openHouseInternalTableOperations.currentMetadataLocation());
+      Assertions.assertNotNull(openHouseInternalTableOperations.current());
+
+      // Verify staged snapshots were added
+      TableMetadata currentMetadata = openHouseInternalTableOperations.current();
+      Assertions.assertEquals(
+          testWapSnapshots.size(),
+          currentMetadata.snapshots().size(),
+          "Staged snapshots should be added");
+
+      // Verify all snapshots are staged (have WAP ID)
+      for (Snapshot snapshot : currentMetadata.snapshots()) {
+        Assertions.assertTrue(
+            snapshot.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP),
+            "All snapshots should be staged with WAP ID");
+      }
+
+      // Verify no branch references exist (staged snapshots should not be on main)
+      Assertions.assertTrue(
+          currentMetadata.refs().isEmpty()
+              || !currentMetadata.refs().containsKey(SnapshotRef.MAIN_BRANCH),
+          "Staged snapshots should not have main branch reference");
+
+      // Verify no persistence to repository
+      verify(mockHouseTableRepository, times(0)).save(any());
+
+      // Verify snapshot properties tracking
+      Map<String, String> resultProperties = currentMetadata.properties();
+      Assertions.assertEquals(
+          testWapSnapshots.stream()
+              .map(s -> Long.toString(s.snapshotId()))
+              .collect(Collectors.joining(",")),
+          resultProperties.get(getCanonicalFieldName("staged_snapshots")),
+          "Staged snapshots should be tracked in properties");
+      Assertions.assertNull(
+          resultProperties.get(getCanonicalFieldName("appended_snapshots")),
+          "No snapshots should be appended to main");
+      Assertions.assertNull(resultProperties.get(getCanonicalFieldName("cherry_picked_snapshots")));
+      Assertions.assertNull(resultProperties.get(getCanonicalFieldName("deleted_snapshots")));
+    }
+  }
+
   /**
    * Tests that repository exceptions are properly converted to Iceberg exceptions. Verifies that
    * various repository exceptions map to CommitFailedException or CommitStateUnknownException.
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
index 08fc48a52..f325459df 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
@@ -4,6 +4,8 @@
 import static org.junit.jupiter.api.Assertions.*;
 import static org.mockito.Mockito.*;
 
+import com.google.gson.Gson;
+import com.google.gson.JsonObject;
 import com.linkedin.openhouse.cluster.metrics.micrometer.MetricsReporter;
 import com.linkedin.openhouse.internal.catalog.exception.InvalidIcebergSnapshotException;
 import java.io.IOException;
@@ -19,8 +21,10 @@
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.SnapshotRef;
+import org.apache.iceberg.SnapshotRefParser;
 import org.apache.iceberg.SortOrder;
 import org.apache.iceberg.TableMetadata;
+import org.apache.iceberg.exceptions.ValidationException;
 import org.apache.iceberg.types.Types;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -116,8 +120,6 @@ private TableMetadata addSnapshotsToMetadata(TableMetadata metadata, List<Snapsh
     return builder.build();
   }
 
-  // ========== Edge Case Tests ==========
-
   /** Verifies that when no snapshot JSON is provided, metadata is returned unmodified. */
   @Test
   void testApplySnapshots_noSnapshotsJson_returnsUnmodified() {
@@ -127,9 +129,9 @@ void testApplySnapshots_noSnapshotsJson_returnsUnmodified() {
     verifyNoInteractions(mockMetricsReporter);
   }
 
-  /** Verifies that table creation (null base) is handled correctly. */
+  /** Verifies that table creation (null base) with main branch is handled correctly. */
   @Test
-  void testApplySnapshots_nullBase_handlesTableCreation() throws IOException {
+  void testApplySnapshots_nullBase_handlesTableCreationWithMainBranch() throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
     TableMetadata newMetadata = createMetadataWithSnapshotsAndMainRef(baseMetadata, snapshots);
 
@@ -141,9 +143,9 @@ void testApplySnapshots_nullBase_handlesTableCreation() throws IOException {
 
   // ========== Basic Functionality Tests ==========
 
-  /** Verifies that new snapshots are added correctly. */
+  /** Verifies that new snapshots are added correctly to the main branch. */
   @Test
-  void testApplySnapshots_addNewSnapshots_success() throws IOException {
+  void testApplySnapshots_addNewSnapshotsToMainBranch_success() throws IOException {
     List<Snapshot> initialSnapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, initialSnapshots);
 
@@ -159,9 +161,9 @@ void testApplySnapshots_addNewSnapshots_success() throws IOException {
     verify(mockMetricsReporter, atLeastOnce()).count(anyString(), anyDouble());
   }
 
-  /** Verifies that deleting snapshots works correctly and updates main branch. */
+  /** Verifies that deleting snapshots from main branch works correctly. */
   @Test
-  void testApplySnapshots_deleteSnapshots_success() throws IOException {
+  void testApplySnapshots_deleteSnapshotsFromMainBranch_success() throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
 
@@ -175,9 +177,9 @@ void testApplySnapshots_deleteSnapshots_success() throws IOException {
     assertEquals(remainingSnapshots.size(), result.snapshots().size());
   }
 
-  /** Verifies that updating branch references works correctly. */
+  /** Verifies that updating main branch references works correctly. */
   @Test
-  void testApplySnapshots_branchUpdates_success() throws IOException {
+  void testApplySnapshots_mainBranchUpdates_success() throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
 
@@ -192,11 +194,52 @@ void testApplySnapshots_branchUpdates_success() throws IOException {
     assertEquals(newBranchTarget.snapshotId(), result.currentSnapshot().snapshotId());
   }
 
+  /** Verifies that snapshots are added in timestamp order to the main branch. */
+  @Test
+  void testApplySnapshots_snapshotsOrderedByTimestamp_success() throws IOException {
+    List<Snapshot> initialSnapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, initialSnapshots);
+
+    // Add extra snapshots which may have different timestamps
+    List<Snapshot> extraSnapshots = IcebergTestUtil.getExtraSnapshots();
+    List<Snapshot> allSnapshots = new ArrayList<>(initialSnapshots);
+    allSnapshots.addAll(extraSnapshots);
+
+    // Deliberately shuffle to ensure ordering is not dependent on input order
+    List<Snapshot> shuffledSnapshots = new ArrayList<>(allSnapshots);
+    Collections.shuffle(shuffledSnapshots);
+
+    TableMetadata newMetadata =
+        createMetadataWithSnapshotsAndMainRef(baseWithSnapshots, shuffledSnapshots);
+
+    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
+
+    assertNotNull(result);
+    // Verify snapshots are ordered by timestamp
+    List<Snapshot> resultSnapshots = result.snapshots();
+    assertTrue(resultSnapshots.size() > 0, "Should have snapshots");
+
+    // Verify each snapshot timestamp is <= the next one
+    for (int i = 1; i < resultSnapshots.size(); i++) {
+      Snapshot prev = resultSnapshots.get(i - 1);
+      Snapshot current = resultSnapshots.get(i);
+      assertTrue(
+          prev.timestampMillis() <= current.timestampMillis(),
+          String.format(
+              "Snapshots should be ordered by timestamp: snapshot[%d].timestamp=%d "
+                  + "should be <= snapshot[%d].timestamp=%d",
+              i - 1, prev.timestampMillis(), i, current.timestampMillis()));
+    }
+  }
+
   // ========== Validation Tests ==========
 
-  /** Verifies that deleting the current snapshot without replacements throws an exception. */
+  /**
+   * Verifies that deleting the current snapshot from main branch without replacements throws an
+   * exception.
+   */
   @Test
-  void testValidation_deletingCurrentSnapshotWithoutReplacement_throwsException()
+  void testApplySnapshots_deletingCurrentSnapshotFromMainBranchWithoutReplacement_throwsException()
       throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
@@ -211,11 +254,32 @@ void testValidation_deletingCurrentSnapshotWithoutReplacement_throwsException()
 
     assertTrue(exception.getMessage().contains("Cannot delete the current snapshot"));
   }
+
+  /** Verifies that duplicate snapshot IDs in provided snapshots throw an exception. */
+  @Test
+  void testApplySnapshots_duplicateSnapshotIds_throwsException() throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
+
+    // Create a list with duplicate snapshots (same snapshot ID appears twice)
+    List<Snapshot> duplicateSnapshots = new ArrayList<>();
+    duplicateSnapshots.add(snapshots.get(0));
+    duplicateSnapshots.add(snapshots.get(0)); // Duplicate
+
+    TableMetadata newMetadata =
+        createMetadataWithSnapshotsAndMainRef(baseWithSnapshots, duplicateSnapshots);
+
+    // Should throw IllegalStateException due to duplicate keys in toMap collector
+    assertThrows(
+        IllegalStateException.class,
+        () -> snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata));
+  }
+
   // ========== Metrics Tests ==========
 
-  /** Verifies that WAP (staged) snapshots trigger the correct metrics. */
+  /** Verifies that staged snapshots (not on main branch) trigger the correct metrics. */
   @Test
-  void testMetrics_wapSnapshots_recordsStagedCounter() throws IOException {
+  void testMetrics_addStagedSnapshots_recordsStagedCounter() throws IOException {
     List<Snapshot> baseSnapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, baseSnapshots);
 
@@ -234,9 +298,9 @@ void testMetrics_wapSnapshots_recordsStagedCounter() throws IOException {
         .count(eq(InternalCatalogMetricsConstant.SNAPSHOTS_STAGED_CTR), anyDouble());
   }
 
-  /** Verifies that deleting snapshots triggers the correct metrics. */
+  /** Verifies that deleting snapshots from main branch triggers the correct metrics. */
   @Test
-  void testMetrics_deleteSnapshots_recordsDeletedCounter() throws IOException {
+  void testMetrics_deleteSnapshotsFromMainBranch_recordsDeletedCounter() throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
 
@@ -254,9 +318,9 @@ void testMetrics_deleteSnapshots_recordsDeletedCounter() throws IOException {
 
   // ========== Property Management Tests ==========
 
-  /** Verifies that appended snapshot IDs are recorded in properties. */
+  /** Verifies that appended snapshot IDs to main branch are recorded in properties. */
   @Test
-  void testProperties_appendedSnapshots_recordedCorrectly() throws IOException {
+  void testProperties_appendedSnapshotsToMainBranch_recordedCorrectly() throws IOException {
     List<Snapshot> baseSnapshots = IcebergTestUtil.getSnapshots();
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, baseSnapshots);
 
@@ -281,9 +345,12 @@ void testProperties_appendedSnapshots_recordedCorrectly() throws IOException {
     }
   }
 
-  /** Verifies that temporary snapshot processing keys are removed from final properties. */
+  /**
+   * Verifies that temporary snapshot processing keys are removed from final properties when adding
+   * to main branch.
+   */
   @Test
-  void testProperties_tempKeysRemoved_success() throws IOException {
+  void testProperties_tempKeysRemovedForMainBranch_success() throws IOException {
     List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
     TableMetadata newMetadata = createMetadataWithSnapshotsAndMainRef(baseMetadata, snapshots);
 
@@ -297,4 +364,234 @@ void testProperties_tempKeysRemoved_success() throws IOException {
         result.properties().containsKey(CatalogConstants.SNAPSHOTS_REFS_KEY),
         "Temp snapshots refs key should be removed");
   }
+
+  /** Verifies that providing a non-MAIN branch reference throws UnsupportedOperationException. */
+  @Test
+  void testApplySnapshots_nonMainBranchReference_throwsUnsupportedOperationException()
+      throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    Snapshot lastSnapshot = snapshots.get(snapshots.size() - 1);
+
+    // Create refs with a feature branch instead of MAIN
+    Map<String, String> refs = new HashMap<>();
+    SnapshotRef featureBranchRef = SnapshotRef.branchBuilder(lastSnapshot.snapshotId()).build();
+    refs.put("feature-branch", SnapshotRefParser.toJson(featureBranchRef));
+
+    TableMetadata newMetadata = createMetadataWithSnapshots(baseMetadata, snapshots, refs);
+
+    UnsupportedOperationException exception =
+        assertThrows(
+            UnsupportedOperationException.class,
+            () -> snapshotDiffApplier.applySnapshots(null, newMetadata));
+
+    assertTrue(exception.getMessage().contains("OpenHouse supports only MAIN branch"));
+  }
+
+  /**
+   * Verifies that providing a branch ref pointing to a non-existent snapshot ID causes an
+   * exception. This tests a critical bug where no validation exists before calling
+   * setBranchSnapshot.
+   */
+  @Test
+  void testApplySnapshots_refPointingToNonExistentSnapshot_throwsException() throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+
+    // Create a ref pointing to a snapshot ID that doesn't exist in the snapshot list
+    long nonExistentSnapshotId = 999999999L;
+    Map<String, String> refs = new HashMap<>();
+    SnapshotRef invalidRef = SnapshotRef.branchBuilder(nonExistentSnapshotId).build();
+    refs.put(SnapshotRef.MAIN_BRANCH, SnapshotRefParser.toJson(invalidRef));
+
+    TableMetadata newMetadata = createMetadataWithSnapshots(baseMetadata, snapshots, refs);
+
+    // Iceberg's setBranchSnapshot should throw ValidationException when snapshot doesn't exist
+    assertThrows(
+        ValidationException.class, () -> snapshotDiffApplier.applySnapshots(null, newMetadata));
+  }
+
+  /**
+   * Verifies that attempting to set a ref to a snapshot being deleted throws an exception. The
+   * validation correctly catches this case where a commit attempts to both delete a snapshot and
+   * set the main branch to point to that deleted snapshot. This prevents leaving the table in an
+   * invalid state.
+   */
+  @Test
+  void testApplySnapshots_settingRefToDeletedSnapshot_throwsException() throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
+
+    // Try to delete the first snapshot, then point main branch to the first (deleted) one
+    Snapshot snapshotToDelete = snapshots.get(0);
+    List<Snapshot> remainingSnapshots = snapshots.subList(1, snapshots.size());
+
+    // Create refs pointing to the snapshot we're trying to delete
+    Map<String, String> refs = new HashMap<>();
+    SnapshotRef mainRef = SnapshotRef.branchBuilder(snapshotToDelete.snapshotId()).build();
+    refs.put(SnapshotRef.MAIN_BRANCH, SnapshotRefParser.toJson(mainRef));
+
+    TableMetadata newMetadata =
+        createMetadataWithSnapshots(baseWithSnapshots, remainingSnapshots, refs);
+
+    // This should throw an exception because we're trying to delete a snapshot
+    // while setting a branch reference to it
+    InvalidIcebergSnapshotException exception =
+        assertThrows(
+            InvalidIcebergSnapshotException.class,
+            () -> snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata));
+
+    assertTrue(
+        exception
+            .getMessage()
+            .contains("Cannot delete snapshots that are still referenced by branches/tags"));
+    assertTrue(exception.getMessage().contains("snapshot " + snapshotToDelete.snapshotId()));
+    assertTrue(exception.getMessage().contains("main"));
+  }
+
+  /**
+   * Verifies that a snapshot with an invalid (non-numeric) source snapshot ID in cherry-pick causes
+   * JsonSyntaxException during parsing. NOTE: This fails at the JSON parsing stage due to Iceberg's
+   * strict validation, not at the cherry-pick categorization stage.
+   */
+  @Test
+  void testApplySnapshots_invalidCherryPickSourceSnapshotId_failsAtParsingStage() {
+    // Create a custom snapshot JSON with invalid source-snapshot-id using Gson
+    // Note: Iceberg validates snapshot structure strictly, so this fails at Gson parsing
+    Gson gson = new Gson();
+    JsonObject snapshotJson = new JsonObject();
+    snapshotJson.addProperty("snapshot-id", 1234567890123456789L);
+    snapshotJson.addProperty("timestamp-ms", 1669126937912L);
+    JsonObject summary = new JsonObject();
+    summary.addProperty("operation", "append");
+    summary.addProperty("source-snapshot-id", "not-a-number");
+    snapshotJson.add("summary", summary);
+    snapshotJson.addProperty("manifest-list", "/tmp/test.avro");
+    snapshotJson.addProperty("schema-id", 0);
+
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(CatalogConstants.SNAPSHOTS_JSON_KEY, "[" + gson.toJson(snapshotJson) + "]");
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
+
+    // Should throw JsonSyntaxException when Gson tries to parse the invalid source-snapshot-id
+    assertThrows(
+        com.google.gson.JsonSyntaxException.class,
+        () -> snapshotDiffApplier.applySnapshots(null, newMetadata));
+  }
+
+  /**
+   * Verifies that a snapshot with null summary is handled correctly during WAP detection. Tests
+   * lines 172, 180, 202 which check snapshot.summary(). NOTE: This currently fails at Iceberg's
+   * parsing stage due to strict validation.
+   */
+  @Test
+  void testApplySnapshots_snapshotWithNullSummary_failsAtParsingStage() {
+    // Create a custom snapshot JSON with null/missing summary using Gson
+    // Note: Iceberg validates snapshot structure strictly, so this fails at parsing
+    Gson gson = new Gson();
+    JsonObject snapshotJson = new JsonObject();
+    snapshotJson.addProperty("snapshot-id", 1234567890123456789L);
+    snapshotJson.addProperty("timestamp-ms", 1669126937912L);
+    snapshotJson.addProperty("manifest-list", "/tmp/test.avro");
+    snapshotJson.addProperty("schema-id", 0);
+
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(CatalogConstants.SNAPSHOTS_JSON_KEY, "[" + gson.toJson(snapshotJson) + "]");
+
+    // Add a main branch ref pointing to this snapshot
+    Map<String, String> refs = new HashMap<>();
+    SnapshotRef mainRef = SnapshotRef.branchBuilder(1234567890123456789L).build();
+    refs.put(SnapshotRef.MAIN_BRANCH, SnapshotRefParser.toJson(mainRef));
+    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, SnapshotsUtil.serializeMap(refs));
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
+
+    // Should throw JsonSyntaxException during Iceberg parsing due to missing required summary
+    assertThrows(
+        com.google.gson.JsonSyntaxException.class,
+        () -> snapshotDiffApplier.applySnapshots(null, newMetadata));
+  }
+
+  /**
+   * Verifies behavior when provided snapshots are empty but refs are not. Tests that a ref pointing
+   * to nothing causes an exception.
+   */
+  @Test
+  void testApplySnapshots_emptySnapshotsWithNonEmptyRefs_throwsException() {
+    // Create refs pointing to a snapshot that doesn't exist
+    Map<String, String> refs = new HashMap<>();
+    SnapshotRef mainRef = SnapshotRef.branchBuilder(123456789L).build();
+    refs.put(SnapshotRef.MAIN_BRANCH, SnapshotRefParser.toJson(mainRef));
+
+    TableMetadata newMetadata =
+        createMetadataWithSnapshots(baseMetadata, Collections.emptyList(), refs);
+
+    // Should throw ValidationException because ref points to non-existent snapshot
+    assertThrows(
+        org.apache.iceberg.exceptions.ValidationException.class,
+        () -> snapshotDiffApplier.applySnapshots(null, newMetadata));
+  }
+
+  /** Verifies that null providedMetadata throws NullPointerException. */
+  @Test
+  void testApplySnapshots_nullProvidedMetadata_throwsNullPointerException() {
+    NullPointerException exception =
+        assertThrows(
+            NullPointerException.class,
+            () -> snapshotDiffApplier.applySnapshots(baseMetadata, null));
+
+    assertTrue(exception.getMessage().contains("providedMetadata cannot be null"));
+  }
+
+  /** Verifies that malformed JSON in SNAPSHOTS_JSON_KEY property throws exception. */
+  @Test
+  void testApplySnapshots_malformedSnapshotsJson_throwsException() {
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(CatalogConstants.SNAPSHOTS_JSON_KEY, "{ invalid json {{");
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
+
+    // Should throw JsonSyntaxException or similar from Gson
+    assertThrows(
+        com.google.gson.JsonSyntaxException.class,
+        () -> snapshotDiffApplier.applySnapshots(null, newMetadata));
+  }
+
+  /** Verifies that malformed JSON in SNAPSHOTS_REFS_KEY property throws exception. */
+  @Test
+  void testApplySnapshots_malformedRefsJson_throwsException() throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    Map<String, String> properties = new HashMap<>(baseMetadata.properties());
+    properties.put(
+        CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(snapshots));
+    properties.put(CatalogConstants.SNAPSHOTS_REFS_KEY, "{ invalid json {{");
+
+    TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
+
+    // Should throw JsonSyntaxException or similar from Gson
+    assertThrows(
+        com.google.gson.JsonSyntaxException.class,
+        () -> snapshotDiffApplier.applySnapshots(null, newMetadata));
+  }
+
+  /**
+   * Verifies behavior when attempting to delete all snapshots with no replacement. This should be
+   * caught by the existing validation.
+   */
+  @Test
+  void testApplySnapshots_deletingAllSnapshotsWithNoReplacement_throwsException()
+      throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
+
+    // Try to delete all snapshots without providing replacements
+    TableMetadata newMetadata =
+        createMetadataWithSnapshots(baseWithSnapshots, Collections.emptyList(), new HashMap<>());
+
+    InvalidIcebergSnapshotException exception =
+        assertThrows(
+            InvalidIcebergSnapshotException.class,
+            () -> snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata));
+
+    assertTrue(exception.getMessage().contains("Cannot delete the current snapshot"));
+  }
 }

From 323aa5a2481acfd04551f8e448a75ee64aa669a6 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Sun, 9 Nov 2025 02:55:41 -0800
Subject: [PATCH 34/35] adding more tests, and fixing small bug

---
 .../internal/catalog/SnapshotDiffApplier.java |  26 +-
 .../catalog/SnapshotDiffApplierTest.java      | 249 ++++++++++++++++++
 2 files changed, 265 insertions(+), 10 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index b1055ae3d..08db88ba1 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -85,7 +85,7 @@ public TableMetadata applySnapshots(
 
     // Validate, apply, record metrics (in correct order)
     diff.validate();
-    TableMetadata result = diff.applyTo();
+    TableMetadata result = diff.apply();
     diff.recordMetrics();
     return result;
   }
@@ -365,7 +365,7 @@ private void validateDeletedSnapshotsNotReferenced() {
       }
     }
 
-    TableMetadata applyTo() {
+    TableMetadata apply() {
       TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(this.providedMetadata);
 
       /**
@@ -378,22 +378,28 @@ TableMetadata applyTo() {
        *
        * <p>[3] Cherry-picked snapshots - existing snapshots, branch pointer set below
        */
-      // Add staged snapshots in timestamp order (explicit ordering for consistency)
+      // Add staged snapshots in sequence number order (ensures correct commit ordering)
       this.newStagedSnapshots.stream()
-          .sorted(java.util.Comparator.comparingLong(Snapshot::timestampMillis))
+          .sorted(java.util.Comparator.comparingLong(Snapshot::sequenceNumber))
           .forEach(metadataBuilder::addSnapshot);
 
-      // Add new main branch snapshots in timestamp order (explicit ordering)
-      // Note: While the branch pointer (not list order) determines currentSnapshot(),
-      // other code assumes snapshots are time-ordered (e.g., validation at line 308)
-      this.newMainBranchSnapshots.stream()
-          .sorted(java.util.Comparator.comparingLong(Snapshot::timestampMillis))
-          .forEach(metadataBuilder::addSnapshot);
+      // Add new main branch snapshots in sequence number order (ensures correct commit ordering)
+      List<Snapshot> sortedMainBranchSnapshots =
+          this.newMainBranchSnapshots.stream()
+              .sorted(java.util.Comparator.comparingLong(Snapshot::sequenceNumber))
+              .collect(Collectors.toList());
+      sortedMainBranchSnapshots.forEach(metadataBuilder::addSnapshot);
 
       // Set branch pointer once using providedRefs (covers both new snapshots and cherry-pick)
       if (!this.providedRefs.isEmpty()) {
         long newSnapshotId = this.providedRefs.get(SnapshotRef.MAIN_BRANCH).snapshotId();
         metadataBuilder.setBranchSnapshot(newSnapshotId, SnapshotRef.MAIN_BRANCH);
+      } else if (!sortedMainBranchSnapshots.isEmpty()) {
+        // Auto-append to main: if no refs provided but there are new main branch snapshots,
+        // set MAIN to the last snapshot (latest by sequence number due to sort above)
+        Snapshot latestSnapshot =
+            sortedMainBranchSnapshots.get(sortedMainBranchSnapshots.size() - 1);
+        metadataBuilder.setBranchSnapshot(latestSnapshot.snapshotId(), SnapshotRef.MAIN_BRANCH);
       }
 
       // Delete snapshots
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
index f325459df..5a7bec3d7 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
@@ -594,4 +594,253 @@ void testApplySnapshots_deletingAllSnapshotsWithNoReplacement_throwsException()
 
     assertTrue(exception.getMessage().contains("Cannot delete the current snapshot"));
   }
+
+  /**
+   * Verifies transition from table with unreferenced snapshots to having a MAIN branch. Tests
+   * ref-only update without snapshot changes.
+   */
+  @Test
+  void testApplySnapshots_baseWithUnreferencedSnapshotsOnly_addFirstMainBranch()
+      throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+
+    // Create base with snapshots but no refs (all unreferenced)
+    TableMetadata base = baseMetadata;
+    for (Snapshot snapshot : snapshots) {
+      base = TableMetadata.buildFrom(base).addSnapshot(snapshot).build();
+    }
+    // Verify no refs in base
+    assertTrue(base.refs().isEmpty() || !base.refs().containsKey(SnapshotRef.MAIN_BRANCH));
+
+    // Provided: same snapshots + MAIN ref to one of them
+    Snapshot mainSnapshot = snapshots.get(2);
+    Map<String, String> refs = IcebergTestUtil.obtainSnapshotRefsFromSnapshot(mainSnapshot);
+    TableMetadata newMetadata = createMetadataWithSnapshots(base, snapshots, refs);
+
+    TableMetadata result = snapshotDiffApplier.applySnapshots(base, newMetadata);
+
+    // Verify MAIN ref is set
+    assertNotNull(result.currentSnapshot());
+    assertEquals(mainSnapshot.snapshotId(), result.currentSnapshot().snapshotId());
+
+    // Verify no add/delete operations (ref-only update)
+    assertEquals(snapshots.size(), result.snapshots().size());
+    Map<String, String> resultProps = result.properties();
+    assertNull(resultProps.get(getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS)));
+    assertNull(resultProps.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS)));
+  }
+
+  /**
+   * Verifies table creation with no snapshots (empty state). Tests that an empty table can be
+   * created successfully.
+   */
+  @Test
+  void testApplySnapshots_nullBaseEmptySnapshotsEmptyRefs_createsEmptyTable() {
+    // Provided: empty snapshots list, empty refs
+    TableMetadata newMetadata =
+        createMetadataWithSnapshots(baseMetadata, Collections.emptyList(), new HashMap<>());
+
+    TableMetadata result = snapshotDiffApplier.applySnapshots(null, newMetadata);
+
+    // Verify empty table created
+    assertNotNull(result);
+    assertEquals(0, result.snapshots().size());
+    assertNull(result.currentSnapshot());
+    assertTrue(result.refs().isEmpty() || !result.refs().containsKey(SnapshotRef.MAIN_BRANCH));
+
+    // Verify no snapshot operations tracked
+    Map<String, String> resultProps = result.properties();
+    assertNull(resultProps.get(getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS)));
+    assertNull(resultProps.get(getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS)));
+    assertNull(resultProps.get(getCanonicalFieldName(CatalogConstants.DELETED_SNAPSHOTS)));
+  }
+
+  /**
+   * Verifies adding both regular and staged snapshots in a single commit. Tests that snapshot
+   * categorization correctly handles mixed types.
+   */
+  @Test
+  void testApplySnapshots_addRegularAndStagedSimultaneously() throws IOException {
+    // Start from empty base (no existing snapshots)
+    // Simulate a commit that adds both regular and staged snapshots simultaneously
+
+    List<Snapshot> extraSnapshots = IcebergTestUtil.getExtraSnapshots();
+
+    // Create a custom WAP snapshot without hardcoded sequence number to avoid conflicts
+    // Build snapshot JSON manually and wrap it in a Gson array
+    String wapSnapshotJson =
+        String.format(
+            "{\"snapshot-id\":%d,\"timestamp-ms\":%d,\"summary\":%s,\"manifest-list\":\"%s\",\"schema-id\":%d}",
+            999940701710231339L,
+            1669126937912L,
+            new Gson()
+                .toJson(
+                    Map.of(
+                        "operation", "append",
+                        "wap.id", "test-wap",
+                        "spark.app.id", "local-1669126906634",
+                        "added-data-files", "1",
+                        "added-records", "1")),
+            "/data/test.avro",
+            0);
+    String wapSnapshotArrayJson = new Gson().toJson(List.of(wapSnapshotJson));
+    List<Snapshot> customWapSnapshots = SnapshotsUtil.parseSnapshots(null, wapSnapshotArrayJson);
+
+    List<Snapshot> allSnapshots = new ArrayList<>();
+    allSnapshots.add(extraSnapshots.get(0)); // New regular snapshot
+    allSnapshots.add(customWapSnapshots.get(0)); // New staged snapshot
+
+    // MAIN ref points to the new regular snapshot
+    Map<String, String> refs =
+        IcebergTestUtil.obtainSnapshotRefsFromSnapshot(extraSnapshots.get(0));
+    TableMetadata newMetadata = createMetadataWithSnapshots(baseMetadata, allSnapshots, refs);
+
+    TableMetadata result = snapshotDiffApplier.applySnapshots(null, newMetadata);
+
+    // Verify both snapshots added
+    assertEquals(2, result.snapshots().size());
+
+    // Verify regular snapshot is on MAIN
+    assertNotNull(result.currentSnapshot());
+    assertEquals(extraSnapshots.get(0).snapshotId(), result.currentSnapshot().snapshotId());
+
+    // Verify tracking: regular appended, staged tracked separately
+    Map<String, String> resultProps = result.properties();
+    String appendedSnapshotsStr =
+        resultProps.get(getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS));
+    String stagedSnapshotsStr =
+        resultProps.get(getCanonicalFieldName(CatalogConstants.STAGED_SNAPSHOTS));
+
+    assertNotNull(appendedSnapshotsStr);
+    assertTrue(appendedSnapshotsStr.contains(Long.toString(extraSnapshots.get(0).snapshotId())));
+
+    assertNotNull(stagedSnapshotsStr);
+    assertTrue(stagedSnapshotsStr.contains(Long.toString(customWapSnapshots.get(0).snapshotId())));
+  }
+
+  /**
+   * Verifies cherry-picking a staged snapshot while adding a new snapshot in the same commit. Tests
+   * compound operation tracking.
+   */
+  @Test
+  void testApplySnapshots_cherryPickAndAddNewSimultaneously() throws IOException {
+    List<Snapshot> testWapSnapshots = IcebergTestUtil.getWapSnapshots();
+
+    // Base: MAIN snapshot + staged snapshot
+    TableMetadata base =
+        TableMetadata.buildFrom(baseMetadata)
+            .setBranchSnapshot(testWapSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
+            .addSnapshot(testWapSnapshots.get(1)) // Staged snapshot
+            .build();
+
+    // Provided: existing + new snapshot becomes MAIN, staged is cherry-picked
+    List<Snapshot> allSnapshots = new ArrayList<>();
+    allSnapshots.add(testWapSnapshots.get(0));
+    allSnapshots.add(testWapSnapshots.get(1)); // Was staged, now cherry-picked
+    allSnapshots.add(testWapSnapshots.get(2)); // New snapshot
+
+    // MAIN ref points to new snapshot
+    Map<String, String> refs =
+        IcebergTestUtil.obtainSnapshotRefsFromSnapshot(testWapSnapshots.get(2));
+    TableMetadata newMetadata = createMetadataWithSnapshots(base, allSnapshots, refs);
+
+    TableMetadata result = snapshotDiffApplier.applySnapshots(base, newMetadata);
+
+    // Verify new snapshot is on MAIN
+    assertNotNull(result.currentSnapshot());
+    assertEquals(testWapSnapshots.get(2).snapshotId(), result.currentSnapshot().snapshotId());
+
+    // Verify both operations tracked
+    Map<String, String> resultProps = result.properties();
+    String appendedSnapshotsStr =
+        resultProps.get(getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS));
+    String cherryPickedSnapshotsStr =
+        resultProps.get(getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS));
+
+    // New snapshot should be appended
+    assertNotNull(appendedSnapshotsStr);
+    assertTrue(appendedSnapshotsStr.contains(Long.toString(testWapSnapshots.get(2).snapshotId())));
+
+    // Staged snapshot should be cherry-picked
+    assertNotNull(cherryPickedSnapshotsStr);
+    assertTrue(
+        cherryPickedSnapshotsStr.contains(Long.toString(testWapSnapshots.get(1).snapshotId())));
+  }
+
+  /**
+   * Verifies that attempting to delete the current snapshot while unreferenced snapshots exist
+   * throws an exception. Tests current snapshot protection.
+   */
+  @Test
+  void testApplySnapshots_attemptDeleteCurrentWithUnreferencedPresent_throwsException()
+      throws IOException {
+    List<Snapshot> snapshots = IcebergTestUtil.getSnapshots();
+
+    // Base: MAIN snapshot + 2 unreferenced snapshots
+    TableMetadata base =
+        TableMetadata.buildFrom(baseMetadata)
+            .addSnapshot(snapshots.get(0)) // Unreferenced
+            .addSnapshot(snapshots.get(1)) // Unreferenced
+            .setBranchSnapshot(snapshots.get(2), SnapshotRef.MAIN_BRANCH) // Current snapshot
+            .build();
+
+    // Provided: only the 2 unreferenced (delete MAIN), no new snapshots
+    List<Snapshot> remainingSnapshots = snapshots.subList(0, 2);
+    TableMetadata newMetadata =
+        createMetadataWithSnapshots(base, remainingSnapshots, new HashMap<>());
+
+    // Should throw exception because current snapshot is being deleted without replacement
+    InvalidIcebergSnapshotException exception =
+        assertThrows(
+            InvalidIcebergSnapshotException.class,
+            () -> snapshotDiffApplier.applySnapshots(base, newMetadata));
+
+    assertTrue(exception.getMessage().contains("Cannot delete the current snapshot"));
+    assertTrue(exception.getMessage().contains(Long.toString(snapshots.get(2).snapshotId())));
+  }
+
+  /**
+   * Verifies adding regular (non-WAP) snapshots with empty refs. historically, such snapshots were
+   * automatically added to MAIN branch and tracked as APPENDED_SNAPSHOTS. This test validates
+   * backward compatibility with that behavior. NOTE: The semantics here are questionable -
+   * snapshots with no refs should arguably not be "appended" to MAIN, but this preserves the
+   * original behavior.
+   */
+  @Test
+  void testApplySnapshots_regularSnapshotsWithEmptyRefs_autoAppendedToMain() throws IOException {
+    List<Snapshot> baseSnapshots = IcebergTestUtil.getSnapshots();
+    TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, baseSnapshots);
+
+    // Provided: existing + new snapshots, but empty refs map (no MAIN branch)
+    List<Snapshot> extraSnapshots = IcebergTestUtil.getExtraSnapshots();
+    List<Snapshot> allSnapshots = new ArrayList<>(baseSnapshots);
+    allSnapshots.addAll(extraSnapshots);
+
+    // Empty refs - no MAIN branch
+    TableMetadata newMetadata =
+        createMetadataWithSnapshots(baseWithSnapshots, allSnapshots, new HashMap<>());
+
+    TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
+
+    // Verify new snapshots added
+    assertEquals(allSnapshots.size(), result.snapshots().size());
+
+    // Verify MAIN branch points to the latest snapshot (auto-appended to main)
+    assertNotNull(result.ref(SnapshotRef.MAIN_BRANCH));
+    assertEquals(
+        allSnapshots.get(allSnapshots.size() - 1).snapshotId(),
+        result.ref(SnapshotRef.MAIN_BRANCH).snapshotId());
+
+    // Verify new snapshots tracked as appended (even though unreferenced, they're not staged WAP)
+    Map<String, String> resultProps = result.properties();
+    String appendedSnapshotsStr =
+        resultProps.get(getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS));
+
+    assertNotNull(appendedSnapshotsStr);
+    for (Snapshot extraSnapshot : extraSnapshots) {
+      assertTrue(
+          appendedSnapshotsStr.contains(Long.toString(extraSnapshot.snapshotId())),
+          "Snapshot " + extraSnapshot.snapshotId() + " should be tracked as appended");
+    }
+  }
 }

From b51d5fbb4208a963a2a3706825a7c8aa24fcf9f9 Mon Sep 17 00:00:00 2001
From: cbb330 <chrisbush747@gmail.com>
Date: Sun, 9 Nov 2025 20:47:10 -0800
Subject: [PATCH 35/35] responding to comments, adding test

---
 .../internal/catalog/SnapshotDiffApplier.java |  56 +++++-----
 .../internal/catalog/IcebergTestUtil.java     |   5 +-
 .../OpenHouseInternalTableOperationsTest.java |  34 +++---
 .../catalog/SnapshotDiffApplierTest.java      | 102 ++++++++++++++++--
 4 files changed, 145 insertions(+), 52 deletions(-)

diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
index 08db88ba1..345f811ef 100644
--- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
+++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplier.java
@@ -172,7 +172,10 @@ static SnapshotDiff create(
       // Categorize snapshots
       List<Snapshot> newStagedSnapshots =
           newSnapshots.stream()
-              .filter(s -> s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
+              .filter(
+                  s ->
+                      s.summary() != null
+                          && s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
               .collect(Collectors.toList());
 
       // Compute source IDs for cherry-pick operations
@@ -220,7 +223,10 @@ static SnapshotDiff create(
       // (includes both regular commits and cherry-pick result snapshots)
       List<Snapshot> newMainBranchSnapshots =
           newSnapshots.stream()
-              .filter(s -> !s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
+              .filter(
+                  s ->
+                      s.summary() == null
+                          || !s.summary().containsKey(SnapshotSummary.STAGED_WAP_ID_PROP))
               .collect(Collectors.toList());
 
       // Compute appended count
@@ -373,33 +379,33 @@ TableMetadata apply() {
        *
        * <p>[1] Staged (WAP) snapshots - added without branch reference
        *
-       * <p>[2] New main branch snapshots - added without branch reference (branch pointer set
-       * below)
+       * <p>[2] New main branch snapshots - added and moved to MAIN branch incrementally
+       *
+       * <p>[3] Cherry-picked snapshots - existing snapshots, final branch pointer set below
        *
-       * <p>[3] Cherry-picked snapshots - existing snapshots, branch pointer set below
+       * <p>We trust the client-provided order rather than sorting. Sequence numbers are
+       * monotonically increasing along a branch's lineage (following parent pointers) for both
+       * cherry-pick result snapshots and fast-forward snapshots. Iceberg's setBranchSnapshot()
+       * validates sequence numbers, so we can rely on its built-in validation.
        */
-      // Add staged snapshots in sequence number order (ensures correct commit ordering)
-      this.newStagedSnapshots.stream()
-          .sorted(java.util.Comparator.comparingLong(Snapshot::sequenceNumber))
-          .forEach(metadataBuilder::addSnapshot);
-
-      // Add new main branch snapshots in sequence number order (ensures correct commit ordering)
-      List<Snapshot> sortedMainBranchSnapshots =
-          this.newMainBranchSnapshots.stream()
-              .sorted(java.util.Comparator.comparingLong(Snapshot::sequenceNumber))
-              .collect(Collectors.toList());
-      sortedMainBranchSnapshots.forEach(metadataBuilder::addSnapshot);
+      // Add staged snapshots in client-provided order
+      this.newStagedSnapshots.forEach(metadataBuilder::addSnapshot);
+
+      // Add new main branch snapshots and move MAIN pointer incrementally
+      // This works for both:
+      // - Regular commits: newly created snapshots
+      // - Cherry-pick results: newly created snapshots with SOURCE_SNAPSHOT_ID_PROP
+      for (Snapshot snapshot : this.newMainBranchSnapshots) {
+        metadataBuilder.setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH);
+      }
 
-      // Set branch pointer once using providedRefs (covers both new snapshots and cherry-pick)
-      if (!this.providedRefs.isEmpty()) {
-        long newSnapshotId = this.providedRefs.get(SnapshotRef.MAIN_BRANCH).snapshotId();
+      // Set final branch pointer using providedRefs if present
+      // This handles fast-forward for cherry-pick/WAP publish where we're moving the branch
+      // to an existing snapshot
+      SnapshotRef mainBranchRef = this.providedRefs.get(SnapshotRef.MAIN_BRANCH);
+      if (mainBranchRef != null) {
+        long newSnapshotId = mainBranchRef.snapshotId();
         metadataBuilder.setBranchSnapshot(newSnapshotId, SnapshotRef.MAIN_BRANCH);
-      } else if (!sortedMainBranchSnapshots.isEmpty()) {
-        // Auto-append to main: if no refs provided but there are new main branch snapshots,
-        // set MAIN to the last snapshot (latest by sequence number due to sort above)
-        Snapshot latestSnapshot =
-            sortedMainBranchSnapshots.get(sortedMainBranchSnapshots.size() - 1);
-        metadataBuilder.setBranchSnapshot(latestSnapshot.snapshotId(), SnapshotRef.MAIN_BRANCH);
       }
 
       // Delete snapshots
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/IcebergTestUtil.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/IcebergTestUtil.java
index d4fd6efaa..cdedb3e93 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/IcebergTestUtil.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/IcebergTestUtil.java
@@ -44,15 +44,14 @@ private static List<Snapshot> loadSnapshots(String snapshotFile) throws IOExcept
     return SnapshotsUtil.parseSnapshots(null, data);
   }
 
-  public static Map<String, String> obtainSnapshotRefsFromSnapshot(Snapshot snapshot) {
+  public static Map<String, String> createMainBranchRefPointingTo(Snapshot snapshot) {
     Map<String, String> snapshotRefs = new HashMap<>();
     SnapshotRef snapshotRef = SnapshotRef.branchBuilder(snapshot.snapshotId()).build();
     snapshotRefs.put(SnapshotRef.MAIN_BRANCH, SnapshotRefParser.toJson(snapshotRef));
     return snapshotRefs;
   }
 
-  public static Map<String, String> obtainSnapshotRefsFromSnapshot(
-      Snapshot snapshot, String branch) {
+  public static Map<String, String> createBranchRefPointingTo(Snapshot snapshot, String branch) {
     Map<String, String> snapshotRefs = new HashMap<>();
     SnapshotRef snapshotRef = SnapshotRef.branchBuilder(snapshot.snapshotId()).build();
     snapshotRefs.put(branch, SnapshotRefParser.toJson(snapshotRef));
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
index 476435a61..69b4027b9 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java
@@ -148,7 +148,7 @@ void testDoCommitAppendSnapshotsInitialVersion() throws IOException {
       properties.put(
           CatalogConstants.SNAPSHOTS_REFS_KEY,
           SnapshotsUtil.serializeMap(
-              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+              IcebergTestUtil.createMainBranchRefPointingTo(
                   testSnapshots.get(testSnapshots.size() - 1))));
 
       TableMetadata metadata = BASE_TABLE_METADATA.replaceProperties(properties);
@@ -193,7 +193,7 @@ void testDoCommitAppendSnapshotsExistingVersion() throws IOException {
       properties.put(
           CatalogConstants.SNAPSHOTS_REFS_KEY,
           SnapshotsUtil.serializeMap(
-              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+              IcebergTestUtil.createMainBranchRefPointingTo(
                   testSnapshots.get(testSnapshots.size() - 1))));
       properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
 
@@ -248,7 +248,7 @@ void testDoCommitAppendAndDeleteSnapshots() throws IOException {
       properties.put(
           CatalogConstants.SNAPSHOTS_REFS_KEY,
           SnapshotsUtil.serializeMap(
-              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+              IcebergTestUtil.createMainBranchRefPointingTo(
                   newSnapshots.get(newSnapshots.size() - 1))));
       properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
 
@@ -438,7 +438,7 @@ void testDoCommitDeleteSnapshots() throws IOException {
       properties.put(
           CatalogConstants.SNAPSHOTS_REFS_KEY,
           SnapshotsUtil.serializeMap(
-              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+              IcebergTestUtil.createMainBranchRefPointingTo(
                   testSnapshots.get(testSnapshots.size() - 1))));
       properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
 
@@ -640,7 +640,7 @@ void testDoCommitSnapshotsValidationThrowsException() throws IOException {
     properties.put(
         CatalogConstants.SNAPSHOTS_REFS_KEY,
         SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+            IcebergTestUtil.createMainBranchRefPointingTo(
                 testSnapshots.get(1)))); // But main refs snapshot 1
     properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
     metadata = metadata.replaceProperties(properties);
@@ -727,7 +727,7 @@ void testDoCommitAppendStageOnlySnapshotsExistingVersion() throws IOException {
       properties.put(
           CatalogConstants.SNAPSHOTS_REFS_KEY,
           SnapshotsUtil.serializeMap(
-              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(newSnapshots.get(0))));
+              IcebergTestUtil.createMainBranchRefPointingTo(newSnapshots.get(0))));
       properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
 
       TableMetadata metadata = base.replaceProperties(properties);
@@ -771,7 +771,7 @@ void testAppendSnapshotsWithOldSnapshots() throws IOException {
     properties.put(
         CatalogConstants.SNAPSHOTS_REFS_KEY,
         SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots.get(snapshots.size() - 1))));
+            IcebergTestUtil.createMainBranchRefPointingTo(snapshots.get(snapshots.size() - 1))));
 
     TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
@@ -790,7 +790,7 @@ void testAppendSnapshotsWithOldSnapshots() throws IOException {
     propertiesWithFuture.put(
         CatalogConstants.SNAPSHOTS_REFS_KEY,
         SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots.get(snapshots.size() - 1))));
+            IcebergTestUtil.createMainBranchRefPointingTo(snapshots.get(snapshots.size() - 1))));
 
     TableMetadata newMetadataWithFuture = baseMetadata.replaceProperties(propertiesWithFuture);
     openHouseInternalTableOperations.snapshotDiffApplier.applySnapshots(
@@ -823,7 +823,7 @@ void testDoCommitCherryPickSnapshotBaseUnchanged() throws IOException {
       properties.put(
           CatalogConstants.SNAPSHOTS_REFS_KEY,
           SnapshotsUtil.serializeMap(
-              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(testWapSnapshots.get(0))));
+              IcebergTestUtil.createMainBranchRefPointingTo(testWapSnapshots.get(0))));
       properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
 
       TableMetadata metadata = base.replaceProperties(properties);
@@ -864,7 +864,7 @@ void testDoCommitCherryPickSnapshotBaseChanged() throws IOException {
       properties.put(
           CatalogConstants.SNAPSHOTS_REFS_KEY,
           SnapshotsUtil.serializeMap(
-              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(
+              IcebergTestUtil.createMainBranchRefPointingTo(
                   testWapSnapshots.get(2)))); // new snapshot
       properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
 
@@ -905,7 +905,7 @@ void testDoCommitCherryPickFirstSnapshot() throws IOException {
       properties.put(
           CatalogConstants.SNAPSHOTS_REFS_KEY,
           SnapshotsUtil.serializeMap(
-              IcebergTestUtil.obtainSnapshotRefsFromSnapshot(testWapSnapshots.get(0))));
+              IcebergTestUtil.createMainBranchRefPointingTo(testWapSnapshots.get(0))));
       properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION);
 
       TableMetadata metadata = base.replaceProperties(properties);
@@ -1475,7 +1475,7 @@ void testDeleteSnapshotWithNoReference() throws IOException {
     properties.put(
         CatalogConstants.SNAPSHOTS_REFS_KEY,
         SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(referencedSnapshot)));
+            IcebergTestUtil.createMainBranchRefPointingTo(referencedSnapshot)));
 
     TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
@@ -1537,7 +1537,7 @@ void testDeleteEmptySnapshotList() throws IOException {
         SnapshotsUtil.serializedSnapshots(baseMetadata.snapshots()));
     properties.put(
         CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(IcebergTestUtil.obtainSnapshotRefsFromSnapshot(lastSnapshot)));
+        SnapshotsUtil.serializeMap(IcebergTestUtil.createMainBranchRefPointingTo(lastSnapshot)));
 
     TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
@@ -1583,7 +1583,7 @@ void testDeleteNullSnapshotList() throws IOException {
         SnapshotsUtil.serializedSnapshots(baseMetadata.snapshots()));
     properties.put(
         CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(IcebergTestUtil.obtainSnapshotRefsFromSnapshot(lastSnapshot)));
+        SnapshotsUtil.serializeMap(IcebergTestUtil.createMainBranchRefPointingTo(lastSnapshot)));
 
     TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
@@ -1633,7 +1633,7 @@ void testDeleteNonExistentSnapshot() throws IOException {
         SnapshotsUtil.serializedSnapshots(baseMetadata.snapshots()));
     properties.put(
         CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(IcebergTestUtil.obtainSnapshotRefsFromSnapshot(lastSnapshot)));
+        SnapshotsUtil.serializeMap(IcebergTestUtil.createMainBranchRefPointingTo(lastSnapshot)));
 
     TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
@@ -1721,7 +1721,7 @@ void testDeleteSnapshotMetricsRecordedBranch() throws IOException {
     properties.put(
         CatalogConstants.SNAPSHOTS_REFS_KEY,
         SnapshotsUtil.serializeMap(
-            IcebergTestUtil.obtainSnapshotRefsFromSnapshot(referencedSnapshot)));
+            IcebergTestUtil.createMainBranchRefPointingTo(referencedSnapshot)));
 
     TableMetadata newMetadata = baseMetadata.replaceProperties(properties);
 
@@ -1768,7 +1768,7 @@ void testDeleteSnapshotMetricsRecordedNonExistent() throws IOException {
         SnapshotsUtil.serializedSnapshots(finalBaseMetadata.snapshots()));
     properties.put(
         CatalogConstants.SNAPSHOTS_REFS_KEY,
-        SnapshotsUtil.serializeMap(IcebergTestUtil.obtainSnapshotRefsFromSnapshot(lastSnapshot)));
+        SnapshotsUtil.serializeMap(IcebergTestUtil.createMainBranchRefPointingTo(lastSnapshot)));
 
     TableMetadata newMetadata = finalBaseMetadata.replaceProperties(properties);
 
diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
index 5a7bec3d7..a1319475d 100644
--- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
+++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/SnapshotDiffApplierTest.java
@@ -96,7 +96,7 @@ private TableMetadata createMetadataWithSnapshots(
   private TableMetadata createMetadataWithSnapshotsAndMainRef(
       TableMetadata base, List<Snapshot> snapshots) {
     Map<String, String> refs =
-        IcebergTestUtil.obtainSnapshotRefsFromSnapshot(snapshots.get(snapshots.size() - 1));
+        IcebergTestUtil.createMainBranchRefPointingTo(snapshots.get(snapshots.size() - 1));
     return createMetadataWithSnapshots(base, snapshots, refs);
   }
 
@@ -184,7 +184,7 @@ void testApplySnapshots_mainBranchUpdates_success() throws IOException {
     TableMetadata baseWithSnapshots = addSnapshotsToMetadata(baseMetadata, snapshots);
 
     Snapshot newBranchTarget = snapshots.get(1);
-    Map<String, String> refs = IcebergTestUtil.obtainSnapshotRefsFromSnapshot(newBranchTarget);
+    Map<String, String> refs = IcebergTestUtil.createMainBranchRefPointingTo(newBranchTarget);
     TableMetadata newMetadata = createMetadataWithSnapshots(baseWithSnapshots, snapshots, refs);
 
     TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
@@ -288,7 +288,7 @@ void testMetrics_addStagedSnapshots_recordsStagedCounter() throws IOException {
     allSnapshots.addAll(wapSnapshots);
 
     Map<String, String> refs =
-        IcebergTestUtil.obtainSnapshotRefsFromSnapshot(baseSnapshots.get(baseSnapshots.size() - 1));
+        IcebergTestUtil.createMainBranchRefPointingTo(baseSnapshots.get(baseSnapshots.size() - 1));
     TableMetadata newMetadata = createMetadataWithSnapshots(baseWithSnapshots, allSnapshots, refs);
 
     TableMetadata result = snapshotDiffApplier.applySnapshots(baseWithSnapshots, newMetadata);
@@ -614,7 +614,7 @@ void testApplySnapshots_baseWithUnreferencedSnapshotsOnly_addFirstMainBranch()
 
     // Provided: same snapshots + MAIN ref to one of them
     Snapshot mainSnapshot = snapshots.get(2);
-    Map<String, String> refs = IcebergTestUtil.obtainSnapshotRefsFromSnapshot(mainSnapshot);
+    Map<String, String> refs = IcebergTestUtil.createMainBranchRefPointingTo(mainSnapshot);
     TableMetadata newMetadata = createMetadataWithSnapshots(base, snapshots, refs);
 
     TableMetadata result = snapshotDiffApplier.applySnapshots(base, newMetadata);
@@ -691,8 +691,7 @@ void testApplySnapshots_addRegularAndStagedSimultaneously() throws IOException {
     allSnapshots.add(customWapSnapshots.get(0)); // New staged snapshot
 
     // MAIN ref points to the new regular snapshot
-    Map<String, String> refs =
-        IcebergTestUtil.obtainSnapshotRefsFromSnapshot(extraSnapshots.get(0));
+    Map<String, String> refs = IcebergTestUtil.createMainBranchRefPointingTo(extraSnapshots.get(0));
     TableMetadata newMetadata = createMetadataWithSnapshots(baseMetadata, allSnapshots, refs);
 
     TableMetadata result = snapshotDiffApplier.applySnapshots(null, newMetadata);
@@ -741,7 +740,7 @@ void testApplySnapshots_cherryPickAndAddNewSimultaneously() throws IOException {
 
     // MAIN ref points to new snapshot
     Map<String, String> refs =
-        IcebergTestUtil.obtainSnapshotRefsFromSnapshot(testWapSnapshots.get(2));
+        IcebergTestUtil.createMainBranchRefPointingTo(testWapSnapshots.get(2));
     TableMetadata newMetadata = createMetadataWithSnapshots(base, allSnapshots, refs);
 
     TableMetadata result = snapshotDiffApplier.applySnapshots(base, newMetadata);
@@ -843,4 +842,93 @@ void testApplySnapshots_regularSnapshotsWithEmptyRefs_autoAppendedToMain() throw
           "Snapshot " + extraSnapshot.snapshotId() + " should be tracked as appended");
     }
   }
+
+  /**
+   * Verifies cherry-picking multiple staged snapshots in sequence, testing both fast-forward and
+   * rebase scenarios. wap1 and wap2 both have the same parent. Cherry-picking wap1 first is a
+   * fast-forward (no new snapshot). Cherry-picking wap2 after main has moved requires a rebase (new
+   * snapshot created).
+   */
+  @Test
+  void testApplySnapshots_cherryPickMultipleStagedSnapshotsOutOfOrder() throws IOException {
+    List<Snapshot> testSnapshots = IcebergTestUtil.getSnapshots();
+    List<Snapshot> testWapSnapshots = IcebergTestUtil.getWapSnapshots();
+
+    // Setup: MAIN snapshot + 2 staged WAP snapshots (wap1, wap2)
+    TableMetadata base =
+        TableMetadata.buildFrom(baseMetadata)
+            .setBranchSnapshot(testSnapshots.get(0), SnapshotRef.MAIN_BRANCH)
+            .addSnapshot(testWapSnapshots.get(0)) // wap1 (wap.id="wap1")
+            .addSnapshot(testWapSnapshots.get(1)) // wap2 (wap.id="wap2")
+            .build();
+
+    // Step 1: Fast-forward cherry-pick wap1
+    // wap1's parent == current main, so it's promoted directly (no new snapshot)
+    List<Snapshot> allSnapshots1 = new ArrayList<>();
+    allSnapshots1.add(testSnapshots.get(0));
+    allSnapshots1.add(testWapSnapshots.get(0)); // wap1 now on main
+    allSnapshots1.add(testWapSnapshots.get(1)); // wap2 still staged
+
+    // Set MAIN branch to point to wap1
+    Map<String, String> refs1 =
+        IcebergTestUtil.createMainBranchRefPointingTo(testWapSnapshots.get(0));
+    TableMetadata newMetadata1 = createMetadataWithSnapshots(base, allSnapshots1, refs1);
+
+    TableMetadata result1 = snapshotDiffApplier.applySnapshots(base, newMetadata1);
+
+    // Verify fast-forward: only cherry_picked tracked, no new snapshot appended
+    assertNotNull(result1.currentSnapshot());
+    assertEquals(testWapSnapshots.get(0).snapshotId(), result1.currentSnapshot().snapshotId());
+
+    Map<String, String> resultProps1 = result1.properties();
+    String cherryPickedSnapshots1 =
+        resultProps1.get(getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS));
+    assertNotNull(cherryPickedSnapshots1);
+    assertTrue(
+        cherryPickedSnapshots1.contains(Long.toString(testWapSnapshots.get(0).snapshotId())),
+        "wap1 should be tracked as cherry-picked");
+    assertNull(
+        resultProps1.get(getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS)),
+        "No new snapshot for fast-forward");
+
+    // Step 2: Rebase cherry-pick wap2
+    // wap2's parent != current main (which is now wap1), so a new snapshot is created
+    // New snapshot has: parent=wap1, source-snapshot-id=wap2, published.wap.id="wap2"
+    List<Snapshot> allSnapshots2 = new ArrayList<>();
+    allSnapshots2.add(testSnapshots.get(0));
+    allSnapshots2.add(testWapSnapshots.get(0)); // wap1
+    allSnapshots2.add(testWapSnapshots.get(1)); // wap2 (source)
+    allSnapshots2.add(testWapSnapshots.get(2)); // New rebased snapshot
+
+    Map<String, String> refs2 =
+        IcebergTestUtil.createMainBranchRefPointingTo(testWapSnapshots.get(2));
+    TableMetadata newMetadata2 = createMetadataWithSnapshots(result1, allSnapshots2, refs2);
+
+    TableMetadata result2 = snapshotDiffApplier.applySnapshots(result1, newMetadata2);
+
+    // Verify rebase: both cherry_picked (source) and appended (new snapshot) tracked
+    assertNotNull(result2.currentSnapshot());
+    assertEquals(testWapSnapshots.get(2).snapshotId(), result2.currentSnapshot().snapshotId());
+
+    Map<String, String> resultProps2 = result2.properties();
+
+    String cherryPickedSnapshots2 =
+        resultProps2.get(getCanonicalFieldName(CatalogConstants.CHERRY_PICKED_SNAPSHOTS));
+    assertNotNull(cherryPickedSnapshots2);
+    assertTrue(
+        cherryPickedSnapshots2.contains(Long.toString(testWapSnapshots.get(1).snapshotId())),
+        "wap2 should be tracked as cherry-picked (source)");
+
+    String appendedSnapshots2 =
+        resultProps2.get(getCanonicalFieldName(CatalogConstants.APPENDED_SNAPSHOTS));
+    assertNotNull(appendedSnapshots2);
+    assertTrue(
+        appendedSnapshots2.contains(Long.toString(testWapSnapshots.get(2).snapshotId())),
+        "New rebased snapshot should be tracked as appended");
+
+    // Verify all 4 snapshots present
+    assertEquals(4, result2.snapshots().size());
+    verify(mockMetricsReporter, atLeastOnce())
+        .count(eq(InternalCatalogMetricsConstant.SNAPSHOTS_CHERRY_PICKED_CTR), anyDouble());
+  }
 }