Add input validation, pre-fit warning, and docs fixes from AI review round 2

igerber · claude · igerber · commit c62d3bcc6b78 · 2026-02-10T18:18:12.000-05:00
Address 5 issues from PR #145 AI review: - P0: Validate treatment is constant within unit (reject staggered designs) - P1: Enforce balanced panel (all units must have all periods) - P1: Warn when pre-treatment fit RMSE exceeds treated outcome SD - P1: Fix Registry FW iteration count (1000 → 10000, matching R/code) - P2: Fix misleading placebo docstring (weights use fresh start, not warm start) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py
@@ -267,6 +267,24 @@ def fit(  # type: ignore[override]
         # Identify treated and control units
         # Treatment indicator should be constant within unit
         unit_treatment = data.groupby(unit)[treatment].first()
+
+        # Validate treatment is constant within unit (SDID requires block treatment)
+        treatment_nunique = data.groupby(unit)[treatment].nunique()
+        varying_units = treatment_nunique[treatment_nunique > 1]
+        if len(varying_units) > 0:
+            example_unit = varying_units.index[0]
+            example_vals = sorted(
+                data.loc[data[unit] == example_unit, treatment].unique()
+            )
+            raise ValueError(
+                f"Treatment indicator varies within {len(varying_units)} unit(s) "
+                f"(e.g., unit '{example_unit}' has values {example_vals}). "
+                f"SyntheticDiD requires 'block' treatment where treatment is "
+                f"constant within each unit across all time periods. "
+                f"For staggered adoption designs, use CallawaySantAnna or "
+                f"ImputationDiD instead."
+            )
+
         treated_units = unit_treatment[unit_treatment == 1].index.tolist()
         control_units = unit_treatment[unit_treatment == 0].index.tolist()
 
@@ -275,6 +293,21 @@ def fit(  # type: ignore[override]
         if len(control_units) == 0:
             raise ValueError("No control units found")
 
+        # Validate balanced panel (SDID requires all units observed in all periods)
+        periods_per_unit = data.groupby(unit)[time].nunique()
+        expected_n_periods = len(all_periods)
+        unbalanced_units = periods_per_unit[periods_per_unit != expected_n_periods]
+        if len(unbalanced_units) > 0:
+            example_unit = unbalanced_units.index[0]
+            actual_count = unbalanced_units.iloc[0]
+            raise ValueError(
+                f"Panel is not balanced: {len(unbalanced_units)} unit(s) do not "
+                f"have observations in all {expected_n_periods} periods "
+                f"(e.g., unit '{example_unit}' has {actual_count} periods). "
+                f"SyntheticDiD requires a balanced panel. Use "
+                f"diff_diff.prep.balance_panel() to balance the panel first."
+            )
+
         # Residualize covariates if provided
         working_data = data.copy()
         if covariates:
@@ -338,6 +371,22 @@ def fit(  # type: ignore[override]
         synthetic_pre = Y_pre_control @ unit_weights
         pre_fit_rmse = np.sqrt(np.mean((Y_pre_treated_mean - synthetic_pre) ** 2))
 
+        # Warn if pre-treatment fit is poor (Registry requirement).
+        # Threshold: 1× sample SD (ddof=1) of the treated-mean pre-treatment
+        # series. An RMSE above the series' own variation suggests the synthetic
+        # control fails to reproduce its level or trend; skipped when the SD is 0.
+        pre_treatment_sd = np.std(Y_pre_treated_mean, ddof=1) if len(Y_pre_treated_mean) > 1 else 0.0
+        if pre_treatment_sd > 0 and pre_fit_rmse > pre_treatment_sd:
+            warnings.warn(
+                f"Pre-treatment fit is poor: RMSE ({pre_fit_rmse:.4f}) exceeds "
+                f"the standard deviation of treated pre-treatment outcomes "
+                f"({pre_treatment_sd:.4f}). The synthetic control may not "
+                f"adequately reproduce treated unit trends. Consider adding "
+                f"more control units or adjusting regularization.",
+                UserWarning,
+                stacklevel=2,
+            )
+
         # Compute standard errors based on variance_method
         if self.variance_method == "bootstrap":
             se, bootstrap_estimates = self._bootstrap_se(
@@ -814,7 +863,7 @@ def _placebo_variance_se(
 
                 # Re-estimate weights on permuted data (matching R's behavior)
                 # R passes update.omega=TRUE, update.lambda=TRUE via opts,
-                # using original weights as starting points for FW optimization.
+                # re-estimating weights from uniform initialization (fresh start).
                 # Unit weights: re-estimate on pseudo-control/pseudo-treated data
                 pseudo_omega = compute_sdid_unit_weights(
                     Y_pre_pseudo_control,
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -587,7 +587,7 @@ where A = Y_unit[:, :N_co], b = Y_unit[:, N_co], and centering is column-wise (i
 **Two-pass sparsification procedure** (matches R's `synthdid::sc.weight.fw` + `sparsify_function`):
 1. First pass: Run Frank-Wolfe for 100 iterations (max_iter_pre_sparsify) from uniform initialization
 2. Sparsify: `v[v <= max(v)/4] = 0; v = v / sum(v)` (zero out small weights, renormalize)
-3. Second pass: Run Frank-Wolfe for 1000 iterations (max_iter) starting from sparsified weights
+3. Second pass: Run Frank-Wolfe for 10000 iterations (max_iter) starting from sparsified weights
 
 The sparsification step concentrates weights on the most important control units, improving interpretability and stability.
 
@@ -659,13 +659,16 @@ Convergence criterion: stop when objective decrease < min_decrease² (default mi
 - **Noise level with < 2 pre-periods**: Returns 0.0, which makes both zeta_omega and zeta_lambda equal to 0.0 (no regularization).
 - **NaN inference for undefined statistics**: t_stat uses NaN when SE is zero or non-finite; p_value and CI also NaN. Matches CallawaySantAnna NaN convention.
 - **Placebo p-value floor**: `p_value = max(empirical_p, 1/(n_replications + 1))` to avoid reporting exactly zero.
+- **Varying treatment within unit**: Raises `ValueError`. SDID requires block treatment (constant within each unit). Suggests CallawaySantAnna or ImputationDiD for staggered adoption.
+- **Unbalanced panel**: Raises `ValueError`. SDID requires all units observed in all periods. Suggests `diff_diff.prep.balance_panel()`.
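+- **Poor pre-treatment fit**: Warns (`UserWarning`) when `pre_fit_rmse > std(Y_pre_treated_mean, ddof=1)`, i.e. when the RMSE between the treated-mean and synthetic pre-treatment series exceeds the sample SD of the treated-mean series. No warning is issued when that SD is zero or the series has fewer than two points. Diagnostic only; estimation proceeds.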
 
 **Reference implementation(s):**
 - R: `synthdid::synthdid_estimate()` (Arkhangelsky et al.'s official package)
 - Key R functions matched: `sc.weight.fw()` (Frank-Wolfe), `sparsify_function` (sparsification), `vcov.synthdid_estimate()` (variance)
 
 **Requirements checklist:**
-- [x] Unit weights: Frank-Wolfe on collapsed form (T_pre, N_co+1), two-pass sparsification (100 iters -> sparsify -> 1000 iters)
+- [x] Unit weights: Frank-Wolfe on collapsed form (T_pre, N_co+1), two-pass sparsification (100 iters -> sparsify -> 10000 iters)
 - [x] Time weights: Frank-Wolfe on collapsed form (N_co, T_pre+1), last column = per-control post mean
 - [x] Unit and time weights: sum to 1, non-negative (simplex constraint)
 - [x] Auto-regularization: noise_level = sd(first_diffs), zeta_omega = (N1*T1)^0.25 * noise_level, zeta_lambda = 1e-6 * noise_level
diff --git a/tests/test_methodology_sdid.py b/tests/test_methodology_sdid.py
@@ -11,6 +11,8 @@
 import numpy as np
 import pytest
 
+import pandas as pd
+
 from diff_diff.synthetic_did import SyntheticDiD
 from diff_diff.utils import (
     _compute_noise_level,
@@ -730,3 +732,164 @@ def test_placebo_reestimates_weights_not_fixed(self):
                 f"({fixed_se:.6f}), suggesting weights are NOT being "
                 f"re-estimated as R's synthdid does."
             )
+
+
+# =============================================================================
+# Treatment Validation
+# =============================================================================
+
+
+class TestTreatmentValidation:
+    """Test that SDID rejects time-varying treatment (staggered designs)."""
+
+    def test_varying_treatment_within_unit_raises(self):
+        """Unit whose treatment switches over time should raise ValueError."""
+        np.random.seed(42)
+        data = pd.DataFrame({
+            "unit": [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3],
+            "time": [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4],
+            "outcome": np.random.randn(12),
+            # Unit 1: treatment indicator switches on at time 3 (varies within unit)
+            "treated": [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
+        })
+        sdid = SyntheticDiD()
+        with pytest.raises(ValueError, match="Treatment indicator varies within"):
+            sdid.fit(
+                data, outcome="outcome", treatment="treated",
+                unit="unit", time="time", post_periods=[3, 4],
+            )
+
+    def test_constant_treatment_passes(self):
+        """Normal block-treatment data should pass validation."""
+        np.random.seed(42)
+        n_units, n_periods = 10, 8
+        rows = []
+        for u in range(n_units):
+            is_treated = 1 if u < 3 else 0
+            for t in range(n_periods):
+                rows.append({
+                    "unit": u, "time": t,
+                    "outcome": np.random.randn() + (2.0 if is_treated and t >= 5 else 0),
+                    "treated": is_treated,
+                })
+        data = pd.DataFrame(rows)
+        sdid = SyntheticDiD()
+        result = sdid.fit(
+            data, outcome="outcome", treatment="treated",
+            unit="unit", time="time", post_periods=[5, 6, 7],
+        )
+        assert result is not None
+
+
+# =============================================================================
+# Balanced Panel Validation
+# =============================================================================
+
+
+class TestBalancedPanelValidation:
+    """Test that SDID rejects unbalanced panels."""
+
+    def test_unbalanced_panel_raises(self):
+        """Unit missing a period should raise ValueError."""
+        np.random.seed(42)
+        rows = []
+        for u in range(6):
+            is_treated = 1 if u < 2 else 0
+            for t in range(5):
+                rows.append({
+                    "unit": u, "time": t,
+                    "outcome": np.random.randn(),
+                    "treated": is_treated,
+                })
+        data = pd.DataFrame(rows)
+        # Drop one observation to make panel unbalanced
+        data = data[~((data["unit"] == 3) & (data["time"] == 2))].reset_index(drop=True)
+
+        sdid = SyntheticDiD()
+        with pytest.raises(ValueError, match="Panel is not balanced"):
+            sdid.fit(
+                data, outcome="outcome", treatment="treated",
+                unit="unit", time="time", post_periods=[3, 4],
+            )
+
+    def test_balanced_panel_passes(self):
+        """Fully balanced panel should pass validation."""
+        np.random.seed(42)
+        rows = []
+        for u in range(8):
+            is_treated = 1 if u < 2 else 0
+            for t in range(6):
+                rows.append({
+                    "unit": u, "time": t,
+                    "outcome": np.random.randn() + (1.5 if is_treated and t >= 4 else 0),
+                    "treated": is_treated,
+                })
+        data = pd.DataFrame(rows)
+        sdid = SyntheticDiD()
+        result = sdid.fit(
+            data, outcome="outcome", treatment="treated",
+            unit="unit", time="time", post_periods=[4, 5],
+        )
+        assert result is not None
+
+
+# =============================================================================
+# Pre-treatment Fit Warning
+# =============================================================================
+
+
+class TestPreTreatmentFitWarning:
+    """Test that poor pre-treatment fit emits a warning."""
+
+    def test_poor_fit_emits_warning(self):
+        """Treated units at very different level from controls should warn."""
+        np.random.seed(42)
+        rows = []
+        for u in range(10):
+            is_treated = 1 if u < 2 else 0
+            # Large level difference: treated ~100, control ~10
+            level = 100.0 if is_treated else 10.0
+            for t in range(8):
+                rows.append({
+                    "unit": u, "time": t,
+                    "outcome": level + np.random.randn() * 0.5,
+                    "treated": is_treated,
+                })
+        data = pd.DataFrame(rows)
+        sdid = SyntheticDiD()
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            sdid.fit(
+                data, outcome="outcome", treatment="treated",
+                unit="unit", time="time", post_periods=[6, 7],
+            )
+            fit_warnings = [x for x in w if "Pre-treatment fit is poor" in str(x.message)]
+            assert len(fit_warnings) >= 1, (
+                "Expected warning about poor pre-treatment fit but none was raised"
+            )
+
+    def test_good_fit_no_warning(self):
+        """Parallel trends data with similar levels should not warn."""
+        np.random.seed(42)
+        rows = []
+        for u in range(10):
+            is_treated = 1 if u < 3 else 0
+            for t in range(8):
+                # Same level, parallel trends, treatment effect only in post
+                rows.append({
+                    "unit": u, "time": t,
+                    "outcome": t + np.random.randn() * 0.3 + (2.0 if is_treated and t >= 5 else 0),
+                    "treated": is_treated,
+                })
+        data = pd.DataFrame(rows)
+        sdid = SyntheticDiD()
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            sdid.fit(
+                data, outcome="outcome", treatment="treated",
+                unit="unit", time="time", post_periods=[5, 6, 7],
+            )
+            fit_warnings = [x for x in w if "Pre-treatment fit is poor" in str(x.message)]
+            assert len(fit_warnings) == 0, (
+                f"Unexpected pre-treatment fit warning: {fit_warnings[0].message}"
+            )