Warn when TripleDifference power params don't match fixed 2×2×2 DGP

igerber · claude · igerber · commit 0e912bbed172 · 2026-03-18T16:53:49.000-04:00
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/TODO.md b/TODO.md
@@ -47,6 +47,7 @@ Deferred items from PR reviews that were not addressed before merge.
 | Bootstrap NaN-gating gap: manual SE/CI/p-value without non-finite filtering or SE<=0 guard | `imputation_bootstrap.py`, `two_stage_bootstrap.py` | #177 | Medium — migrate to `compute_effect_bootstrap_stats` from `bootstrap_utils.py` |
 | EfficientDiD: warn when cohort share is very small (< 2 units or < 1% of sample) — inverted in Omega*/EIF | `efficient_did_weights.py` | #192 | Low |
 | EfficientDiD: API docs / tutorial page for new public estimator | `docs/` | #192 | Medium |
+| TripleDifference power: `generate_ddd_data` is a fixed 2×2×2 cross-sectional DGP — no multi-period or unbalanced-group support. Add a `generate_ddd_panel_data` for panel DDD power analysis. | `prep_dgp.py`, `power.py` | #208 | Low |
 
 #### Performance
 
diff --git a/diff_diff/power.py b/diff_diff/power.py
@@ -310,6 +310,57 @@ def _check_staggered_dgp_compat(
         warnings.warn(msg, UserWarning, stacklevel=2)
 
 
+def _check_ddd_dgp_compat(
+    n_units: int,  # requested sample size; compared with n_per_cell × 8
+    n_periods: int,  # reported as ignored unless equal to 2
+    treatment_fraction: float,  # reported as ignored unless equal to 0.5
+    treatment_period: int,  # reported as ignored unless equal to 1
+    data_generator_kwargs: Optional[Dict[str, Any]],  # may be None; only "n_per_cell" is read
+) -> None:
+    """Emit one UserWarning listing inputs that DDD's fixed 2×2×2 DGP ignores or adjusts."""
+    overrides = data_generator_kwargs or {}
+    issues: List[str] = []  # one clause per mismatch, joined into a single warning below
+    # The three "is ignored" checks are skipped when the caller overrides n_per_cell.
+    # DDD is a fixed 2-period factorial; n_periods and treatment_period are ignored
+    if n_periods != 2 and "n_per_cell" not in overrides:
+        issues.append(
+            f"n_periods={n_periods} is ignored (DDD uses a fixed " f"2-period design: pre/post)"
+        )
+    if treatment_period != 1 and "n_per_cell" not in overrides:
+        issues.append(
+            f"treatment_period={treatment_period} is ignored (DDD "
+            f"always treats in the second period)"
+        )
+
+    # DDD's 2×2×2 factorial has inherent 50% treatment fraction
+    if treatment_fraction != 0.5 and "n_per_cell" not in overrides:
+        issues.append(
+            f"treatment_fraction={treatment_fraction} is ignored "
+            f"(DDD uses a balanced 2×2×2 factorial where 50% of "
+            f"groups are treated)"
+        )
+
+    # n_units rounding: n_per_cell = max(2, n_units // 8) unless n_per_cell is overridden
+    effective_n_per_cell = overrides.get("n_per_cell", max(2, n_units // 8))
+    effective_n = effective_n_per_cell * 8  # 8 cells in the 2×2×2 design
+    if effective_n != n_units:
+        issues.append(
+            f"effective sample size is {effective_n} "
+            f"(n_per_cell={effective_n_per_cell} × 8 cells), "
+            f"not the requested n_units={n_units}"
+        )
+
+    if issues:  # stay silent when nothing mismatched
+        warnings.warn(
+            "TripleDifference uses a fixed 2×2×2 factorial DGP "
+            "(group × partition × time). "
+            + "; ".join(issues)
+            + ". Pass a custom data_generator for non-standard DDD designs.",
+            UserWarning,
+            stacklevel=2,  # attribute the warning to this helper's caller
+        )
+
+
 # -- Registry construction (deferred to avoid import-time cost) ---------------
 
 _ESTIMATOR_REGISTRY: Optional[Dict[str, _EstimatorProfile]] = None
@@ -1431,6 +1482,16 @@ def simulate_power(
     if profile is not None and not use_custom_dgp:
         _check_staggered_dgp_compat(estimator, data_generator_kwargs)
 
+    # Warn if DDD design inputs are silently ignored
+    if estimator_name == "TripleDifference" and not use_custom_dgp:
+        _check_ddd_dgp_compat(
+            n_units,
+            n_periods,
+            treatment_fraction,
+            treatment_period,
+            data_generator_kwargs,
+        )
+
     # Determine effect sizes to test
     if effect_sizes is None:
         effect_sizes = [treatment_effect]
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -1713,6 +1713,7 @@ n = 2(t_{α/2} + t_{1-κ})² σ² / MDE²
 - High ICC: dramatically reduces effective sample size
 - Unequal allocation: optimal is often 50-50 but depends on costs
 - **Note:** The simulation-based power registry (`simulate_power`, `simulate_mde`, `simulate_sample_size`) uses a single-cohort staggered DGP by default. Estimators configured with `control_group="not_yet_treated"`, `clean_control="strict"`, or `anticipation>0` will receive a `UserWarning` because the default DGP does not match their identification strategy. Users must supply `data_generator_kwargs` (e.g., `cohort_periods=[2, 4]`, `never_treated_frac=0.0`) or a custom `data_generator` to match the estimator design.
+- **Note:** The `TripleDifference` registry adapter uses `generate_ddd_data`, a fixed 2×2×2 factorial DGP (group × partition × time). The `n_periods`, `treatment_period`, and `treatment_fraction` parameters are ignored — DDD always simulates 2 periods with balanced groups. `n_units` is mapped to `n_per_cell = max(2, n_units // 8)` (effective total N = `n_per_cell × 8`), so non-multiples of 8 are rounded down and values below 16 are clamped to 16. A `UserWarning` is emitted when simulation inputs differ from the effective DDD design; the ignored-parameter messages are skipped when `n_per_cell` is passed via `data_generator_kwargs`, and no warning is emitted when a custom `data_generator` is supplied.
 
 **Reference implementation(s):**
 - R: `pwr` package (general), `DeclareDesign` (simulation-based)
diff --git a/tests/test_power.py b/tests/test_power.py
@@ -931,12 +931,140 @@ def test_triple_difference(self):
         result = simulate_power(
             TripleDifference(),
             n_units=80,
+            n_periods=2,
+            treatment_period=1,
             n_simulations=10,
             seed=42,
             progress=False,
         )
         self._assert_valid_result(result, "TripleDifference")
 
+    def test_ddd_warns_ignored_params(self):
+        """TripleDifference warns when simulation params don't match DDD design."""
+        with pytest.warns(UserWarning, match="n_periods=6 is ignored"):
+            simulate_power(
+                TripleDifference(),
+                n_units=80,  # multiple of 8, so no sample-size clause is expected
+                n_periods=6,  # differs from the fixed 2 periods; this is the clause matched above
+                treatment_period=3,  # also mismatched (not 1), but not asserted on here
+                treatment_fraction=0.3,  # also mismatched (not 0.5), but not asserted on here
+                n_simulations=2,
+                seed=42,
+                progress=False,
+            )
+
+    def test_ddd_warns_nonaligned_n_units(self):
+        """TripleDifference warns when n_units doesn't map cleanly to 8 cells."""
+        with pytest.warns(UserWarning, match="effective sample size is 64"):
+            simulate_power(
+                TripleDifference(),
+                n_units=65,  # 65 // 8 = 8 per cell, so the effective size is 8 × 8 = 64
+                n_periods=2,  # matches the fixed 2-period design
+                treatment_period=1,  # matches "treated in the second period"
+                n_simulations=2,
+                seed=42,
+                progress=False,
+            )
+
+    def test_ddd_small_n_units_warns(self):
+        """TripleDifference warns when n_units < 16 (clamped to 16)."""
+        with pytest.warns(UserWarning, match="effective sample size is 16"):
+            simulate_power(
+                TripleDifference(),
+                n_units=10,  # 10 // 8 = 1, raised to the 2-per-cell minimum, so 2 × 8 = 16
+                n_periods=2,  # matches the fixed 2-period design
+                treatment_period=1,  # matches "treated in the second period"
+                n_simulations=2,
+                seed=42,
+                progress=False,
+            )
+
+    def test_ddd_no_warn_aligned(self):
+        """No warning when n_units is a multiple of 8 and defaults match DDD."""
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")  # turn any warning raised below into an exception
+            simulate_power(
+                TripleDifference(),
+                n_units=80,  # 80 // 8 = 10 per cell, and 10 × 8 = 80 exactly
+                n_periods=2,  # matches the fixed 2-period design
+                treatment_period=1,  # matches "treated in the second period"
+                treatment_fraction=0.5,  # matches the balanced 50% design
+                n_simulations=2,
+                seed=42,
+                progress=False,
+            )
+
+    def test_ddd_no_warn_custom_dgp(self):
+        """Custom data_generator bypasses the DDD compat check."""
+
+        def custom_dgp(**kwargs):  # reads only "seed" from the keyword arguments it receives
+            from diff_diff.prep_dgp import generate_ddd_data
+
+            return generate_ddd_data(n_per_cell=10, seed=kwargs.get("seed"))
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")  # turn any warning raised below into an exception
+            simulate_power(
+                TripleDifference(),
+                n_units=65,  # would produce the sample-size clause under the default DGP
+                n_periods=6,  # would produce the n_periods clause under the default DGP
+                data_generator=custom_dgp,
+                estimator_kwargs=dict(
+                    outcome="outcome",
+                    group="group",
+                    partition="partition",
+                    time="time",
+                ),
+                n_simulations=2,
+                seed=42,
+                progress=False,
+            )
+
+    def test_ddd_no_warn_n_per_cell_override(self):
+        """data_generator_kwargs with n_per_cell suppresses DDD param warnings."""
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")  # turn any warning raised below into an exception
+            simulate_power(
+                TripleDifference(),
+                n_units=80,  # equals n_per_cell × 8 = 80, so the sample-size clause does not apply
+                n_periods=6,  # mismatched, but not reported once n_per_cell is overridden
+                data_generator_kwargs=dict(n_per_cell=10),
+                n_simulations=2,
+                seed=42,
+                progress=False,
+            )
+
+    @pytest.mark.slow
+    def test_ddd_mde(self):
+        """simulate_mde works for TripleDifference."""
+        result = simulate_mde(
+            TripleDifference(),
+            n_units=80,  # multiple of 8 (10 per cell × 8 cells)
+            n_periods=2,  # matches the fixed 2-period DDD design
+            treatment_period=1,  # DDD treats in the second period
+            n_simulations=5,
+            effect_range=(0.5, 5.0),
+            seed=42,
+            progress=False,
+        )
+        assert isinstance(result, SimulationMDEResults)
+        assert result.mde > 0  # smoke check: positive, no specific value pinned
+
+    @pytest.mark.slow
+    def test_ddd_sample_size(self):
+        """simulate_sample_size works for TripleDifference."""
+        result = simulate_sample_size(
+            TripleDifference(),
+            n_periods=2,  # matches the fixed 2-period DDD design
+            treatment_period=1,  # DDD treats in the second period
+            n_simulations=5,
+            n_range=(64, 200),  # both bounds are multiples of 8
+            seed=42,
+            progress=False,
+        )
+        assert isinstance(result, SimulationSampleSizeResults)
+        assert result.required_n > 0  # smoke check: positive, no specific value pinned
+
     @pytest.mark.slow
     def test_trop(self):
         result = simulate_power(