Skip to content

Commit 0e912bb

Browse files
igerber and claude committed
Warn when TripleDifference power params don't match fixed 2×2×2 DGP
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9e18a3e commit 0e912bb

4 files changed

Lines changed: 191 additions & 0 deletions

File tree

TODO.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ Deferred items from PR reviews that were not addressed before merge.
4747
| Bootstrap NaN-gating gap: manual SE/CI/p-value without non-finite filtering or SE<=0 guard | `imputation_bootstrap.py`, `two_stage_bootstrap.py` | #177 | Medium — migrate to `compute_effect_bootstrap_stats` from `bootstrap_utils.py` |
4848
| EfficientDiD: warn when cohort share is very small (< 2 units or < 1% of sample) — inverted in Omega*/EIF | `efficient_did_weights.py` | #192 | Low |
4949
| EfficientDiD: API docs / tutorial page for new public estimator | `docs/` | #192 | Medium |
50+
| TripleDifference power: `generate_ddd_data` is a fixed 2×2×2 cross-sectional DGP — no multi-period or unbalanced-group support. Add a `generate_ddd_panel_data` for panel DDD power analysis. | `prep_dgp.py`, `power.py` | #208 | Low |
5051

5152
#### Performance
5253

diff_diff/power.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,57 @@ def _check_staggered_dgp_compat(
310310
warnings.warn(msg, UserWarning, stacklevel=2)
311311

312312

313+
def _check_ddd_dgp_compat(
    n_units: int,
    n_periods: int,
    treatment_fraction: float,
    treatment_period: int,
    data_generator_kwargs: Optional[Dict[str, Any]],
) -> None:
    """Warn when simulation inputs don't match DDD's fixed 2×2×2 design.

    Collects every mismatch between the requested simulation parameters and
    the effective TripleDifference design and emits them as a single
    ``UserWarning``. Nothing is emitted when there is no mismatch.

    Parameters
    ----------
    n_units : int
        Requested total sample size. Compared against the effective size
        ``n_per_cell * 8``.
    n_periods : int
        Requested number of periods; reported as ignored unless it equals 2.
    treatment_fraction : float
        Requested treated share; reported as ignored unless it equals 0.5.
    treatment_period : int
        Requested first treated period; reported as ignored unless it equals 1.
    data_generator_kwargs : dict or None
        User overrides for the data generator. Only the ``"n_per_cell"`` key
        is inspected here.

    Returns
    -------
    None
    """
    overrides = data_generator_kwargs or {}
    issues: List[str] = []

    # An explicit ``n_per_cell`` override suppresses the three "is ignored"
    # notes below; only the sample-size consistency check still applies.
    if "n_per_cell" not in overrides:
        # DDD is a fixed 2-period factorial; n_periods and treatment_period
        # are ignored.
        if n_periods != 2:
            issues.append(
                f"n_periods={n_periods} is ignored (DDD uses a fixed 2-period design: pre/post)"
            )
        if treatment_period != 1:
            issues.append(
                f"treatment_period={treatment_period} is ignored (DDD "
                f"always treats in the second period)"
            )

        # DDD's 2×2×2 factorial has inherent 50% treatment fraction.
        if treatment_fraction != 0.5:
            issues.append(
                f"treatment_fraction={treatment_fraction} is ignored "
                f"(DDD uses a balanced 2×2×2 factorial where 50% of "
                f"groups are treated)"
            )

    # n_units rounding: n_per_cell = max(2, n_units // 8), so non-multiples of
    # 8 round down and anything below 16 is clamped to 16.
    effective_n_per_cell = overrides.get("n_per_cell", max(2, n_units // 8))
    effective_n = effective_n_per_cell * 8
    if effective_n != n_units:
        issues.append(
            f"effective sample size is {effective_n} "
            f"(n_per_cell={effective_n_per_cell} × 8 cells), "
            f"not the requested n_units={n_units}"
        )

    if issues:
        warnings.warn(
            "TripleDifference uses a fixed 2×2×2 factorial DGP "
            "(group × partition × time). "
            + "; ".join(issues)
            + ". Pass a custom data_generator for non-standard DDD designs.",
            UserWarning,
            stacklevel=2,
        )
362+
363+
313364
# -- Registry construction (deferred to avoid import-time cost) ---------------
314365

315366
_ESTIMATOR_REGISTRY: Optional[Dict[str, _EstimatorProfile]] = None
@@ -1431,6 +1482,16 @@ def simulate_power(
14311482
if profile is not None and not use_custom_dgp:
14321483
_check_staggered_dgp_compat(estimator, data_generator_kwargs)
14331484

1485+
# Warn if DDD design inputs are silently ignored
1486+
if estimator_name == "TripleDifference" and not use_custom_dgp:
1487+
_check_ddd_dgp_compat(
1488+
n_units,
1489+
n_periods,
1490+
treatment_fraction,
1491+
treatment_period,
1492+
data_generator_kwargs,
1493+
)
1494+
14341495
# Determine effect sizes to test
14351496
if effect_sizes is None:
14361497
effect_sizes = [treatment_effect]

docs/methodology/REGISTRY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1713,6 +1713,7 @@ n = 2(t_{α/2} + t_{1-κ})² σ² / MDE²
17131713
- High ICC: dramatically reduces effective sample size
17141714
- Unequal allocation: optimal is often 50-50 but depends on costs
17151715
- **Note:** The simulation-based power registry (`simulate_power`, `simulate_mde`, `simulate_sample_size`) uses a single-cohort staggered DGP by default. Estimators configured with `control_group="not_yet_treated"`, `clean_control="strict"`, or `anticipation>0` will receive a `UserWarning` because the default DGP does not match their identification strategy. Users must supply `data_generator_kwargs` (e.g., `cohort_periods=[2, 4]`, `never_treated_frac=0.0`) or a custom `data_generator` to match the estimator design.
1716+
- **Note:** The `TripleDifference` registry adapter uses `generate_ddd_data`, a fixed 2×2×2 factorial DGP (group × partition × time). The `n_periods`, `treatment_period`, and `treatment_fraction` parameters are ignored — DDD always simulates 2 periods with balanced groups. `n_units` is mapped to `n_per_cell = max(2, n_units // 8)` (effective total N = `n_per_cell × 8`), so non-multiples of 8 are rounded down and values below 16 are clamped to 16. A `UserWarning` is emitted when simulation inputs differ from the effective DDD design.
17161717

17171718
**Reference implementation(s):**
17181719
- R: `pwr` package (general), `DeclareDesign` (simulation-based)

tests/test_power.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -931,12 +931,140 @@ def test_triple_difference(self):
931931
result = simulate_power(
932932
TripleDifference(),
933933
n_units=80,
934+
n_periods=2,
935+
treatment_period=1,
934936
n_simulations=10,
935937
seed=42,
936938
progress=False,
937939
)
938940
self._assert_valid_result(result, "TripleDifference")
939941

942+
def test_ddd_warns_ignored_params(self):
    """TripleDifference warns about every simulation param the DDD design ignores."""
    with pytest.warns(UserWarning, match="n_periods=6 is ignored") as record:
        simulate_power(
            TripleDifference(),
            n_units=80,
            n_periods=6,
            treatment_period=3,
            treatment_fraction=0.3,
            n_simulations=2,
            seed=42,
            progress=False,
        )

    # The compat check joins all issues into one message, so the warning that
    # mentions n_periods must also mention the other two ignored inputs.
    ddd_messages = [
        str(w.message) for w in record if "n_periods=6 is ignored" in str(w.message)
    ]
    assert ddd_messages
    assert "treatment_period=3 is ignored" in ddd_messages[0]
    assert "treatment_fraction=0.3 is ignored" in ddd_messages[0]
955+
956+
def test_ddd_warns_nonaligned_n_units(self):
    """TripleDifference warns when n_units doesn't map cleanly to 8 cells."""
    # 65 // 8 == 8 per cell, so the effective sample size is 8 * 8 == 64.
    with pytest.warns(UserWarning, match="effective sample size is 64"):
        simulate_power(
            TripleDifference(),
            n_units=65,
            n_periods=2,
            treatment_period=1,
            n_simulations=2,
            seed=42,
            progress=False,
        )
968+
969+
def test_ddd_small_n_units_warns(self):
    """TripleDifference warns when n_units < 16 (clamped to 16)."""
    # max(2, 10 // 8) == 2 per cell, so the effective sample size is 2 * 8 == 16.
    with pytest.warns(UserWarning, match="effective sample size is 16"):
        simulate_power(
            TripleDifference(),
            n_units=10,
            n_periods=2,
            treatment_period=1,
            n_simulations=2,
            seed=42,
            progress=False,
        )
981+
982+
def test_ddd_no_warn_aligned(self):
    """No warning when n_units is a multiple of 8 and defaults match DDD."""
    with warnings.catch_warnings():
        # Escalate warnings to errors so any emitted warning fails the test.
        warnings.simplefilter("error")
        simulate_power(
            TripleDifference(),
            n_units=80,
            n_periods=2,
            treatment_period=1,
            treatment_fraction=0.5,
            n_simulations=2,
            seed=42,
            progress=False,
        )
996+
997+
def test_ddd_no_warn_custom_dgp(self):
    """Custom data_generator bypasses the DDD compat check."""

    def custom_dgp(**kwargs):
        from diff_diff.prep_dgp import generate_ddd_data

        return generate_ddd_data(n_per_cell=10, seed=kwargs.get("seed"))

    with warnings.catch_warnings():
        # n_units=65 and n_periods=6 would trigger the compat warning with the
        # default DGP; escalating warnings to errors proves it is not emitted.
        warnings.simplefilter("error")
        simulate_power(
            TripleDifference(),
            n_units=65,
            n_periods=6,
            data_generator=custom_dgp,
            estimator_kwargs=dict(
                outcome="outcome",
                group="group",
                partition="partition",
                time="time",
            ),
            n_simulations=2,
            seed=42,
            progress=False,
        )
1022+
1023+
def test_ddd_no_warn_n_per_cell_override(self):
    """data_generator_kwargs with n_per_cell suppresses DDD param warnings."""
    with warnings.catch_warnings():
        # n_per_cell=10 gives 10 * 8 == 80 == n_units, so the sample-size check
        # is also silent; escalating warnings to errors enforces "no warning".
        warnings.simplefilter("error")
        simulate_power(
            TripleDifference(),
            n_units=80,
            n_periods=6,
            data_generator_kwargs=dict(n_per_cell=10),
            n_simulations=2,
            seed=42,
            progress=False,
        )
1036+
1037+
@pytest.mark.slow
def test_ddd_mde(self):
    """simulate_mde works for TripleDifference."""
    # n_units=80, n_periods=2 and treatment_period=1 line up with the fixed
    # 2×2×2 design; treatment_fraction is left at the caller's default.
    result = simulate_mde(
        TripleDifference(),
        n_units=80,
        n_periods=2,
        treatment_period=1,
        n_simulations=5,
        effect_range=(0.5, 5.0),
        seed=42,
        progress=False,
    )
    assert isinstance(result, SimulationMDEResults)
    assert result.mde > 0
1052+
1053+
@pytest.mark.slow
def test_ddd_sample_size(self):
    """simulate_sample_size works for TripleDifference."""
    # n_periods=2 and treatment_period=1 match the fixed 2-period DDD design.
    result = simulate_sample_size(
        TripleDifference(),
        n_periods=2,
        treatment_period=1,
        n_simulations=5,
        n_range=(64, 200),
        seed=42,
        progress=False,
    )
    assert isinstance(result, SimulationSampleSizeResults)
    assert result.required_n > 0
1067+
9401068
@pytest.mark.slow
9411069
def test_trop(self):
9421070
result = simulate_power(

0 commit comments

Comments
 (0)