diff --git a/diff_diff/diagnostics.py b/diff_diff/diagnostics.py index 123a7de8..cd31f530 100644 --- a/diff_diff/diagnostics.py +++ b/diff_diff/diagnostics.py @@ -662,7 +662,7 @@ def permutation_test( ci_upper = np.percentile(valid_effects, (1 - alpha / 2) * 100) # T-stat from original estimate - t_stat = original_att / se if se > 0 else 0.0 + t_stat = original_att / se if np.isfinite(se) and se > 0 else np.nan return PlaceboTestResults( test_type="permutation", @@ -783,14 +783,14 @@ def leave_one_out_test( # Statistics of LOO distribution mean_effect = np.mean(valid_effects) se = np.std(valid_effects, ddof=1) if len(valid_effects) > 1 else 0.0 - t_stat = mean_effect / se if se > 0 else 0.0 + t_stat = mean_effect / se if np.isfinite(se) and se > 0 else np.nan # Use t-distribution for p-value df = len(valid_effects) - 1 if len(valid_effects) > 1 else 1 p_value = compute_p_value(t_stat, df=df) # CI - conf_int = compute_confidence_interval(mean_effect, se, alpha, df=df) + conf_int = compute_confidence_interval(mean_effect, se, alpha, df=df) if np.isfinite(se) and se > 0 else (np.nan, np.nan) return PlaceboTestResults( test_type="leave_one_out", diff --git a/diff_diff/sun_abraham.py b/diff_diff/sun_abraham.py index cb7313f9..9cd0a6a9 100644 --- a/diff_diff/sun_abraham.py +++ b/diff_diff/sun_abraham.py @@ -600,9 +600,9 @@ def fit( coef_index_map, ) - overall_t = overall_att / overall_se if overall_se > 0 else 0.0 + overall_t = overall_att / overall_se if np.isfinite(overall_se) and overall_se > 0 else np.nan overall_p = compute_p_value(overall_t) - overall_ci = compute_confidence_interval(overall_att, overall_se, self.alpha) + overall_ci = compute_confidence_interval(overall_att, overall_se, self.alpha) if np.isfinite(overall_se) and overall_se > 0 else (np.nan, np.nan) # Run bootstrap if requested bootstrap_results = None @@ -623,7 +623,7 @@ def fit( # Update results with bootstrap inference overall_se = bootstrap_results.overall_att_se - overall_t = overall_att / overall_se if overall_se > 0 else 0.0 + overall_t = overall_att / overall_se if np.isfinite(overall_se) and overall_se > 0 else np.nan overall_p = bootstrap_results.overall_att_p_value overall_ci = bootstrap_results.overall_att_ci @@ -640,7 +640,7 @@ def fit( eff_val = event_study_effects[e]["effect"] se_val = event_study_effects[e]["se"] event_study_effects[e]["t_stat"] = ( - eff_val / se_val if se_val > 0 else 0.0 + eff_val / se_val if np.isfinite(se_val) and se_val > 0 else np.nan ) # Convert cohort effects to storage format @@ -878,9 +878,9 @@ def _compute_iw_effects( agg_var = float(weight_vec @ vcov_subset @ weight_vec) agg_se = np.sqrt(max(agg_var, 0)) - t_stat = agg_effect / agg_se if agg_se > 0 else 0.0 + t_stat = agg_effect / agg_se if np.isfinite(agg_se) and agg_se > 0 else np.nan p_val = compute_p_value(t_stat) - ci = compute_confidence_interval(agg_effect, agg_se, self.alpha) + ci = compute_confidence_interval(agg_effect, agg_se, self.alpha) if np.isfinite(agg_se) and agg_se > 0 else (np.nan, np.nan) event_study_effects[e] = { "effect": agg_effect, diff --git a/diff_diff/triple_diff.py b/diff_diff/triple_diff.py index 126d30b0..fc9c98d7 100644 --- a/diff_diff/triple_diff.py +++ b/diff_diff/triple_diff.py @@ -598,14 +598,14 @@ def fit( ) # Compute inference - t_stat = att / se if se > 0 else 0.0 + t_stat = att / se if np.isfinite(se) and se > 0 else np.nan df = n_obs - 8 # Approximate df (8 cell means) if covariates: df -= len(covariates) df = max(df, 1) p_value = compute_p_value(t_stat, df=df) - conf_int = compute_confidence_interval(att, se, self.alpha, df=df) + conf_int = compute_confidence_interval(att, se, self.alpha, df=df) if np.isfinite(se) and se > 0 else (np.nan, np.nan) # Get number of clusters if clustering n_clusters = None diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 36febbc9..637556ec 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -16,6 +16,7 @@ This document provides the academic foundations and key implementation requireme - [TripleDifference](#tripledifference) - [TROP](#trop) 4. [Diagnostics & Sensitivity](#diagnostics--sensitivity) + - [PlaceboTests](#placebotests) - [BaconDecomposition](#bacondecomposition) - [HonestDiD](#honestdid) - [PreTrendsPower](#pretrendspower) @@ -319,6 +320,12 @@ where weights ŵ_{g,e} = n_{g,e} / Σ_g n_{g,e} (sample share of cohort g at eve - Detection: Pivoted QR decomposition with tolerance `1e-07` (R's `qr()` default) - Handling: Warns and drops linearly dependent columns, sets NA for dropped coefficients (R-style, matches `lm()`) - Parameter: `rank_deficient_action` controls behavior: "warn" (default), "error", or "silent" +- NaN inference for undefined statistics: + - t_stat: Uses NaN (not 0.0) when SE is non-finite or zero + - Analytical inference: p_value and CI also NaN when t_stat is NaN (NaN propagates through `compute_p_value` and `compute_confidence_interval`) + - Bootstrap inference: p_value and CI computed from bootstrap distribution, may be valid even when SE/t_stat is NaN (only NaN if <50% of bootstrap samples are valid) + - Applies to overall ATT, per-effect event study, and aggregated event study + - **Note**: Defensive enhancement matching CallawaySantAnna behavior; R's `fixest::sunab()` may produce Inf/NaN without warning **Reference implementation(s):** - R: `fixest::sunab()` (Laurent Bergé's implementation) @@ -429,6 +436,10 @@ Doubly robust estimator: - Propensity scores near 0/1: trimmed at `pscore_trim` (default 0.01) - Empty cells: raises ValueError with diagnostic message - Collinear covariates: automatic detection and warning +- NaN inference for undefined statistics: + - t_stat: Uses NaN (not 0.0) when SE is non-finite or zero + - p_value and CI: Also NaN when t_stat is NaN + - **Note**: Defensive enhancement; reference implementation behavior not yet documented **Reference implementation(s):** - Authors' replication code (forthcoming) @@ -656,6 +667,18 @@ For joint method, LOOCV works as follows: # Diagnostics & Sensitivity +## PlaceboTests + +**Module:** `diff_diff/diagnostics.py` + +*Edge cases:* +- NaN inference for undefined statistics: + - `permutation_test`: t_stat is NaN when permutation SE is zero (all permutations produce identical estimates) + - `leave_one_out_test`: t_stat, p_value, CI are NaN when LOO SE is zero (all LOO effects identical) + - **Note**: Defensive enhancement matching CallawaySantAnna NaN convention + +--- + ## BaconDecomposition **Primary source:** [Goodman-Bacon, A. (2021). Difference-in-differences with variation in treatment timing. *Journal of Econometrics*, 225(2), 254-277.](https://doi.org/10.1016/j.jeconom.2021.03.014) diff --git a/tests/test_diagnostics.py b/tests/test_diagnostics.py index 4564c71f..2bd34469 100644 --- a/tests/test_diagnostics.py +++ b/tests/test_diagnostics.py @@ -672,3 +672,128 @@ def test_returns_dict_structure(self, simple_panel_data): # Check that each result is either PlaceboTestResults or error dict for key, value in results.items(): assert isinstance(value, (PlaceboTestResults, dict)) + + +class TestDiagnosticsTStatNaN: + """Tests for NaN t_stat when SE is invalid in diagnostic functions.""" + + def test_permutation_test_tstat_nan_when_se_zero(self): + """permutation_test t_stat is NaN when SE is zero (all permutations identical).""" + np.random.seed(42) + + # Create data where all units have deterministic outcomes + # so permutation distribution has zero variance + n_units = 20 + data = [] + for unit in range(n_units): + is_treated = unit < n_units // 2 + for post in [0, 1]: + y = 5.0 + if is_treated and post == 1: + y += 2.0 + data.append({ + "unit": unit, + "post": post, + "outcome": y, + "treated": int(is_treated), + }) + + df = pd.DataFrame(data) + + import warnings + with warnings.catch_warnings(record=True): + warnings.simplefilter("always") + result = permutation_test( + df, + outcome="outcome", + treatment="treated", + time="post", + unit="unit", + n_permutations=20, + seed=42, + ) + + se = result.se + t_stat = result.t_stat + + if not np.isfinite(se) or se == 0: + assert np.isnan(t_stat), ( + f"permutation t_stat should be NaN when SE={se}, got {t_stat}" + ) + else: + expected = result.original_effect / se + assert np.isclose(t_stat, expected), ( + f"permutation t_stat should be effect/SE, " + f"expected {expected}, got {t_stat}" + ) + + def test_leave_one_out_tstat_nan_when_se_zero(self): + """leave_one_out_test t_stat and CI are NaN when SE is zero.""" + np.random.seed(42) + + # Create data where leaving out any unit gives identical results + # (deterministic outcomes, no noise) + n_units = 20 + data = [] + for unit in range(n_units): + is_treated = unit < n_units // 2 + for post in [0, 1]: + y = 5.0 + if is_treated and post == 1: + y += 2.0 + data.append({ + "unit": unit, + "post": post, + "outcome": y, + "treated": int(is_treated), + }) + + df = pd.DataFrame(data) + + import warnings + with warnings.catch_warnings(record=True): + warnings.simplefilter("always") + result = leave_one_out_test( + df, + outcome="outcome", + treatment="treated", + time="post", + unit="unit", + ) + + se = result.se + t_stat = result.t_stat + + if not np.isfinite(se) or se == 0: + assert np.isnan(t_stat), ( + f"LOO t_stat should be NaN when SE={se}, got {t_stat}" + ) + ci = result.conf_int + assert np.isnan(ci[0]) and np.isnan(ci[1]), ( + f"LOO conf_int should be (NaN, NaN) when SE={se}, got {ci}" + ) + + def test_permutation_tstat_consistency(self, simple_panel_data): + """permutation_test t_stat = effect/SE when SE is valid.""" + result = permutation_test( + simple_panel_data, + outcome="outcome", + treatment="treated", + time="post", + unit="unit", + n_permutations=50, + seed=42, + ) + + se = result.se + t_stat = result.t_stat + + if not np.isfinite(se) or se == 0: + assert np.isnan(t_stat), ( + f"t_stat should be NaN when SE={se}, got {t_stat}" + ) + else: + expected = result.original_effect / se + assert np.isclose(t_stat, expected), ( + f"t_stat should be effect/SE, expected {expected}, got {t_stat}" + ) diff --git a/tests/test_sun_abraham.py b/tests/test_sun_abraham.py index 8a75e4e9..c91f266b 100644 --- a/tests/test_sun_abraham.py +++ b/tests/test_sun_abraham.py @@ -884,3 +884,177 @@ def test_rank_deficient_action_silent_no_warning(self): # Should still get valid results assert results is not None assert results.overall_att is not None + + +class TestSunAbrahamTStatNaN: + """Tests for NaN t_stat when SE is invalid.""" + + def test_per_effect_tstat_consistency(self): + """Per-effect t_stat uses NaN (not 0.0) when SE is non-finite or zero.""" + data = generate_staggered_data( + n_units=60, + n_periods=8, + n_cohorts=2, + treatment_effect=2.0, + seed=456, + ) + + sa = SunAbraham(n_bootstrap=0) + results = sa.fit( + data, + outcome="outcome", + unit="unit", + time="time", + first_treat="first_treat", + ) + + for e, effect_data in results.event_study_effects.items(): + se = effect_data["se"] + t_stat = effect_data["t_stat"] + + if not np.isfinite(se) or se == 0: + assert np.isnan(t_stat), ( + f"t_stat for e={e} should be NaN when SE={se}, " + f"got t_stat={t_stat}" + ) + else: + expected = effect_data["effect"] / se + assert np.isclose(t_stat, expected), ( + f"t_stat for e={e} should be effect/SE, " + f"expected {expected}, got {t_stat}" + ) + + def test_overall_tstat_nan_when_se_invalid(self): + """Overall t_stat is NaN when SE is non-finite or zero.""" + data = generate_staggered_data( + n_units=60, + n_periods=8, + n_cohorts=2, + treatment_effect=2.0, + seed=456, + ) + + sa = SunAbraham(n_bootstrap=0) + results = sa.fit( + data, + outcome="outcome", + unit="unit", + time="time", + first_treat="first_treat", + ) + + se = results.overall_se + t_stat = results.overall_t_stat + + if not np.isfinite(se) or se == 0: + assert np.isnan(t_stat), ( + f"overall_t_stat should be NaN when SE={se}, got {t_stat}" + ) + assert np.isnan(results.overall_p_value), ( + f"overall_p_value should be NaN when SE={se} (analytical inference), " + f"got {results.overall_p_value}" + ) + ci = results.overall_conf_int + assert np.isnan(ci[0]) and np.isnan(ci[1]), ( + f"overall_conf_int should be (NaN, NaN) when SE={se}, got {ci}" + ) + else: + expected = results.overall_att / se + assert np.isclose(t_stat, expected), ( + f"overall_t_stat should be ATT/SE, expected {expected}, got {t_stat}" + ) + + def test_bootstrap_tstat_nan_when_se_invalid(self): + """Bootstrap t_stat uses NaN (not 0.0) when SE is non-finite or zero.""" + data = generate_staggered_data( + n_units=60, + n_periods=8, + n_cohorts=2, + treatment_effect=2.0, + seed=456, + ) + + sa = SunAbraham(n_bootstrap=50, seed=42) + results = sa.fit( + data, + outcome="outcome", + unit="unit", + time="time", + first_treat="first_treat", + ) + + # Check overall + se = results.overall_se + t_stat = results.overall_t_stat + + if not np.isfinite(se) or se == 0: + assert np.isnan(t_stat), ( + f"bootstrap overall_t_stat should be NaN when SE={se}, got {t_stat}" + ) + + # Check event study effects + for e, effect_data in results.event_study_effects.items(): + se = effect_data["se"] + t_stat = effect_data["t_stat"] + + if not np.isfinite(se) or se == 0: + assert np.isnan(t_stat), ( + f"bootstrap t_stat for e={e} should be NaN when SE={se}, " + f"got t_stat={t_stat}" + ) + else: + expected = effect_data["effect"] / se + assert np.isclose(t_stat, expected), ( + f"bootstrap t_stat for e={e} should be effect/SE, " + f"expected {expected}, got {t_stat}" + ) + + def test_aggregated_event_study_tstat_nan(self): + """Aggregated event study t_stat is NaN when SE is zero or non-finite.""" + n_units = 20 + n_periods = 5 + np.random.seed(123) + + data = [] + for unit in range(n_units): + first_treat = 3 if unit < n_units // 2 else 0 + for t in range(1, n_periods + 1): + outcome = np.random.randn() + data.append({ + "unit": unit, + "time": t, + "outcome": outcome, + "first_treat": first_treat, + }) + + df = pd.DataFrame(data) + + sa = SunAbraham(n_bootstrap=0) + results = sa.fit( + df, + outcome="outcome", + unit="unit", + time="time", + first_treat="first_treat", + ) + + for e, effect_data in results.event_study_effects.items(): + se = effect_data["se"] + t_stat = effect_data["t_stat"] + effect = effect_data["effect"] + + if not np.isfinite(se) or se <= 0: + assert np.isnan(t_stat), ( + f"Aggregated t_stat for e={e} should be NaN when SE={se}, " + f"got t_stat={t_stat}" + ) + ci = effect_data["conf_int"] + assert np.isnan(ci[0]) and np.isnan(ci[1]), ( + f"Aggregated CI for e={e} should be (NaN, NaN) when SE={se}, got {ci}" + ) + else: + expected_t = effect / se + assert np.isclose(t_stat, expected_t, rtol=1e-10), ( + f"Aggregated t_stat for e={e} should be effect/SE={expected_t}, " + f"got {t_stat}" + ) diff --git a/tests/test_triple_diff.py b/tests/test_triple_diff.py index 02d77b32..6e3dabd8 100644 --- a/tests/test_triple_diff.py +++ b/tests/test_triple_diff.py @@ -958,3 +958,73 @@ def test_convenience_function_passes_rank_deficient_action(self, ddd_data_with_c covariates=["x1", "x1_dup"], rank_deficient_action="error" ) + + +class TestTripleDifferenceTStatNaN: + """Tests for NaN t_stat when SE is invalid.""" + + def test_tstat_nan_when_se_zero(self): + """t_stat is NaN (not 0.0) when SE is zero or non-finite.""" + # Generate standard DDD data + data = generate_ddd_data(n_per_cell=100, true_att=2.0, seed=42) + + td = TripleDifference(estimation_method="reg") + results = td.fit( + data, + outcome="outcome", + group="group", + partition="partition", + time="time", + ) + + se = results.se + t_stat = results.t_stat + + if not np.isfinite(se) or se == 0: + assert np.isnan(t_stat), ( + f"t_stat should be NaN when SE={se}, got {t_stat}" + ) + ci = results.conf_int + assert np.isnan(ci[0]) and np.isnan(ci[1]), ( + f"conf_int should be (NaN, NaN) when SE={se}, got {ci}" + ) + else: + expected = results.att / se + assert np.isclose(t_stat, expected), ( + f"t_stat should be ATT/SE, expected {expected}, got {t_stat}" + ) + + def test_tstat_consistency_all_methods(self): + """t_stat follows NaN pattern across all estimation methods.""" + data = generate_ddd_data( + n_per_cell=50, + true_att=2.0, + seed=42, + add_covariates=True, + covariate_effect=0.5, + ) + + for method in ["reg", "ipw", "dr"]: + td = TripleDifference(estimation_method=method) + results = td.fit( + data, + outcome="outcome", + group="group", + partition="partition", + time="time", + covariates=["x1"], + ) + + se = results.se + t_stat = results.t_stat + + if not np.isfinite(se) or se == 0: + assert np.isnan(t_stat), ( + f"[{method}] t_stat should be NaN when SE={se}, got {t_stat}" + ) + else: + expected = results.att / se + assert np.isclose(t_stat, expected), ( + f"[{method}] t_stat should be ATT/SE, " + f"expected {expected}, got {t_stat}" + )