Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions diff_diff/diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ def permutation_test(
ci_upper = np.percentile(valid_effects, (1 - alpha / 2) * 100)

# T-stat from original estimate
t_stat = original_att / se if se > 0 else 0.0
t_stat = original_att / se if np.isfinite(se) and se > 0 else np.nan

return PlaceboTestResults(
test_type="permutation",
Expand Down Expand Up @@ -783,14 +783,14 @@ def leave_one_out_test(
# Statistics of LOO distribution
mean_effect = np.mean(valid_effects)
se = np.std(valid_effects, ddof=1) if len(valid_effects) > 1 else 0.0
t_stat = mean_effect / se if se > 0 else 0.0
t_stat = mean_effect / se if np.isfinite(se) and se > 0 else np.nan

# Use t-distribution for p-value
df = len(valid_effects) - 1 if len(valid_effects) > 1 else 1
p_value = compute_p_value(t_stat, df=df)

# CI
conf_int = compute_confidence_interval(mean_effect, se, alpha, df=df)
conf_int = compute_confidence_interval(mean_effect, se, alpha, df=df) if np.isfinite(se) and se > 0 else (np.nan, np.nan)

return PlaceboTestResults(
test_type="leave_one_out",
Expand Down
12 changes: 6 additions & 6 deletions diff_diff/sun_abraham.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,9 +600,9 @@ def fit(
coef_index_map,
)

overall_t = overall_att / overall_se if overall_se > 0 else 0.0
overall_t = overall_att / overall_se if np.isfinite(overall_se) and overall_se > 0 else np.nan
overall_p = compute_p_value(overall_t)
overall_ci = compute_confidence_interval(overall_att, overall_se, self.alpha)
overall_ci = compute_confidence_interval(overall_att, overall_se, self.alpha) if np.isfinite(overall_se) and overall_se > 0 else (np.nan, np.nan)

# Run bootstrap if requested
bootstrap_results = None
Expand All @@ -623,7 +623,7 @@ def fit(

# Update results with bootstrap inference
overall_se = bootstrap_results.overall_att_se
overall_t = overall_att / overall_se if overall_se > 0 else 0.0
overall_t = overall_att / overall_se if np.isfinite(overall_se) and overall_se > 0 else np.nan
overall_p = bootstrap_results.overall_att_p_value
overall_ci = bootstrap_results.overall_att_ci

Expand All @@ -640,7 +640,7 @@ def fit(
eff_val = event_study_effects[e]["effect"]
se_val = event_study_effects[e]["se"]
event_study_effects[e]["t_stat"] = (
eff_val / se_val if se_val > 0 else 0.0
eff_val / se_val if np.isfinite(se_val) and se_val > 0 else np.nan
)

# Convert cohort effects to storage format
Expand Down Expand Up @@ -878,9 +878,9 @@ def _compute_iw_effects(
agg_var = float(weight_vec @ vcov_subset @ weight_vec)
agg_se = np.sqrt(max(agg_var, 0))

t_stat = agg_effect / agg_se if agg_se > 0 else 0.0
t_stat = agg_effect / agg_se if np.isfinite(agg_se) and agg_se > 0 else np.nan
p_val = compute_p_value(t_stat)
ci = compute_confidence_interval(agg_effect, agg_se, self.alpha)
ci = compute_confidence_interval(agg_effect, agg_se, self.alpha) if np.isfinite(agg_se) and agg_se > 0 else (np.nan, np.nan)

event_study_effects[e] = {
"effect": agg_effect,
Expand Down
4 changes: 2 additions & 2 deletions diff_diff/triple_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,14 +598,14 @@ def fit(
)

# Compute inference
t_stat = att / se if se > 0 else 0.0
t_stat = att / se if np.isfinite(se) and se > 0 else np.nan
df = n_obs - 8 # Approximate df (8 cell means)
if covariates:
df -= len(covariates)
df = max(df, 1)

p_value = compute_p_value(t_stat, df=df)
conf_int = compute_confidence_interval(att, se, self.alpha, df=df)
conf_int = compute_confidence_interval(att, se, self.alpha, df=df) if np.isfinite(se) and se > 0 else (np.nan, np.nan)

# Get number of clusters if clustering
n_clusters = None
Expand Down
23 changes: 23 additions & 0 deletions docs/methodology/REGISTRY.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ This document provides the academic foundations and key implementation requireme
- [TripleDifference](#tripledifference)
- [TROP](#trop)
4. [Diagnostics & Sensitivity](#diagnostics--sensitivity)
- [PlaceboTests](#placebotests)
- [BaconDecomposition](#bacondecomposition)
- [HonestDiD](#honestdid)
- [PreTrendsPower](#pretrendspower)
Expand Down Expand Up @@ -319,6 +320,12 @@ where weights ŵ_{g,e} = n_{g,e} / Σ_g n_{g,e} (sample share of cohort g at eve
- Detection: Pivoted QR decomposition with tolerance `1e-07` (R's `qr()` default)
- Handling: Warns and drops linearly dependent columns, sets NA for dropped coefficients (R-style, matches `lm()`)
- Parameter: `rank_deficient_action` controls behavior: "warn" (default), "error", or "silent"
- NaN inference for undefined statistics:
- t_stat: Uses NaN (not 0.0) when SE is non-finite or zero
- Analytical inference: p_value and CI also NaN when t_stat is NaN (NaN propagates through `compute_p_value` and `compute_confidence_interval`)
- Bootstrap inference: p_value and CI computed from bootstrap distribution, may be valid even when SE/t_stat is NaN (only NaN if <50% of bootstrap samples are valid)
- Applies to overall ATT, per-effect event study, and aggregated event study
- **Note**: Defensive enhancement matching CallawaySantAnna behavior; R's `fixest::sunab()` may produce Inf/NaN without warning

**Reference implementation(s):**
- R: `fixest::sunab()` (Laurent Bergé's implementation)
Expand Down Expand Up @@ -429,6 +436,10 @@ Doubly robust estimator:
- Propensity scores near 0/1: trimmed at `pscore_trim` (default 0.01)
- Empty cells: raises ValueError with diagnostic message
- Collinear covariates: automatic detection and warning
- NaN inference for undefined statistics:
- t_stat: Uses NaN (not 0.0) when SE is non-finite or zero
- p_value and CI: Also NaN when t_stat is NaN
- **Note**: Defensive enhancement; reference implementation behavior not yet documented

**Reference implementation(s):**
- Authors' replication code (forthcoming)
Expand Down Expand Up @@ -656,6 +667,18 @@ For joint method, LOOCV works as follows:

# Diagnostics & Sensitivity

## PlaceboTests

**Module:** `diff_diff/diagnostics.py`

*Edge cases:*
- NaN inference for undefined statistics:
- `permutation_test`: t_stat is NaN when permutation SE is zero (all permutations produce identical estimates)
- `leave_one_out_test`: t_stat, p_value, CI are NaN when LOO SE is zero (all LOO effects identical)
- **Note**: Defensive enhancement matching CallawaySantAnna NaN convention

---

## BaconDecomposition

**Primary source:** [Goodman-Bacon, A. (2021). Difference-in-differences with variation in treatment timing. *Journal of Econometrics*, 225(2), 254-277.](https://doi.org/10.1016/j.jeconom.2021.03.014)
Expand Down
125 changes: 125 additions & 0 deletions tests/test_diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,3 +672,128 @@ def test_returns_dict_structure(self, simple_panel_data):
# Check that each result is either PlaceboTestResults or error dict
for key, value in results.items():
assert isinstance(value, (PlaceboTestResults, dict))


class TestDiagnosticsTStatNaN:
    """Tests for NaN t_stat when SE is invalid in diagnostic functions.

    The diagnostics module uses NaN (not 0.0) for t_stat when the
    resampling-based SE is zero or non-finite; these tests pin that
    convention for both permutation and leave-one-out inference.
    """

    @staticmethod
    def _deterministic_panel(n_units=20):
        """Build a noise-free 2-period panel DataFrame.

        The first half of the units are treated and gain exactly +2.0 in the
        post period; all other cells equal 5.0.  Because outcomes carry no
        noise, resampling-based SEs (permutation / leave-one-out) can collapse
        to zero, which is precisely the edge case under test.

        Args:
            n_units: Total number of units; the first ``n_units // 2`` are treated.

        Returns:
            A long-format DataFrame with columns unit, post, outcome, treated.
        """
        rows = []
        for unit in range(n_units):
            is_treated = unit < n_units // 2
            for post in (0, 1):
                outcome = 5.0 + (2.0 if is_treated and post == 1 else 0.0)
                rows.append(
                    {
                        "unit": unit,
                        "post": post,
                        "outcome": outcome,
                        "treated": int(is_treated),
                    }
                )
        return pd.DataFrame(rows)

    def test_permutation_test_tstat_nan_when_se_zero(self):
        """permutation_test t_stat is NaN when SE is zero (all permutations identical)."""
        # Deterministic outcomes => permutation distribution can have zero variance.
        # No np.random.seed needed: data is deterministic and the permutation
        # RNG is controlled via the explicit seed= argument below.
        df = self._deterministic_panel()

        import warnings

        # Degenerate SEs may legitimately emit warnings; suppress them so the
        # test asserts only on the returned statistics.
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            result = permutation_test(
                df,
                outcome="outcome",
                treatment="treated",
                time="post",
                unit="unit",
                n_permutations=20,
                seed=42,
            )

        se = result.se
        t_stat = result.t_stat

        if not np.isfinite(se) or se == 0:
            # Zero/non-finite SE => t_stat must be NaN, never 0.0.
            assert np.isnan(t_stat), (
                f"permutation t_stat should be NaN when SE={se}, got {t_stat}"
            )
        else:
            # Valid SE => t_stat must be the plain ratio effect/SE.
            expected = result.original_effect / se
            assert np.isclose(t_stat, expected), (
                f"permutation t_stat should be effect/SE, "
                f"expected {expected}, got {t_stat}"
            )

    def test_leave_one_out_tstat_nan_when_se_zero(self):
        """leave_one_out_test t_stat and CI are NaN when SE is zero."""
        # Deterministic outcomes => leaving out any unit yields an identical
        # estimate, so the LOO SE collapses to zero.
        df = self._deterministic_panel()

        import warnings

        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            result = leave_one_out_test(
                df,
                outcome="outcome",
                treatment="treated",
                time="post",
                unit="unit",
            )

        se = result.se
        t_stat = result.t_stat

        if not np.isfinite(se) or se == 0:
            assert np.isnan(t_stat), (
                f"LOO t_stat should be NaN when SE={se}, got {t_stat}"
            )
            # CI must also degrade to (NaN, NaN) alongside the NaN t_stat.
            ci = result.conf_int
            assert np.isnan(ci[0]) and np.isnan(ci[1]), (
                f"LOO conf_int should be (NaN, NaN) when SE={se}, got {ci}"
            )

    def test_permutation_tstat_consistency(self, simple_panel_data):
        """permutation_test t_stat = effect/SE when SE is valid."""
        result = permutation_test(
            simple_panel_data,
            outcome="outcome",
            treatment="treated",
            time="post",
            unit="unit",
            n_permutations=50,
            seed=42,
        )

        se = result.se
        t_stat = result.t_stat

        if not np.isfinite(se) or se == 0:
            assert np.isnan(t_stat), (
                f"t_stat should be NaN when SE={se}, got {t_stat}"
            )
        else:
            expected = result.original_effect / se
            assert np.isclose(t_stat, expected), (
                f"t_stat should be effect/SE, expected {expected}, got {t_stat}"
            )
Loading