diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f215d2e..73e2e81f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Removed +- **TROP `variance_method` parameter** — Jackknife variance estimation removed. + Bootstrap (the only method specified in Athey et al. 2025) is now always used. + The `variance_method` field has also been removed from `TROPResults`. + ## [2.2.0] - 2026-01-27 ### Added diff --git a/README.md b/README.md index d7fea1bd..a675e354 100644 --- a/README.md +++ b/README.md @@ -1274,7 +1274,6 @@ TROP( max_iter=100, # Max iterations for factor estimation tol=1e-6, # Convergence tolerance alpha=0.05, # Significance level - variance_method='bootstrap', # 'bootstrap' or 'jackknife' n_bootstrap=200, # Bootstrap replications seed=None # Random seed ) @@ -1874,8 +1873,7 @@ TROP( max_iter=100, # Max iterations for factor estimation tol=1e-6, # Convergence tolerance alpha=0.05, # Significance level for CIs - variance_method='bootstrap', # 'bootstrap' or 'jackknife' - n_bootstrap=200, # Bootstrap/jackknife iterations + n_bootstrap=200, # Bootstrap replications seed=None # Random seed ) ``` @@ -1899,7 +1897,7 @@ Note: TROP infers treatment periods from the treatment indicator column. The tre | Attribute | Description | |-----------|-------------| | `att` | Average Treatment effect on the Treated | -| `se` | Standard error (bootstrap or jackknife) | +| `se` | Standard error (bootstrap) | | `t_stat` | T-statistic | | `p_value` | P-value | | `conf_int` | Confidence interval | @@ -1918,7 +1916,6 @@ Note: TROP infers treatment periods from the treatment indicator column. The tre | `loocv_score` | LOOCV score for selected parameters | | `n_pre_periods` | Number of pre-treatment periods | | `n_post_periods` | Number of post-treatment periods | -| `variance_method` | Variance estimation method | | `bootstrap_distribution` | Bootstrap distribution (if bootstrap) | **Methods:** diff --git a/diff_diff/trop.py b/diff_diff/trop.py index 8c7ee111..df07bffd 100644 --- a/diff_diff/trop.py +++ b/diff_diff/trop.py @@ -132,8 +132,6 @@ class TROPResults: Effective rank of the factor matrix (sum of singular values / max). loocv_score : float Leave-one-out cross-validation score for selected parameters. - variance_method : str - Method used for variance estimation. alpha : float Significance level for confidence interval. n_pre_periods : int @@ -164,7 +162,6 @@ class TROPResults: factor_matrix: np.ndarray effective_rank: float loocv_score: float - variance_method: str alpha: float = 0.05 n_pre_periods: int = 0 n_post_periods: int = 0 @@ -222,9 +219,8 @@ def summary(self, alpha: Optional[float] = None) -> str: f"{'LOOCV score:':<25} {self.loocv_score:>10.6f}", ] - # Variance method info - lines.append(f"{'Variance method:':<25} {self.variance_method:>10}") - if self.variance_method == "bootstrap" and self.n_bootstrap is not None: + # Variance info + if self.n_bootstrap is not None: lines.append(f"{'Bootstrap replications:':<25} {self.n_bootstrap:>10}") lines.extend([ @@ -280,7 +276,6 @@ def to_dict(self) -> Dict[str, Any]: "lambda_nn": self.lambda_nn, "effective_rank": self.effective_rank, "loocv_score": self.loocv_score, - "variance_method": self.variance_method, } def to_dataframe(self) -> pd.DataFrame: @@ -397,10 +392,8 @@ class TROP: Convergence tolerance for optimization. alpha : float, default=0.05 Significance level for confidence intervals. - variance_method : str, default='bootstrap' - Method for variance estimation: 'bootstrap' or 'jackknife'. n_bootstrap : int, default=200 - Number of replications for variance estimation. + Number of bootstrap replications for variance estimation. max_loocv_samples : int, default=100 Maximum control observations to use in LOOCV for tuning parameter selection. Subsampling is used for computational tractability as @@ -461,7 +454,6 @@ def __init__( max_iter: int = 100, tol: float = 1e-6, alpha: float = 0.05, - variance_method: str = 'bootstrap', n_bootstrap: int = 200, max_loocv_samples: int = 100, seed: Optional[int] = None, @@ -482,19 +474,10 @@ def __init__( self.max_iter = max_iter self.tol = tol self.alpha = alpha - self.variance_method = variance_method self.n_bootstrap = n_bootstrap self.max_loocv_samples = max_loocv_samples self.seed = seed - # Validate parameters - valid_variance_methods = ("bootstrap", "jackknife") - if variance_method not in valid_variance_methods: - raise ValueError( - f"variance_method must be one of {valid_variance_methods}, " - f"got '{variance_method}'" - ) - # Validate that time/unit grids do not contain inf. # Per Athey et al. (2025) Eq. 3, λ_time=0 and λ_unit=0 give uniform # weights (exp(-0 × dist) = 1). Using inf is a misunderstanding of @@ -1260,9 +1243,9 @@ def _fit_joint( Notes ----- - Bootstrap and jackknife variance estimation assume simultaneous treatment - adoption (fixed `treated_periods` across resamples). The treatment timing - is inferred from the data once and held constant for all bootstrap/jackknife + Bootstrap variance estimation assumes simultaneous treatment adoption + (fixed `treated_periods` across resamples). The treatment timing is + inferred from the data once and held constant for all bootstrap iterations. For staggered adoption designs where treatment timing varies across units, use `method="twostep"` which computes observation-specific weights that naturally handle heterogeneous timing. @@ -1505,17 +1488,10 @@ def _fit_joint( # Bootstrap variance estimation effective_lambda = (lambda_time, lambda_unit, lambda_nn) - if self.variance_method == "bootstrap": - se, bootstrap_dist = self._bootstrap_variance_joint( - data, outcome, treatment, unit, time, - effective_lambda, treated_periods - ) - else: - # Jackknife for joint method - se, bootstrap_dist = self._jackknife_variance_joint( - Y, D, effective_lambda, treated_periods, - n_units, n_periods - ) + se, bootstrap_dist = self._bootstrap_variance_joint( + data, outcome, treatment, unit, time, + effective_lambda, treated_periods + ) # Compute test statistics if se > 0: @@ -1550,11 +1526,10 @@ def _fit_joint( factor_matrix=L, effective_rank=effective_rank, loocv_score=best_score, - variance_method=self.variance_method, alpha=self.alpha, n_pre_periods=n_pre_periods, n_post_periods=n_post_periods, - n_bootstrap=self.n_bootstrap if self.variance_method == "bootstrap" else None, + n_bootstrap=self.n_bootstrap, bootstrap_distribution=bootstrap_dist if len(bootstrap_dist) > 0 else None, ) @@ -1752,87 +1727,6 @@ def _fit_joint_with_fixed_lambda( return tau - def _jackknife_variance_joint( - self, - Y: np.ndarray, - D: np.ndarray, - optimal_lambda: Tuple[float, float, float], - treated_periods: int, - n_units: int, - n_periods: int, - ) -> Tuple[float, np.ndarray]: - """ - Compute jackknife standard error for joint method. - - Parameters - ---------- - Y : np.ndarray - Outcome matrix. - D : np.ndarray - Treatment matrix. - optimal_lambda : tuple - Optimal tuning parameters. - treated_periods : int - Number of post-treatment periods. - n_units : int - Number of units. - n_periods : int - Number of periods. - - Returns - ------- - Tuple[float, np.ndarray] - (se, jackknife_estimates). - """ - lambda_time, lambda_unit, lambda_nn = optimal_lambda - jackknife_estimates = [] - - # Get treated unit indices - treated_unit_idx = np.where(np.any(D == 1, axis=0))[0] - - for leave_out in treated_unit_idx: - # True leave-one-out: zero the delta weight for the left-out unit - # This excludes the unit from estimation without imputation - Y_jack = Y.copy() - D_jack = D.copy() - D_jack[:, leave_out] = 0 # Mark as not treated for weight computation - - try: - # Compute weights (left-out unit is still in calculation) - delta = self._compute_joint_weights( - Y_jack, D_jack, lambda_time, lambda_unit, - treated_periods, n_units, n_periods - ) - - # Zero the delta weight for the left-out unit - # This ensures the unit doesn't contribute to estimation - delta[:, leave_out] = 0.0 - - # Fit model (left-out unit has zero weight, truly excluded) - if lambda_nn >= 1e10: - _, _, _, tau = self._solve_joint_no_lowrank(Y_jack, D_jack, delta) - else: - _, _, _, _, tau = self._solve_joint_with_lowrank( - Y_jack, D_jack, delta, lambda_nn, self.max_iter, self.tol - ) - - jackknife_estimates.append(tau) - - except (np.linalg.LinAlgError, ValueError): - continue - - jackknife_estimates = np.array(jackknife_estimates) - - if len(jackknife_estimates) < 2: - return 0.0, jackknife_estimates - - # Jackknife SE formula - n = len(jackknife_estimates) - mean_est = np.mean(jackknife_estimates) - se = np.sqrt((n - 1) / n * np.sum((jackknife_estimates - mean_est) ** 2)) - - return float(se), jackknife_estimates - def fit( self, data: pd.DataFrame, @@ -2175,16 +2069,10 @@ def fit( # Step 4: Variance estimation # Use effective_lambda (converted values) to ensure SE is computed with same # parameters as point estimation. This fixes the variance inconsistency issue. - if self.variance_method == "bootstrap": - se, bootstrap_dist = self._bootstrap_variance( - data, outcome, treatment, unit, time, - effective_lambda, Y=Y, D=D, control_unit_idx=control_unit_idx - ) - else: - se, bootstrap_dist = self._jackknife_variance( - Y, D, control_mask, control_unit_idx, effective_lambda, - n_units, n_periods - ) + se, bootstrap_dist = self._bootstrap_variance( + data, outcome, treatment, unit, time, + effective_lambda, Y=Y, D=D, control_unit_idx=control_unit_idx + ) # Compute test statistics if se > 0: @@ -2221,11 +2109,10 @@ def fit( factor_matrix=L_hat, effective_rank=effective_rank, loocv_score=best_score, - variance_method=self.variance_method, alpha=self.alpha, n_pre_periods=n_pre_periods, n_post_periods=n_post_periods, - n_bootstrap=self.n_bootstrap if self.variance_method == "bootstrap" else None, + n_bootstrap=self.n_bootstrap, bootstrap_distribution=bootstrap_dist if len(bootstrap_dist) > 0 else None, ) @@ -2323,7 +2210,7 @@ def _compute_observation_weights( # Weight matrix: outer product (n_periods x n_units) return np.outer(time_weights, unit_weights) - # Fallback: compute from scratch (used in bootstrap/jackknife) + # Fallback: compute from scratch (used in bootstrap) # Time distance: |t - s| following paper's Equation 3 (page 7) dist_time = np.abs(np.arange(n_periods) - t) time_weights = np.exp(-lambda_time * dist_time) @@ -2904,103 +2791,6 @@ def _bootstrap_variance( se = np.std(bootstrap_estimates, ddof=1) return float(se), bootstrap_estimates - def _jackknife_variance( - self, - Y: np.ndarray, - D: np.ndarray, - control_mask: np.ndarray, - control_unit_idx: np.ndarray, - optimal_lambda: Tuple[float, float, float], - n_units: int, - n_periods: int, - ) -> Tuple[float, np.ndarray]: - """ - Compute jackknife standard error (leave-one-unit-out). - - Uses observation-specific weights following Algorithm 2. - - Parameters - ---------- - Y : np.ndarray - Outcome matrix. - D : np.ndarray - Treatment matrix. - control_mask : np.ndarray - Control observation mask. - control_unit_idx : np.ndarray - Indices of control units. - optimal_lambda : tuple - Optimal tuning parameters. - n_units : int - Number of units. - n_periods : int - Number of periods. - - Returns - ------- - tuple - (se, jackknife_estimates). - """ - lambda_time, lambda_unit, lambda_nn = optimal_lambda - jackknife_estimates = [] - - # Get treated unit indices - treated_unit_idx = np.where(np.any(D == 1, axis=0))[0] - - for leave_out in treated_unit_idx: - # Create mask excluding this unit - Y_jack = Y.copy() - D_jack = D.copy() - Y_jack[:, leave_out] = np.nan - D_jack[:, leave_out] = 0 - - control_mask_jack = D_jack == 0 - - # Get remaining treated observations - treated_obs_jack = [(t, i) for t in range(n_periods) for i in range(n_units) - if D_jack[t, i] == 1] - - if not treated_obs_jack: - continue - - try: - # Compute ATT using observation-specific weights (Algorithm 2) - tau_values = [] - for t, i in treated_obs_jack: - # Compute observation-specific weights for this (i, t) - weight_matrix = self._compute_observation_weights( - Y_jack, D_jack, i, t, lambda_time, lambda_unit, - control_unit_idx, n_units, n_periods - ) - - # Fit model with these weights - alpha, beta, L = self._estimate_model( - Y_jack, control_mask_jack, weight_matrix, lambda_nn, - n_units, n_periods - ) - - # Compute treatment effect - tau = Y_jack[t, i] - alpha[i] - beta[t] - L[t, i] - tau_values.append(tau) - - if tau_values: - jackknife_estimates.append(np.mean(tau_values)) - - except (np.linalg.LinAlgError, ValueError): - continue - - jackknife_estimates = np.array(jackknife_estimates) - - if len(jackknife_estimates) < 2: - return 0.0, jackknife_estimates - - # Jackknife SE formula - n = len(jackknife_estimates) - mean_est = np.mean(jackknife_estimates) - se = np.sqrt((n - 1) / n * np.sum((jackknife_estimates - mean_est) ** 2)) - - return se, jackknife_estimates - def _fit_with_fixed_lambda( self, data: pd.DataFrame, @@ -3086,7 +2876,6 @@ def get_params(self) -> Dict[str, Any]: "max_iter": self.max_iter, "tol": self.tol, "alpha": self.alpha, - "variance_method": self.variance_method, "n_bootstrap": self.n_bootstrap, "max_loocv_samples": self.max_loocv_samples, "seed": self.seed, diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 7b337a3c..1bf16b94 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -550,8 +550,7 @@ Q(λ) = Σ_{j,s: D_js=0} [τ̂_js^loocv(λ)]² - This ensures λ selection only considers fully estimable combinations *Standard errors:* -- Default: Block bootstrap preserving panel structure (Algorithm 3) -- Alternative: Jackknife (leave-one-unit-out) — **implementation addition** not described in the paper +- Block bootstrap preserving panel structure (Algorithm 3) *Edge cases:* - Rank selection: automatic via cross-validation, information criterion, or elbow @@ -661,7 +660,7 @@ For joint method, LOOCV works as follows: - **Simultaneous adoption (enforced)**: The joint method requires all treated units to receive treatment at the same time. A `ValueError` is raised if staggered adoption is detected (units first treated at different periods). Treatment timing is - inferred once and held constant for bootstrap/jackknife variance estimation. + inferred once and held constant for bootstrap variance estimation. For staggered adoption designs, use `method="twostep"`. **Reference**: Adapted from reference implementation. See also Athey et al. (2025). @@ -974,7 +973,7 @@ should be a deliberate user choice. | SunAbraham | Cluster-robust + delta method | Pairs bootstrap | | SyntheticDiD | Placebo variance (Alg 4) | Block bootstrap | | TripleDifference | HC1 / cluster-robust | Influence function for IPW/DR | -| TROP | Block bootstrap | Jackknife | +| TROP | Block bootstrap | — | | BaconDecomposition | N/A (exact decomposition) | Individual 2×2 SEs | | HonestDiD | Inherited from event study | FLCI, C-LF | | PreTrendsPower | Exact (analytical) | - | diff --git a/docs/tutorials/10_trop.ipynb b/docs/tutorials/10_trop.ipynb index ab25d9d6..ffa52b84 100644 --- a/docs/tutorials/10_trop.ipynb +++ b/docs/tutorials/10_trop.ipynb @@ -587,47 +587,14 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## 9. Variance Estimation Methods\n", - "\n", - "TROP supports two methods for variance estimation:\n", - "- **Bootstrap** (default): Unit-level stratified block bootstrap as specified in Algorithm 3 of the paper. Control and treated units are sampled separately to preserve the treatment ratio.\n", - "- **Jackknife**: Leave-one-treated-unit-out. **Note**: This is an implementation convenience, not specified in the paper. The paper's inference procedure uses bootstrap exclusively." - ] + "source": "## 9. Variance Estimation\n\nTROP uses **unit-level stratified block bootstrap** for variance estimation, as specified in Algorithm 3 of Athey et al. (2025). Control and treated units are sampled separately to preserve the treatment ratio. The number of bootstrap replications is controlled by the `n_bootstrap` parameter (default: 200)." }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Compare variance estimation methods\n", - "print(\"Variance estimation comparison:\")\n", - "print(\"=\"*50)\n", - "\n", - "for method in ['bootstrap', 'jackknife']:\n", - " trop_var = TROP(\n", - " lambda_time_grid=[1.0],\n", - " lambda_unit_grid=[1.0], \n", - " lambda_nn_grid=[0.1],\n", - " variance_method=method,\n", - " n_bootstrap=30, # Reduced for faster execution\n", - " seed=42\n", - " )\n", - " \n", - " res = trop_var.fit(\n", - " df,\n", - " outcome='outcome',\n", - " treatment='treated',\n", - " unit='unit',\n", - " time='period'\n", - " )\n", - " \n", - " print(f\"\\n{method.capitalize()}:\")\n", - " print(f\" ATT: {res.att:.4f}\")\n", - " print(f\" SE: {res.se:.4f}\")\n", - " print(f\" 95% CI: [{res.conf_int[0]:.4f}, {res.conf_int[1]:.4f}]\")" - ] + "source": "# Bootstrap variance with different numbers of replications\nprint(\"Bootstrap replications comparison:\")\nprint(\"=\"*50)\n\nfor n_boot in [20, 50, 100]:\n trop_var = TROP(\n lambda_time_grid=[1.0],\n lambda_unit_grid=[1.0], \n lambda_nn_grid=[0.1],\n n_bootstrap=n_boot,\n seed=42\n )\n \n res = trop_var.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n \n print(f\"\\nn_bootstrap={n_boot}:\")\n print(f\" ATT: {res.att:.4f}\")\n print(f\" SE: {res.se:.4f}\")\n print(f\" 95% CI: [{res.conf_int[0]:.4f}, {res.conf_int[1]:.4f}]\")" }, { "cell_type": "code", diff --git a/tests/test_trop.py b/tests/test_trop.py index 111dfe5a..196476c9 100644 --- a/tests/test_trop.py +++ b/tests/test_trop.py @@ -191,7 +191,6 @@ def test_bootstrap_variance(self, simple_panel_data): lambda_time_grid=[0.0, 1.0], lambda_unit_grid=[0.0, 1.0], lambda_nn_grid=[0.0, 0.1], - variance_method="bootstrap", n_bootstrap=30, seed=42 ) @@ -204,30 +203,9 @@ def test_bootstrap_variance(self, simple_panel_data): ) assert results.se > 0 - assert results.variance_method == "bootstrap" assert results.n_bootstrap == 30 assert results.bootstrap_distribution is not None - def test_jackknife_variance(self, simple_panel_data): - """Test jackknife variance estimation.""" - trop_est = TROP( - lambda_time_grid=[0.0, 1.0], - lambda_unit_grid=[0.0, 1.0], - lambda_nn_grid=[0.0, 0.1], - variance_method="jackknife", - seed=42 - ) - results = trop_est.fit( - simple_panel_data, - outcome="outcome", - treatment="treated", - unit="unit", - time="period", - ) - - assert results.se >= 0 - assert results.variance_method == "jackknife" - def test_confidence_interval(self, simple_panel_data): """Test confidence interval properties.""" trop_est = TROP( @@ -260,11 +238,6 @@ def test_get_set_params(self): trop_est.set_params(alpha=0.10) assert trop_est.alpha == 0.10 - def test_invalid_variance_method(self): - """Test error on invalid variance method.""" - with pytest.raises(ValueError): - TROP(variance_method="invalid") - def test_missing_columns(self, simple_panel_data): """Test error when column is missing.""" trop_est = TROP( @@ -541,7 +514,6 @@ def test_nan_propagation_when_se_zero(self): factor_matrix=np.zeros((10, 15)), effective_rank=2.0, loocv_score=0.5, - variance_method="bootstrap", ) # Verify that all inference fields are NaN when SE=0 @@ -1667,7 +1639,6 @@ def test_issue_d_stratified_bootstrap(self): lambda_time_grid=[0.0], lambda_unit_grid=[0.0], lambda_nn_grid=[0.0], - variance_method="bootstrap", n_bootstrap=30, seed=42 ) @@ -2207,7 +2178,6 @@ def test_variance_estimation_uses_converted_params(self, simple_panel_data): lambda_unit_grid=[0.0], lambda_nn_grid=[np.inf], # Will be converted to 1e10 internally n_bootstrap=5, - variance_method="bootstrap", seed=42 ) @@ -2827,7 +2797,6 @@ def test_joint_bootstrap_variance(self, simple_panel_data): lambda_time_grid=[0.0, 1.0], lambda_unit_grid=[0.0, 1.0], lambda_nn_grid=[0.0, 0.1], - variance_method="bootstrap", n_bootstrap=20, seed=42, ) @@ -2840,31 +2809,9 @@ def test_joint_bootstrap_variance(self, simple_panel_data): ) assert results.se > 0 - assert results.variance_method == "bootstrap" assert results.n_bootstrap == 20 assert results.bootstrap_distribution is not None - def test_joint_jackknife_variance(self, simple_panel_data): - """Joint method jackknife variance estimation works.""" - trop_est = TROP( - method="joint", - lambda_time_grid=[0.0, 1.0], - lambda_unit_grid=[0.0, 1.0], - lambda_nn_grid=[0.0, 0.1], - variance_method="jackknife", - seed=42, - ) - results = trop_est.fit( - simple_panel_data, - outcome="outcome", - treatment="treated", - unit="unit", - time="period", - ) - - assert results.se >= 0 - assert results.variance_method == "jackknife" - def test_joint_confidence_interval(self, simple_panel_data): """Joint method produces valid confidence intervals.""" trop_est = TROP( @@ -3099,41 +3046,6 @@ def test_joint_nan_exclusion_behavior(self, simple_panel_data): f"({results_dropped.att:.4f}) - true NaN exclusion" ) - def test_joint_jackknife_produces_variation(self, simple_panel_data): - """Verify jackknife produces variation across leave-out iterations. - - This tests the PR #113 fix: jackknife should truly exclude units - via weight zeroing, not imputation. If imputation were used, all - jackknife estimates would be nearly identical. - """ - trop_est = TROP( - method="joint", - lambda_time_grid=[1.0], - lambda_unit_grid=[1.0], - lambda_nn_grid=[0.0], - variance_method="jackknife", - seed=42, - ) - results = trop_est.fit( - simple_panel_data, - outcome="outcome", - treatment="treated", - unit="unit", - time="period", - ) - - # SE should be positive (variation exists) - assert results.se > 0, "Jackknife SE should be positive" - - # If we can access jackknife estimates, verify they vary - # (The SE being > 0 already implies variation, but this is more explicit) - if hasattr(results, 'bootstrap_distribution') and results.bootstrap_distribution is not None: - # For jackknife, this stores the jackknife estimates - jack_estimates = results.bootstrap_distribution - if len(jack_estimates) > 1: - estimate_std = np.std(jack_estimates) - assert estimate_std > 0, "Jackknife estimates should vary" - def test_joint_unit_no_valid_pre_gets_zero_weight(self, simple_panel_data): """Verify units with no valid pre-period data get zero weight.