diff --git a/README.md b/README.md
index f6d103b1..571753d2 100644
--- a/README.md
+++ b/README.md
@@ -1983,7 +1983,7 @@ TROP(
     max_iter=100,              # Max iterations for factor estimation
     tol=1e-6,                  # Convergence tolerance
     alpha=0.05,                # Significance level for CIs
-    n_bootstrap=200,           # Bootstrap replications
+    n_bootstrap=200,           # Bootstrap replications (minimum 2; TROP requires bootstrap for SEs)
     seed=None                  # Random seed
 )
 ```
@@ -2064,8 +2064,6 @@ SunAbraham(
 | `time` | str | Time period column |
 | `first_treat` | str | Column with first treatment period (0 for never-treated) |
 | `covariates` | list | Covariate column names |
-| `min_pre_periods` | int | Minimum pre-treatment periods to include |
-| `min_post_periods` | int | Minimum post-treatment periods to include |
 
 ### SunAbrahamResults
 
@@ -2105,6 +2103,7 @@ ImputationDiD(
     alpha=0.05,                       # Significance level for CIs
     cluster=None,                     # Column for cluster-robust SEs
     n_bootstrap=0,                    # Bootstrap iterations (0 = analytical)
+    bootstrap_weights='rademacher',   # 'rademacher', 'mammen', or 'webb'
     seed=None,                        # Random seed
     rank_deficient_action='warn',     # 'warn', 'error', or 'silent'
     horizon_max=None,                 # Max event-study horizon
@@ -2159,6 +2158,7 @@ TwoStageDiD(
     alpha=0.05,                       # Significance level for CIs
     cluster=None,                     # Column for cluster-robust SEs (defaults to unit)
     n_bootstrap=0,                    # Bootstrap iterations (0 = analytical GMM SEs)
+    bootstrap_weights='rademacher',   # 'rademacher', 'mammen', or 'webb'
     seed=None,                        # Random seed
     rank_deficient_action='warn',     # 'warn', 'error', or 'silent'
     horizon_max=None,                 # Max event-study horizon
diff --git a/TODO.md b/TODO.md
index 37485884..7d4b8ea5 100644
--- a/TODO.md
+++ b/TODO.md
@@ -28,7 +28,7 @@ Target: < 1000 lines per module for maintainability.
 | ~~`trop.py`~~ | ~~2904~~ ~2560 | ✅ Partially split: results extracted to `trop_results.py` (~340 lines) |
 | ~~`imputation.py`~~ | ~~2480~~ ~1740 | ✅ Split into imputation.py, imputation_results.py, imputation_bootstrap.py |
 | ~~`two_stage.py`~~ | ~~2209~~ ~1490 | ✅ Split into two_stage.py, two_stage_results.py, two_stage_bootstrap.py |
-| `utils.py` | 1879 | Monitor -- legacy placebo functions stay to avoid circular imports |
+| `utils.py` | 1780 | Monitor -- legacy placebo function removed |
 | `visualization.py` | 1678 | Monitor -- growing but cohesive |
 | `linalg.py` | 1537 | Monitor -- unified backend, splitting would hurt cohesion |
 | `honest_did.py` | 1511 | Acceptable |
@@ -58,18 +58,18 @@ Deferred items from PR reviews that were not addressed before merge.
 
 | Issue | Location | PR | Priority |
 |-------|----------|----|----------|
-| TwoStageDiD & ImputationDiD bootstrap hardcodes Rademacher only; no `bootstrap_weights` parameter unlike CallawaySantAnna | `two_stage_bootstrap.py`, `imputation_bootstrap.py` | #156, #141 | Medium |
-| TwoStageDiD GMM score logic duplicated between analytic/bootstrap with inconsistent NaN/overflow handling | `two_stage.py`, `two_stage_bootstrap.py` | #156 | Medium |
-| ImputationDiD weight construction duplicated between aggregation and bootstrap (drift risk) -- has explicit code comment acknowledging duplication | `imputation.py`, `imputation_bootstrap.py` | #141 | Medium |
-| ImputationDiD dense `(A0'A0).toarray()` scales O((U+T+K)^2), OOM risk on large panels | `imputation.py` | #141 | Medium |
+| ~~TwoStageDiD & ImputationDiD bootstrap hardcodes Rademacher only; no `bootstrap_weights` parameter unlike CallawaySantAnna~~ | ~~`two_stage_bootstrap.py`, `imputation_bootstrap.py`~~ | ~~#156, #141~~ | ✅ Fixed: Added `bootstrap_weights` parameter to both estimators |
+| ~~TwoStageDiD GMM score logic duplicated between analytic/bootstrap with inconsistent NaN/overflow handling~~ | ~~`two_stage.py`, `two_stage_bootstrap.py`~~ | ~~#156~~ | ✅ Fixed: Unified via `_compute_gmm_scores()` static method |
+| ~~ImputationDiD weight construction duplicated between aggregation and bootstrap (drift risk)~~ | ~~`imputation.py`, `imputation_bootstrap.py`~~ | ~~#141~~ | ✅ Fixed: Extracted `_compute_target_weights()` helper in `imputation_bootstrap.py` |
+| ImputationDiD dense `(A0'A0).toarray()` scales O((U+T+K)^2), OOM risk on large panels | `imputation.py` | #141 | Medium (deferred — only triggers when sparse solver fails; fixing requires sparse least-squares alternatives) |
 
 #### Performance
 
 | Issue | Location | PR | Priority |
 |-------|----------|----|----------|
-| TwoStageDiD per-column `.toarray()` in loop for cluster scores | `two_stage_bootstrap.py` | #156 | Medium |
+| ~~TwoStageDiD per-column `.toarray()` in loop for cluster scores~~ | ~~`two_stage_bootstrap.py`~~ | ~~#156~~ | ✅ Fixed: Single `.toarray()` call replaces per-column loop |
 | ImputationDiD event-study SEs recompute full conservative variance per horizon (should cache A0/A1 factorization) | `imputation.py` | #141 | Low |
-| Legacy `compute_placebo_effects` uses deprecated projected-gradient weights (marked deprecated, users directed to `SyntheticDiD`) | `utils.py:1689-1691` | #145 | Low |
+| ~~Legacy `compute_placebo_effects` uses deprecated projected-gradient weights~~ | ~~`utils.py:1689-1691`~~ | ~~#145~~ | ✅ Fixed: Removed function entirely |
 | Rust faer SVD ndarray-to-faer conversion overhead (minimal vs SVD cost) | `rust/src/linalg.rs:67` | #115 | Low |
 
 #### Testing/Docs
@@ -78,11 +78,11 @@ Deferred items from PR reviews that were not addressed before merge.
 |-------|----------|----|----------|
 | Tutorial notebooks not executed in CI | `docs/tutorials/*.ipynb` | #159 | Low |
 | ~~TwoStageDiD `test_nan_propagation` is a no-op~~ | ~~`tests/test_two_stage.py:643-652`~~ | ~~#156~~ | ✅ Fixed |
-| ImputationDiD bootstrap + covariate path untested | `tests/test_imputation.py` | #141 | Low |
-| TROP `n_bootstrap >= 2` validation missing (can yield 0/NaN SE silently) | `trop.py:462` | #124 | Low |
-| SunAbraham deprecated `min_pre_periods`/`min_post_periods` still in `fit()` docstring | `sun_abraham.py:458-487` | #153 | Low |
+| ~~ImputationDiD bootstrap + covariate path untested~~ | ~~`tests/test_imputation.py`~~ | ~~#141~~ | ✅ Fixed: Added `test_bootstrap_with_covariates` |
+| ~~TROP `n_bootstrap >= 2` validation missing (can yield 0/NaN SE silently)~~ | ~~`trop.py:462`~~ | ~~#124~~ | ✅ Fixed: Added `ValueError` for `n_bootstrap < 2` |
+| ~~SunAbraham deprecated `min_pre_periods`/`min_post_periods` still in `fit()` docstring~~ | ~~`sun_abraham.py:458-487`~~ | ~~#153~~ | ✅ Fixed: Removed deprecated params from `fit()` |
 | R comparison tests spawn separate `Rscript` per test (slow CI) | `tests/test_methodology_twfe.py:294` | #139 | Low |
-| Rust TROP bootstrap SE returns 0.0 instead of NaN for <2 samples | `rust/src/trop.rs:1038-1054` | #115 | Low |
+| ~~Rust TROP bootstrap SE returns 0.0 instead of NaN for <2 samples~~ | ~~`rust/src/trop.rs:1038-1054`~~ | ~~#115~~ | ✅ Already fixed: Returns `f64::NAN` at `rust/src/trop.rs:1034` |
 
 ---
 
diff --git a/diff_diff/imputation.py b/diff_diff/imputation.py
index ae651efe..e9943ecb 100644
--- a/diff_diff/imputation.py
+++ b/diff_diff/imputation.py
@@ -22,7 +22,7 @@
 from scipy import sparse, stats
 from scipy.sparse.linalg import spsolve
 
-from diff_diff.imputation_bootstrap import ImputationDiDBootstrapMixin
+from diff_diff.imputation_bootstrap import ImputationDiDBootstrapMixin, _compute_target_weights
 from diff_diff.imputation_results import ImputationBootstrapResults, ImputationDiDResults  # noqa: F401 (re-export)
 from diff_diff.linalg import solve_ols
 from diff_diff.utils import safe_inference
@@ -63,6 +63,8 @@ class ImputationDiD(ImputationDiDBootstrapMixin):
     n_bootstrap : int, default=0
         Number of bootstrap iterations. If 0, uses analytical inference
         (conservative variance from Theorem 3).
+    bootstrap_weights : str, default="rademacher"
+        Type of bootstrap weights: "rademacher", "mammen", or "webb".
     seed : int, optional
         Random seed for reproducibility.
     rank_deficient_action : str, default="warn"
@@ -126,6 +128,7 @@ def __init__(
         alpha: float = 0.05,
         cluster: Optional[str] = None,
         n_bootstrap: int = 0,
+        bootstrap_weights: str = "rademacher",
         seed: Optional[int] = None,
         rank_deficient_action: str = "warn",
         horizon_max: Optional[int] = None,
@@ -136,6 +139,11 @@ def __init__(
                 f"rank_deficient_action must be 'warn', 'error', or 'silent', "
                 f"got '{rank_deficient_action}'"
             )
+        if bootstrap_weights not in ("rademacher", "mammen", "webb"):
+            raise ValueError(
+                f"bootstrap_weights must be 'rademacher', 'mammen', or 'webb', "
+                f"got '{bootstrap_weights}'"
+            )
         if aux_partition not in ("cohort_horizon", "cohort", "horizon"):
             raise ValueError(
                 f"aux_partition must be 'cohort_horizon', 'cohort', or 'horizon', "
@@ -146,6 +154,7 @@ def __init__(
         self.alpha = alpha
         self.cluster = cluster
         self.n_bootstrap = n_bootstrap
+        self.bootstrap_weights = bootstrap_weights
         self.seed = seed
         self.rank_deficient_action = rank_deficient_action
         self.horizon_max = horizon_max
@@ -1359,15 +1368,7 @@ def _aggregate_event_study(
             effect = float(np.mean(valid_tau))
 
             # Compute SE via conservative variance with horizon-specific weights
-            weights_h = np.zeros(int(omega_1_mask.sum()))
-            # Map h_mask (relative to df_1) to weights array
-            h_indices_in_omega1 = np.where(h_mask)[0]
-            n_valid = len(valid_tau)
-            # Only weight valid (finite) observations
-            finite_mask = np.isfinite(tau_hat[h_mask])
-            valid_h_indices = h_indices_in_omega1[finite_mask]
-            for idx in valid_h_indices:
-                weights_h[idx] = 1.0 / n_valid
+            weights_h, n_valid = _compute_target_weights(tau_hat, h_mask)
 
             se = self._compute_conservative_variance(
                 df=df,
@@ -1477,12 +1478,7 @@ def _aggregate_group(
             effect = float(np.mean(valid_tau))
 
             # Compute SE with group-specific weights
-            weights_g = np.zeros(int(omega_1_mask.sum()))
-            finite_mask = np.isfinite(tau_hat) & g_mask
-            g_indices = np.where(finite_mask)[0]
-            n_valid = len(valid_tau)
-            for idx in g_indices:
-                weights_g[idx] = 1.0 / n_valid
+            weights_g, _ = _compute_target_weights(tau_hat, g_mask)
 
             se = self._compute_conservative_variance(
                 df=df,
@@ -1664,6 +1660,7 @@ def get_params(self) -> Dict[str, Any]:
             "alpha": self.alpha,
             "cluster": self.cluster,
             "n_bootstrap": self.n_bootstrap,
+            "bootstrap_weights": self.bootstrap_weights,
             "seed": self.seed,
             "rank_deficient_action": self.rank_deficient_action,
             "horizon_max": self.horizon_max,
diff --git a/diff_diff/imputation_bootstrap.py b/diff_diff/imputation_bootstrap.py
index 7d9f1e3d..4524cfb8 100644
--- a/diff_diff/imputation_bootstrap.py
+++ b/diff_diff/imputation_bootstrap.py
@@ -19,6 +19,39 @@
 ]
 
 
+def _compute_target_weights(
+    tau_hat: np.ndarray,
+    target_mask: np.ndarray,
+) -> "tuple[np.ndarray, int]":
+    """
+    Equal weights for finite tau_hat observations within target_mask.
+
+    Used by both aggregation and bootstrap paths to avoid weight logic
+    duplication.
+
+    Parameters
+    ----------
+    tau_hat : np.ndarray
+        Per-observation treatment effects (may contain NaN).
+    target_mask : np.ndarray
+        Boolean mask selecting the target subset within tau_hat.
+
+    Returns
+    -------
+    weights : np.ndarray
+        Weight array (same length as tau_hat). 1/n_valid for finite
+        observations in target_mask, 0 elsewhere.
+    n_valid : int
+        Number of finite observations in the target subset.
+    """
+    finite_target = np.isfinite(tau_hat) & target_mask
+    n_valid = int(finite_target.sum())
+    weights = np.zeros(len(tau_hat))
+    if n_valid > 0:
+        weights[np.where(finite_target)[0]] = 1.0 / n_valid
+    return weights, n_valid
+
+
 class ImputationDiDBootstrapMixin:
     """Mixin providing bootstrap inference methods for ImputationDiD."""
 
@@ -91,7 +124,8 @@ def _precompute_bootstrap_psi(
 
         For each aggregation target (overall, per-horizon, per-group), computes
         psi_i = sum_t v_it * epsilon_tilde_it for each cluster. The multiplier
-        bootstrap then perturbs these psi sums with Rademacher weights.
+        bootstrap then perturbs these psi sums with multiplier weights
+        (rademacher/mammen/webb; configurable via ``bootstrap_weights``).
 
         Computational cost scales with the number of aggregation targets, since
         each target requires its own v_untreated computation (weight-dependent).
@@ -120,13 +154,10 @@ def _precompute_bootstrap_psi(
         result["overall"] = (overall_psi, cluster_ids)
 
         # Event study: per-horizon weights
-        # NOTE: weight logic duplicated from _aggregate_event_study.
-        # If weight scheme changes there, update here too.
         if event_study_effects:
             result["event_study"] = {}
             df_1 = df.loc[omega_1_mask]
             rel_times = df_1["_rel_time"].values
-            n_omega_1 = int(omega_1_mask.sum())
 
             # Balanced cohort mask (same logic as _aggregate_event_study)
             balanced_mask = None
@@ -150,24 +181,18 @@ def _precompute_bootstrap_psi(
                 h_mask = rel_times == h
                 if balanced_mask is not None:
                     h_mask = h_mask & balanced_mask
-                weights_h = np.zeros(n_omega_1)
-                finite_h = np.isfinite(tau_hat) & h_mask
-                n_valid_h = int(finite_h.sum())
+                weights_h, n_valid_h = _compute_target_weights(tau_hat, h_mask)
                 if n_valid_h == 0:
                     continue
-                weights_h[np.where(finite_h)[0]] = 1.0 / n_valid_h
 
                 psi_h, _ = self._compute_cluster_psi_sums(**common, weights=weights_h)
                 result["event_study"][h] = psi_h
 
         # Group effects: per-group weights
-        # NOTE: weight logic duplicated from _aggregate_group.
-        # If weight scheme changes there, update here too.
         if group_effects:
             result["group"] = {}
             df_1 = df.loc[omega_1_mask]
             cohorts = df_1[first_treat].values
-            n_omega_1 = int(omega_1_mask.sum())
 
             for g in group_effects:
                 if group_effects[g].get("n_obs", 0) == 0:
@@ -175,12 +200,9 @@ def _precompute_bootstrap_psi(
                 if not np.isfinite(group_effects[g].get("effect", np.nan)):
                     continue
                 g_mask = cohorts == g
-                weights_g = np.zeros(n_omega_1)
-                finite_g = np.isfinite(tau_hat) & g_mask
-                n_valid_g = int(finite_g.sum())
+                weights_g, n_valid_g = _compute_target_weights(tau_hat, g_mask)
                 if n_valid_g == 0:
                     continue
-                weights_g[np.where(finite_g)[0]] = 1.0 / n_valid_g
 
                 psi_g, _ = self._compute_cluster_psi_sums(**common, weights=weights_g)
                 result["group"][g] = psi_g
@@ -197,7 +219,8 @@ def _run_bootstrap(
         """
         Run multiplier bootstrap on pre-computed influence function sums.
 
-        Uses T_b = sum_i w_b_i * psi_i where w_b_i are Rademacher weights
+        Uses T_b = sum_i w_b_i * psi_i where w_b_i are multiplier weights
+        (rademacher/mammen/webb; configurable via ``bootstrap_weights``)
         and psi_i are cluster-level influence function sums from Theorem 3.
         SE = std(T_b, ddof=1).
         """
@@ -216,7 +239,7 @@ def _run_bootstrap(
 
         # Generate ALL weights upfront: shape (n_bootstrap, n_clusters)
         all_weights = _generate_bootstrap_weights_batch(
-            self.n_bootstrap, n_clusters, "rademacher", rng
+            self.n_bootstrap, n_clusters, self.bootstrap_weights, rng
         )
 
         # Overall ATT bootstrap draws
@@ -295,7 +318,7 @@ def _run_bootstrap(
 
         return ImputationBootstrapResults(
             n_bootstrap=self.n_bootstrap,
-            weight_type="rademacher",
+            weight_type=self.bootstrap_weights,
             alpha=self.alpha,
             overall_att_se=overall_se,
             overall_att_ci=overall_ci,
diff --git a/diff_diff/imputation_results.py b/diff_diff/imputation_results.py
index 6a05a5d8..6520fca4 100644
--- a/diff_diff/imputation_results.py
+++ b/diff_diff/imputation_results.py
@@ -33,7 +33,7 @@ class ImputationBootstrapResults:
     n_bootstrap : int
         Number of bootstrap iterations.
     weight_type : str
-        Type of bootstrap weights (currently "rademacher" only).
+        Type of bootstrap weights: "rademacher", "mammen", or "webb".
     alpha : float
         Significance level used for confidence intervals.
     overall_att_se : float
diff --git a/diff_diff/sun_abraham.py b/diff_diff/sun_abraham.py
index 7f84fec7..e9c0bfbd 100644
--- a/diff_diff/sun_abraham.py
+++ b/diff_diff/sun_abraham.py
@@ -433,8 +433,6 @@ def fit(
         time: str,
         first_treat: str,
         covariates: Optional[List[str]] = None,
-        min_pre_periods: int = 1,
-        min_post_periods: int = 1,
     ) -> SunAbrahamResults:
         """
         Fit the Sun-Abraham estimator using saturated regression.
@@ -454,10 +452,6 @@ def fit(
             Use 0 (or np.inf) for never-treated units.
         covariates : list, optional
             List of covariate column names to include in regression.
-        min_pre_periods : int, default=1
-            **Deprecated**: Accepted but ignored. Will be removed in a future version.
-        min_post_periods : int, default=1
-            **Deprecated**: Accepted but ignored. Will be removed in a future version.
 
         Returns
         -------
@@ -469,22 +463,6 @@ def fit(
         ValueError
             If required columns are missing or data validation fails.
         """
-        # Deprecation warnings for unimplemented parameters
-        if min_pre_periods != 1:
-            warnings.warn(
-                "min_pre_periods is not yet implemented and will be ignored. "
-                "This parameter will be removed in a future version.",
-                FutureWarning,
-                stacklevel=2,
-            )
-        if min_post_periods != 1:
-            warnings.warn(
-                "min_post_periods is not yet implemented and will be ignored. "
-                "This parameter will be removed in a future version.",
-                FutureWarning,
-                stacklevel=2,
-            )
-
         # Validate inputs
         required_cols = [outcome, unit, time, first_treat]
         if covariates:
diff --git a/diff_diff/trop.py b/diff_diff/trop.py
index 1059cca0..41f02d04 100644
--- a/diff_diff/trop.py
+++ b/diff_diff/trop.py
@@ -93,7 +93,7 @@ class TROP:
     alpha : float, default=0.05
         Significance level for confidence intervals.
     n_bootstrap : int, default=200
-        Number of bootstrap replications for variance estimation.
+        Number of bootstrap replications for variance estimation. Must be >= 2.
     seed : int, optional
         Random seed for reproducibility.
 
@@ -156,6 +156,12 @@ def __init__(
         self.lambda_unit_grid = lambda_unit_grid or [0.0, 0.1, 0.5, 1.0, 2.0, 5.0]
         self.lambda_nn_grid = lambda_nn_grid or [0.0, 0.01, 0.1, 1.0, 10.0]
 
+        if n_bootstrap < 2:
+            raise ValueError(
+                "n_bootstrap must be >= 2 for TROP (bootstrap variance "
+                "estimation is always used)"
+            )
+
         self.max_iter = max_iter
         self.tol = tol
         self.alpha = alpha
diff --git a/diff_diff/two_stage.py b/diff_diff/two_stage.py
index 5e03dfb6..ab1f2297 100644
--- a/diff_diff/two_stage.py
+++ b/diff_diff/two_stage.py
@@ -29,6 +29,11 @@
 from scipy import sparse
 from scipy.sparse.linalg import factorized as sparse_factorized
 
+# Maximum number of elements before falling back to per-column sparse aggregation.
+# 10M float64 elements ≈ 80 MB peak allocation. Above this, per-column .getcol()
+# trades throughput for bounded memory. Keep in sync with two_stage_bootstrap.py.
+_SPARSE_DENSE_THRESHOLD = 10_000_000
+
 from diff_diff.linalg import solve_ols
 from diff_diff.two_stage_bootstrap import TwoStageDiDBootstrapMixin
 from diff_diff.two_stage_results import TwoStageBootstrapResults, TwoStageDiDResults  # noqa: F401 (re-export)
@@ -67,6 +72,8 @@ class TwoStageDiD(TwoStageDiDBootstrapMixin):
     n_bootstrap : int, default=0
         Number of bootstrap iterations. If 0, uses analytical GMM
         sandwich inference.
+    bootstrap_weights : str, default="rademacher"
+        Type of bootstrap weights: "rademacher", "mammen", or "webb".
     seed : int, optional
         Random seed for reproducibility.
     rank_deficient_action : str, default="warn"
@@ -125,6 +132,7 @@ def __init__(
         alpha: float = 0.05,
         cluster: Optional[str] = None,
         n_bootstrap: int = 0,
+        bootstrap_weights: str = "rademacher",
         seed: Optional[int] = None,
         rank_deficient_action: str = "warn",
         horizon_max: Optional[int] = None,
@@ -134,11 +142,17 @@ def __init__(
                 f"rank_deficient_action must be 'warn', 'error', or 'silent', "
                 f"got '{rank_deficient_action}'"
             )
+        if bootstrap_weights not in ("rademacher", "mammen", "webb"):
+            raise ValueError(
+                f"bootstrap_weights must be 'rademacher', 'mammen', or 'webb', "
+                f"got '{bootstrap_weights}'"
+            )
 
         self.anticipation = anticipation
         self.alpha = alpha
         self.cluster = cluster
         self.n_bootstrap = n_bootstrap
+        self.bootstrap_weights = bootstrap_weights
         self.seed = seed
         self.rank_deficient_action = rank_deficient_action
         self.horizon_max = horizon_max
@@ -1065,6 +1079,41 @@ def _stage2_group(
 
         return group_effects
 
+    # =========================================================================
+    # GMM score computation
+    # =========================================================================
+
+    @staticmethod
+    def _compute_gmm_scores(
+        c_by_cluster: np.ndarray,
+        gamma_hat: np.ndarray,
+        s2_by_cluster: np.ndarray,
+    ) -> np.ndarray:
+        """
+        Compute per-cluster GMM scores S_g = gamma_hat' c_g - X'_{2g} eps_{2g}.
+
+        Handles NaN/overflow from rank-deficient FE by wrapping in errstate
+        and replacing non-finite values with 0.
+
+        Parameters
+        ----------
+        c_by_cluster : np.ndarray, shape (G, p)
+            Per-cluster Stage 1 scores.
+        gamma_hat : np.ndarray, shape (p, k)
+            Cross-moment correction matrix.
+        s2_by_cluster : np.ndarray, shape (G, k)
+            Per-cluster Stage 2 scores.
+
+        Returns
+        -------
+        np.ndarray, shape (G, k)
+            Per-cluster influence scores.
+        """
+        with np.errstate(invalid="ignore", divide="ignore", over="ignore"):
+            correction = np.dot(c_by_cluster, gamma_hat)
+        np.nan_to_num(correction, copy=False, nan=0.0, posinf=0.0, neginf=0.0)
+        return correction - s2_by_cluster
+
     # =========================================================================
     # GMM Sandwich Variance (Butts & Gardner 2022)
     # =========================================================================
@@ -1178,12 +1227,19 @@ def _compute_gmm_variance(
         unique_clusters, cluster_indices = np.unique(cluster_ids, return_inverse=True)
         G = len(unique_clusters)
 
-        # Aggregate sparse rows by cluster using column-wise np.add.at
-        weighted_X10_csc = weighted_X10.tocsc()
+        n_elements = weighted_X10.shape[0] * weighted_X10.shape[1]
         c_by_cluster = np.zeros((G, p))
-        for j_col in range(p):
-            col_data = weighted_X10_csc.getcol(j_col).toarray().ravel()
-            np.add.at(c_by_cluster[:, j_col], cluster_indices, col_data)
+        if n_elements > _SPARSE_DENSE_THRESHOLD:
+            # Per-column path: limits peak memory for large FE matrices
+            weighted_X10_csc = weighted_X10.tocsc()
+            for j_col in range(p):
+                col_data = weighted_X10_csc.getcol(j_col).toarray().ravel()
+                np.add.at(c_by_cluster[:, j_col], cluster_indices, col_data)
+        else:
+            # Dense path: faster for moderate-size matrices
+            weighted_X10_dense = weighted_X10.toarray()
+            for j_col in range(p):
+                np.add.at(c_by_cluster[:, j_col], cluster_indices, weighted_X10_dense[:, j_col])
 
         # 3. Per-cluster Stage 2 scores: X'_{2g} eps_{2g}
         weighted_X2 = X_2 * eps_2[:, None]  # (n x k) dense
@@ -1192,11 +1248,7 @@ def _compute_gmm_variance(
             np.add.at(s2_by_cluster[:, j_col], cluster_indices, weighted_X2[:, j_col])
 
         # 4. S_g = gamma_hat' c_g - X'_{2g} eps_{2g}
-        with np.errstate(invalid="ignore", divide="ignore", over="ignore"):
-            correction = np.dot(c_by_cluster, gamma_hat)  # (G x p) @ (p x k) = (G x k)
-        # Replace NaN/inf from overflow (rank-deficient FE) with 0
-        np.nan_to_num(correction, copy=False, nan=0.0, posinf=0.0, neginf=0.0)
-        S = correction - s2_by_cluster  # (G x k)
+        S = self._compute_gmm_scores(c_by_cluster, gamma_hat, s2_by_cluster)
 
         # 5. Meat: sum_g S_g S'_g = S' S
         with np.errstate(invalid="ignore", over="ignore"):
@@ -1304,6 +1356,7 @@ def get_params(self) -> Dict[str, Any]:
             "alpha": self.alpha,
             "cluster": self.cluster,
             "n_bootstrap": self.n_bootstrap,
+            "bootstrap_weights": self.bootstrap_weights,
             "seed": self.seed,
             "rank_deficient_action": self.rank_deficient_action,
             "horizon_max": self.horizon_max,
diff --git a/diff_diff/two_stage_bootstrap.py b/diff_diff/two_stage_bootstrap.py
index f33a5501..51d0cd63 100644
--- a/diff_diff/two_stage_bootstrap.py
+++ b/diff_diff/two_stage_bootstrap.py
@@ -15,6 +15,9 @@
 
 from diff_diff.linalg import solve_ols
 from diff_diff.staggered_bootstrap import _generate_bootstrap_weights_batch
+# Maximum number of elements before falling back to per-column sparse aggregation.
+# Keep in sync with two_stage.py.
+_SPARSE_DENSE_THRESHOLD = 10_000_000
 from diff_diff.two_stage_results import TwoStageBootstrapResults
 
 __all__ = [
@@ -106,19 +109,26 @@ def _compute_cluster_S_scores(
         unique_clusters, cluster_indices = np.unique(cluster_ids, return_inverse=True)
         G = len(unique_clusters)
 
-        weighted_X10_csc = weighted_X10.tocsc()
+        n_elements = weighted_X10.shape[0] * weighted_X10.shape[1]
         c_by_cluster = np.zeros((G, p))
-        for j_col in range(p):
-            col_data = weighted_X10_csc.getcol(j_col).toarray().ravel()
-            np.add.at(c_by_cluster[:, j_col], cluster_indices, col_data)
+        if n_elements > _SPARSE_DENSE_THRESHOLD:
+            # Per-column path: limits peak memory for large FE matrices
+            weighted_X10_csc = weighted_X10.tocsc()
+            for j_col in range(p):
+                col_data = weighted_X10_csc.getcol(j_col).toarray().ravel()
+                np.add.at(c_by_cluster[:, j_col], cluster_indices, col_data)
+        else:
+            # Dense path: faster for moderate-size matrices
+            weighted_X10_dense = weighted_X10.toarray()
+            for j_col in range(p):
+                np.add.at(c_by_cluster[:, j_col], cluster_indices, weighted_X10_dense[:, j_col])
 
         weighted_X2 = X_2 * eps_2[:, None]
         s2_by_cluster = np.zeros((G, k))
         for j_col in range(k):
             np.add.at(s2_by_cluster[:, j_col], cluster_indices, weighted_X2[:, j_col])
 
-        correction = np.dot(c_by_cluster, gamma_hat)
-        S = correction - s2_by_cluster
+        S = self._compute_gmm_scores(c_by_cluster, gamma_hat, s2_by_cluster)
 
         # Bread
         XtX_2 = np.dot(X_2.T, X_2)
@@ -201,7 +211,7 @@ def _run_bootstrap(
 
         n_clusters = len(unique_clusters)
         all_weights = _generate_bootstrap_weights_batch(
-            self.n_bootstrap, n_clusters, "rademacher", rng
+            self.n_bootstrap, n_clusters, self.bootstrap_weights, rng
         )
 
         # T_b = bread @ (sum_g w_bg * S_g) = bread @ (W @ S)'  per boot
@@ -385,7 +395,7 @@ def _run_bootstrap(
 
         return TwoStageBootstrapResults(
             n_bootstrap=self.n_bootstrap,
-            weight_type="rademacher",
+            weight_type=self.bootstrap_weights,
             alpha=self.alpha,
             overall_att_se=overall_se,
             overall_att_ci=overall_ci,
diff --git a/diff_diff/two_stage_results.py b/diff_diff/two_stage_results.py
index a1277726..16b4916a 100644
--- a/diff_diff/two_stage_results.py
+++ b/diff_diff/two_stage_results.py
@@ -34,7 +34,7 @@ class TwoStageBootstrapResults:
     n_bootstrap : int
         Number of bootstrap iterations.
     weight_type : str
-        Type of bootstrap weights (currently "rademacher" only).
+        Type of bootstrap weights: "rademacher", "mammen", or "webb".
     alpha : float
         Significance level used for confidence intervals.
     overall_att_se : float
diff --git a/diff_diff/utils.py b/diff_diff/utils.py
index 712af691..1b4b3c60 100644
--- a/diff_diff/utils.py
+++ b/diff_diff/utils.py
@@ -1643,104 +1643,6 @@ def compute_sdid_estimator(
     return float(tau)
 
 
-def compute_placebo_effects(
-    Y_pre_control: np.ndarray,
-    Y_post_control: np.ndarray,
-    Y_pre_treated: np.ndarray,
-    unit_weights: np.ndarray,
-    time_weights: np.ndarray,
-    control_unit_ids: List[Any],
-    n_placebo: Optional[int] = None
-) -> np.ndarray:
-    """
-    Compute placebo treatment effects by treating each control as treated.
-
-    Used for inference in synthetic DiD when bootstrap is not appropriate.
-
-    Parameters
-    ----------
-    Y_pre_control : np.ndarray
-        Control outcomes in pre-treatment periods, shape (n_pre, n_control).
-    Y_post_control : np.ndarray
-        Control outcomes in post-treatment periods, shape (n_post, n_control).
-    Y_pre_treated : np.ndarray
-        Treated outcomes in pre-treatment periods, shape (n_pre,).
-    unit_weights : np.ndarray
-        Unit weights, shape (n_control,).
-    time_weights : np.ndarray
-        Time weights, shape (n_pre,).
-    control_unit_ids : list
-        List of control unit identifiers.
-    n_placebo : int, optional
-        Number of placebo tests. If None, uses all control units.
-
-    Returns
-    -------
-    np.ndarray
-        Array of placebo treatment effects.
-
-    Notes
-    -----
-    For each control unit j, we pretend it was treated and compute
-    the SDID estimate using the remaining controls. The distribution
-    of these placebo effects provides a reference for inference.
-
-    .. deprecated::
-        This function uses legacy projected-gradient weights and fixed time
-        weights. Use ``SyntheticDiD(variance_method='placebo')`` instead.
-    """
-    warnings.warn(
-        "compute_placebo_effects uses legacy projected-gradient weights and "
-        "fixed time weights, which does not match the SDID methodology "
-        "(Algorithm 4 requires re-estimating both omega and lambda per "
-        "placebo iteration). Use SyntheticDiD(variance_method='placebo') "
-        "for correct placebo inference. This function will be removed in a "
-        "future version.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
-    n_pre, n_control = Y_pre_control.shape
-
-    if n_placebo is None:
-        n_placebo = n_control
-
-    placebo_effects = []
-
-    for j in range(min(n_placebo, n_control)):
-        # Treat unit j as the "treated" unit
-        Y_pre_placebo_treated = Y_pre_control[:, j]
-        Y_post_placebo_treated = Y_post_control[:, j]
-
-        # Use remaining units as controls
-        remaining_idx = [i for i in range(n_control) if i != j]
-
-        if len(remaining_idx) == 0:
-            continue
-
-        Y_pre_remaining = Y_pre_control[:, remaining_idx]
-        Y_post_remaining = Y_post_control[:, remaining_idx]
-
-        # Recompute weights for remaining controls
-        remaining_weights = compute_synthetic_weights(
-            Y_pre_remaining,
-            Y_pre_placebo_treated
-        )
-
-        # Compute placebo effect
-        placebo_tau = compute_sdid_estimator(
-            Y_pre_remaining,
-            Y_post_remaining,
-            Y_pre_placebo_treated,
-            Y_post_placebo_treated,
-            remaining_weights,
-            time_weights
-        )
-
-        placebo_effects.append(placebo_tau)
-
-    return np.asarray(placebo_effects)
-
-
 def demean_by_group(
     data: pd.DataFrame,
     variables: List[str],
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 08116c96..59e54d02 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -427,7 +427,7 @@ where weights ŵ_{g,e} = n_{g,e} / Σ_g n_{g,e} (sample share of cohort g at eve
 - Extrapolation beyond observed event-times: not estimated
 - Event-time range: no artificial cap (estimates all available relative times, matching R's `fixest::sunab()`)
 - No post-treatment effects: returns `(NaN, NaN)` for overall ATT/SE; all inference fields (t_stat, p_value, conf_int) propagate NaN via `np.isfinite()` guards
-- `min_pre_periods`/`min_post_periods` parameters: deprecated (accepted but ignored, emit `FutureWarning`)
+- `min_pre_periods`/`min_post_periods` parameters: removed (previously deprecated with `FutureWarning`; callers passing these will now get `TypeError`)
 - Variance fallback: when full weight vector cannot be constructed for overall ATT SE, uses simplified variance (ignores covariances between periods) with `UserWarning`
 - Rank-deficient design matrix (covariate collinearity):
   - Detection: Pivoted QR decomposition with tolerance `1e-07` (R's `qr()` default)
@@ -545,7 +545,7 @@ Y_it = alpha_i + beta_t [+ X'_it * delta] + W'_it * gamma + epsilon_it
 - **treatment_effects DataFrame weights:** `weight` column uses `1/n_valid` for finite tau_hat and 0 for NaN tau_hat, consistent with the ATT estimand.
 - **Rank-deficient covariates in variance:** Covariates with NaN coefficients (dropped for rank deficiency in Step 1) are excluded from the variance design matrices `A_0`/`A_1`. Only covariates with finite coefficients participate in the `v_it` projection.
 - **Sparse variance solver:** `_compute_v_untreated_with_covariates` uses `scipy.sparse.linalg.spsolve` to solve `(A_0'A_0) z = A_1'w` without densifying the normal equations matrix. Falls back to dense `lstsq` if the sparse solver fails.
-- **Bootstrap inference:** Uses multiplier bootstrap on the Theorem 3 influence function: `psi_i = sum_t v_it * epsilon_tilde_it`. Cluster-level psi sums are pre-computed for each aggregation target (overall, per-horizon, per-group), then perturbed with Rademacher weights. This is a library extension (not in the paper) consistent with CallawaySantAnna/SunAbraham bootstrap patterns.
+- **Bootstrap inference:** Uses multiplier bootstrap on the Theorem 3 influence function: `psi_i = sum_t v_it * epsilon_tilde_it`. Cluster-level psi sums are pre-computed for each aggregation target (overall, per-horizon, per-group), then perturbed with multiplier weights (Rademacher by default; configurable via `bootstrap_weights` parameter to use Mammen or Webb weights, matching CallawaySantAnna). This is a library extension (not in the paper) consistent with CallawaySantAnna/SunAbraham bootstrap patterns.
 - **Auxiliary residuals (Equation 8):** Uses v_it-weighted tau_tilde_g formula: `tau_tilde_g = sum(v_it * tau_hat_it) / sum(v_it)` within each partition group. Zero-weight groups (common in event-study SE computation) fall back to unweighted mean.
 
 **Reference implementation(s):**
@@ -610,7 +610,7 @@ where `psi_i` is the stacked influence function for unit i across all its observ
 
 *Bootstrap:*
 
-Our implementation uses multiplier bootstrap on the GMM influence function: cluster-level `psi` sums are pre-computed, then perturbed with Rademacher weights. The R `did2s` package defaults to block bootstrap (resampling clusters with replacement). Both approaches are asymptotically valid; the multiplier bootstrap is computationally cheaper and consistent with the CallawaySantAnna/ImputationDiD bootstrap patterns in this library.
+Our implementation uses multiplier bootstrap on the GMM influence function: cluster-level `psi` sums are pre-computed, then perturbed with multiplier weights (Rademacher by default; configurable via `bootstrap_weights` parameter to use Mammen or Webb weights, matching CallawaySantAnna). The R `did2s` package defaults to block bootstrap (resampling clusters with replacement). Both approaches are asymptotically valid; the multiplier bootstrap is computationally cheaper and consistent with the CallawaySantAnna/ImputationDiD bootstrap patterns in this library.
 
 *Edge cases:*
 - **Always-treated units:** Units treated in all observed periods have no untreated observations for Stage 1 FE estimation. These are excluded with a warning listing the affected unit IDs. Their treated observations do NOT contribute to Stage 2.
@@ -941,6 +941,7 @@ Q(λ) = Σ_{j,s: D_js=0} [τ̂_js^loocv(λ)]²
   - **n_post_periods metadata**: Counts periods where D=1 is actually observed (at least one unit has D=1), not calendar periods from first treatment. In unbalanced panels where treated units are missing in some post-treatment periods, only periods with observed D=1 values are counted.
 - Wrong D specification: if user provides event-style D (only first treatment period),
   the absorbing-state validation will raise ValueError with helpful guidance
+- **Bootstrap minimum**: `n_bootstrap` must be >= 2 (enforced via `ValueError`). TROP uses bootstrap for all variance estimation — there is no analytical SE formula.
 - **LOOCV failure metadata**: When LOOCV fits fail in the Rust backend, the first failed observation coordinates (t, i) are returned to Python for informative warning messages
 - **Inference CI distribution**: After `safe_inference()` migration, CI uses t-distribution (df = max(1, n_treated_obs - 1)), consistent with p_value. Previously CI used normal-distribution while p_value used t-distribution (inconsistent). This is a minor behavioral change; CIs may be slightly wider for small n_treated_obs.
 
diff --git a/tests/test_imputation.py b/tests/test_imputation.py
index b2b9c1b8..5e2b46b7 100644
--- a/tests/test_imputation.py
+++ b/tests/test_imputation.py
@@ -425,6 +425,19 @@ def test_get_params(self):
         assert params["aux_partition"] == "cohort"
         assert params["cluster"] is None
         assert params["rank_deficient_action"] == "warn"
+        assert params["bootstrap_weights"] == "rademacher"
+
+    def test_bootstrap_weights_in_get_set_params(self):
+        """bootstrap_weights should appear in get_params and be settable."""
+        est = ImputationDiD(bootstrap_weights="mammen")
+        assert est.get_params()["bootstrap_weights"] == "mammen"
+        est.set_params(bootstrap_weights="webb")
+        assert est.bootstrap_weights == "webb"
+
+    def test_bootstrap_weights_invalid_raises(self):
+        """Invalid bootstrap_weights value should raise ValueError."""
+        with pytest.raises(ValueError, match="bootstrap_weights"):
+            ImputationDiD(bootstrap_weights="invalid")
 
     def test_set_params(self):
         """Test set_params modifies attributes."""
@@ -1076,6 +1089,97 @@ def test_bootstrap_percentile_ci(self, ci_params):
         np.testing.assert_allclose(br.overall_att_ci[0], expected_lower, rtol=1e-10)
         np.testing.assert_allclose(br.overall_att_ci[1], expected_upper, rtol=1e-10)
 
+    def test_bootstrap_weights_mammen(self, ci_params):
+        """Bootstrap with mammen weights should produce valid results."""
+        data = generate_test_data()
+        n_boot = ci_params.bootstrap(50)
+        est = ImputationDiD(n_bootstrap=n_boot, bootstrap_weights="mammen", seed=42)
+        results = est.fit(
+            data, outcome="outcome", unit="unit", time="time", first_treat="first_treat",
+        )
+
+        br = results.bootstrap_results
+        assert br is not None
+        assert br.weight_type == "mammen"
+        assert br.overall_att_se > 0
+        assert np.isfinite(br.overall_att_p_value)
+
+    def test_bootstrap_weights_webb(self, ci_params):
+        """Bootstrap with webb weights should produce valid results."""
+        data = generate_test_data()
+        n_boot = ci_params.bootstrap(50)
+        est = ImputationDiD(n_bootstrap=n_boot, bootstrap_weights="webb", seed=42)
+        results = est.fit(
+            data, outcome="outcome", unit="unit", time="time", first_treat="first_treat",
+        )
+
+        br = results.bootstrap_results
+        assert br is not None
+        assert br.weight_type == "webb"
+        assert br.overall_att_se > 0
+        assert np.isfinite(br.overall_att_p_value)
+
+    def test_bootstrap_weights_event_study(self, ci_params):
+        """Bootstrap with non-default weights should work for event study aggregation."""
+        data = generate_test_data()
+        n_boot = ci_params.bootstrap(50)
+        est = ImputationDiD(
+            n_bootstrap=n_boot, bootstrap_weights="mammen", seed=42
+        )
+        results = est.fit(
+            data, outcome="outcome", unit="unit", time="time",
+            first_treat="first_treat", aggregate="event_study",
+        )
+
+        br = results.bootstrap_results
+        assert br is not None
+        assert br.weight_type == "mammen"
+        assert br.event_study_ses is not None
+        assert len(br.event_study_ses) > 0
+        for h, se in br.event_study_ses.items():
+            assert se > 0, f"Non-positive SE at horizon {h}"
+
+    def test_bootstrap_weights_group(self, ci_params):
+        """Bootstrap with non-default weights should work for group aggregation."""
+        data = generate_test_data()
+        n_boot = ci_params.bootstrap(50)
+        est = ImputationDiD(
+            n_bootstrap=n_boot, bootstrap_weights="mammen", seed=42
+        )
+        results = est.fit(
+            data, outcome="outcome", unit="unit", time="time",
+            first_treat="first_treat", aggregate="group",
+        )
+
+        br = results.bootstrap_results
+        assert br is not None
+        assert br.weight_type == "mammen"
+        assert br.group_ses is not None
+        assert len(br.group_ses) > 0
+        for g, se in br.group_ses.items():
+            assert se > 0, f"Non-positive SE for group {g}"
+
+    def test_bootstrap_with_covariates(self, ci_params):
+        """Bootstrap should work with covariates."""
+        data = generate_test_data()
+        # Add a covariate
+        rng = np.random.default_rng(123)
+        data["x1"] = rng.normal(0, 1, len(data))
+        n_boot = ci_params.bootstrap(50)
+        est = ImputationDiD(n_bootstrap=n_boot, seed=42)
+        results = est.fit(
+            data,
+            outcome="outcome",
+            unit="unit",
+            time="time",
+            first_treat="first_treat",
+            covariates=["x1"],
+        )
+
+        assert results.bootstrap_results is not None
+        assert results.bootstrap_results.overall_att_se > 0
+        assert np.isfinite(results.bootstrap_results.overall_att_p_value)
+
 
 # =============================================================================
 # TestImputationVsOtherEstimators
diff --git a/tests/test_sun_abraham.py b/tests/test_sun_abraham.py
index ce60fb0b..d450ede8 100644
--- a/tests/test_sun_abraham.py
+++ b/tests/test_sun_abraham.py
@@ -1178,28 +1178,6 @@ def test_no_post_effects_bootstrap_returns_nan(self, ci_params):
             f"Expected (NaN, NaN) overall_conf_int, got {results.overall_conf_int}"
         )
 
-    def test_deprecated_min_pre_periods_warning(self):
-        """Test that min_pre_periods emits FutureWarning (Step 5c)."""
-        data = generate_staggered_data(seed=42)
-
-        sa = SunAbraham(n_bootstrap=0)
-        with pytest.warns(FutureWarning, match="min_pre_periods"):
-            sa.fit(
-                data, outcome="outcome", unit="unit", time="time",
-                first_treat="first_treat", min_pre_periods=2,
-            )
-
-    def test_deprecated_min_post_periods_warning(self):
-        """Test that min_post_periods emits FutureWarning (Step 5c)."""
-        data = generate_staggered_data(seed=42)
-
-        sa = SunAbraham(n_bootstrap=0)
-        with pytest.warns(FutureWarning, match="min_post_periods"):
-            sa.fit(
-                data, outcome="outcome", unit="unit", time="time",
-                first_treat="first_treat", min_post_periods=2,
-            )
-
     def test_event_time_no_truncation(self):
         """Test that event times beyond ±20 are estimated (Step 5d).
 
@@ -1464,6 +1442,15 @@ def test_never_treated_inf_encoding(self):
             f"SE differs: inf={results_inf.overall_se}, zero={results_zero.overall_se}"
         )
 
+    def test_removed_params_raise_typeerror(self):
+        """Removed min_pre_periods/min_post_periods raise TypeError."""
+        data = generate_staggered_data(n_units=30, n_periods=6, seed=42)
+        sa = SunAbraham(n_bootstrap=0)
+        with pytest.raises(TypeError, match="unexpected keyword argument"):
+            sa.fit(data, "outcome", "unit", "time", "first_treat", min_pre_periods=2)
+        with pytest.raises(TypeError, match="unexpected keyword argument"):
+            sa.fit(data, "outcome", "unit", "time", "first_treat", min_post_periods=2)
+
     def test_all_never_treated_inf_raises(self):
         """Test that all-never-treated data with np.inf encoding raises ValueError."""
         data = generate_staggered_data(n_units=100, n_periods=10, n_cohorts=3, seed=42)
diff --git a/tests/test_trop.py b/tests/test_trop.py
index 1ee95da5..41a137f2 100644
--- a/tests/test_trop.py
+++ b/tests/test_trop.py
@@ -95,6 +95,13 @@ def simple_panel_data():
 class TestTROP:
     """Tests for TROP estimator."""
 
+    def test_n_bootstrap_less_than_2_raises(self):
+        """n_bootstrap < 2 should raise ValueError."""
+        with pytest.raises(ValueError, match="n_bootstrap must be >= 2"):
+            TROP(n_bootstrap=1)
+        with pytest.raises(ValueError, match="n_bootstrap must be >= 2"):
+            TROP(n_bootstrap=0)
+
     def test_basic_fit(self, simple_panel_data):
         """Test basic model fitting."""
         trop_est = TROP(
diff --git a/tests/test_two_stage.py b/tests/test_two_stage.py
index 5450992e..ed311e00 100644
--- a/tests/test_two_stage.py
+++ b/tests/test_two_stage.py
@@ -907,6 +907,19 @@ def test_get_params(self):
         assert params["horizon_max"] == 5
         assert params["rank_deficient_action"] == "warn"
         assert params["cluster"] is None
+        assert params["bootstrap_weights"] == "rademacher"
+
+    def test_bootstrap_weights_in_get_set_params(self):
+        """bootstrap_weights should appear in get_params and be settable."""
+        est = TwoStageDiD(bootstrap_weights="mammen")
+        assert est.get_params()["bootstrap_weights"] == "mammen"
+        est.set_params(bootstrap_weights="webb")
+        assert est.bootstrap_weights == "webb"
+
+    def test_bootstrap_weights_invalid_raises(self):
+        """Invalid bootstrap_weights value should raise ValueError."""
+        with pytest.raises(ValueError, match="bootstrap_weights"):
+            TwoStageDiD(bootstrap_weights="invalid")
 
     def test_set_params(self):
         """set_params should modify attributes."""
@@ -1047,6 +1060,76 @@ def test_bootstrap_event_study(self, ci_params):
         for h, se in results.bootstrap_results.event_study_ses.items():
             assert se > 0
 
+    def test_bootstrap_weights_mammen(self, ci_params):
+        """Bootstrap with mammen weights should produce valid results."""
+        data = generate_test_data()
+        n_boot = ci_params.bootstrap(50)
+        results = TwoStageDiD(
+            n_bootstrap=n_boot, bootstrap_weights="mammen", seed=42
+        ).fit(
+            data, outcome="outcome", unit="unit", time="time", first_treat="first_treat"
+        )
+
+        br = results.bootstrap_results
+        assert br is not None
+        assert br.weight_type == "mammen"
+        assert br.overall_att_se > 0
+        assert np.isfinite(br.overall_att_p_value)
+
+    def test_bootstrap_weights_webb(self, ci_params):
+        """Bootstrap with webb weights should produce valid results."""
+        data = generate_test_data()
+        n_boot = ci_params.bootstrap(50)
+        results = TwoStageDiD(
+            n_bootstrap=n_boot, bootstrap_weights="webb", seed=42
+        ).fit(
+            data, outcome="outcome", unit="unit", time="time", first_treat="first_treat"
+        )
+
+        br = results.bootstrap_results
+        assert br is not None
+        assert br.weight_type == "webb"
+        assert br.overall_att_se > 0
+        assert np.isfinite(br.overall_att_p_value)
+
+    def test_bootstrap_weights_event_study(self, ci_params):
+        """Bootstrap with non-default weights should work for event study aggregation."""
+        data = generate_test_data()
+        n_boot = ci_params.bootstrap(50)
+        results = TwoStageDiD(
+            n_bootstrap=n_boot, bootstrap_weights="mammen", seed=42
+        ).fit(
+            data, outcome="outcome", unit="unit", time="time",
+            first_treat="first_treat", aggregate="event_study",
+        )
+
+        br = results.bootstrap_results
+        assert br is not None
+        assert br.weight_type == "mammen"
+        assert br.event_study_ses is not None
+        assert len(br.event_study_ses) > 0
+        for h, se in br.event_study_ses.items():
+            assert se > 0, f"Non-positive SE at horizon {h}"
+
+    def test_bootstrap_weights_group(self, ci_params):
+        """Bootstrap with non-default weights should work for group aggregation."""
+        data = generate_test_data()
+        n_boot = ci_params.bootstrap(50)
+        results = TwoStageDiD(
+            n_bootstrap=n_boot, bootstrap_weights="mammen", seed=42
+        ).fit(
+            data, outcome="outcome", unit="unit", time="time",
+            first_treat="first_treat", aggregate="group",
+        )
+
+        br = results.bootstrap_results
+        assert br is not None
+        assert br.weight_type == "mammen"
+        assert br.group_ses is not None
+        assert len(br.group_ses) > 0
+        for g, se in br.group_ses.items():
+            assert se > 0, f"Non-positive SE for group {g}"
+
 
 # =============================================================================
 # TestTwoStageDiDConvenience
@@ -1116,3 +1199,35 @@ def test_print_summary(self, capsys):
         results.print_summary()
         captured = capsys.readouterr()
         assert "Two-Stage DiD" in captured.out
+
+    def test_sparse_fallback_path(self):
+        """Size guard falls back to per-column path and produces same results."""
+        import diff_diff.two_stage as ts_mod
+
+        data = generate_test_data(n_units=50, n_periods=6, seed=42)
+
+        # Run with normal (high) threshold — uses dense path
+        result_dense = TwoStageDiD().fit(
+            data, outcome="outcome", unit="unit", time="time", first_treat="first_treat"
+        )
+
+        # Patch threshold to 1 to force per-column path on all data
+        orig = ts_mod._SPARSE_DENSE_THRESHOLD
+        try:
+            ts_mod._SPARSE_DENSE_THRESHOLD = 1
+            result_sparse = TwoStageDiD().fit(
+                data,
+                outcome="outcome",
+                unit="unit",
+                time="time",
+                first_treat="first_treat",
+            )
+        finally:
+            ts_mod._SPARSE_DENSE_THRESHOLD = orig
+
+        np.testing.assert_allclose(
+            result_dense.overall_att, result_sparse.overall_att, rtol=1e-10
+        )
+        np.testing.assert_allclose(
+            result_dense.overall_se, result_sparse.overall_se, rtol=1e-10
+        )
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 3d79846d..61a0ed80 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -23,7 +23,6 @@
     check_parallel_trends_robust,
     compute_confidence_interval,
     compute_p_value,
-    compute_placebo_effects,
     compute_robust_se,
     compute_sdid_estimator,
     compute_synthetic_weights,
@@ -1046,116 +1045,6 @@ def test_tighter_margin_harder_to_pass(self, parallel_trends_data):
         assert results_wide["tost_p_value"] <= results_tight["tost_p_value"]
 
 
-# =============================================================================
-# Tests for compute_placebo_effects
-# =============================================================================
-
-
-class TestComputePlaceboEffects:
-    """Tests for compute_placebo_effects function."""
-
-    def test_returns_correct_number_of_effects(self, sdid_panel_data):
-        """Test that function returns correct number of placebo effects."""
-        # Prepare data matrices
-        control_data = sdid_panel_data[sdid_panel_data["treated"] == 0]
-        treated_data = sdid_panel_data[sdid_panel_data["treated"] == 1]
-
-        control_pivot = control_data.pivot(index="period", columns="unit", values="outcome")
-        treated_mean = treated_data.groupby("period")["outcome"].mean()
-
-        pre_periods = [0, 1, 2, 3, 4]
-        post_periods = [5, 6, 7]
-
-        Y_pre_control = control_pivot.loc[pre_periods].values
-        Y_post_control = control_pivot.loc[post_periods].values
-        Y_pre_treated = treated_mean.loc[pre_periods].values
-
-        # Compute weights
-        unit_weights = compute_synthetic_weights(Y_pre_control, Y_pre_treated)
-        time_weights = compute_time_weights(Y_pre_control, Y_post_control, zeta_lambda=1e-6)
-
-        control_units = list(control_pivot.columns)
-
-        with pytest.warns(DeprecationWarning, match="compute_placebo_effects uses legacy"):
-            placebo_effects = compute_placebo_effects(
-                Y_pre_control,
-                Y_post_control,
-                Y_pre_treated,
-                unit_weights,
-                time_weights,
-                control_units
-            )
-
-        # Should have one placebo effect per control unit
-        assert len(placebo_effects) == len(control_units)
-
-    def test_placebo_effects_distribution(self, sdid_panel_data):
-        """Test that placebo effects form a reasonable distribution."""
-        control_data = sdid_panel_data[sdid_panel_data["treated"] == 0]
-        treated_data = sdid_panel_data[sdid_panel_data["treated"] == 1]
-
-        control_pivot = control_data.pivot(index="period", columns="unit", values="outcome")
-        treated_mean = treated_data.groupby("period")["outcome"].mean()
-
-        pre_periods = [0, 1, 2, 3, 4]
-        post_periods = [5, 6, 7]
-
-        Y_pre_control = control_pivot.loc[pre_periods].values
-        Y_post_control = control_pivot.loc[post_periods].values
-        Y_pre_treated = treated_mean.loc[pre_periods].values
-
-        unit_weights = compute_synthetic_weights(Y_pre_control, Y_pre_treated)
-        time_weights = compute_time_weights(Y_pre_control, Y_post_control, zeta_lambda=1e-6)
-
-        control_units = list(control_pivot.columns)
-
-        with pytest.warns(DeprecationWarning, match="compute_placebo_effects uses legacy"):
-            placebo_effects = compute_placebo_effects(
-                Y_pre_control,
-                Y_post_control,
-                Y_pre_treated,
-                unit_weights,
-                time_weights,
-                control_units
-            )
-
-        # Placebo effects should be centered around zero (no treatment)
-        assert np.abs(np.mean(placebo_effects)) < 2.0  # Allow some deviation
-
-    def test_n_placebo_limits_output(self, sdid_panel_data):
-        """Test that n_placebo parameter limits output size."""
-        control_data = sdid_panel_data[sdid_panel_data["treated"] == 0]
-        treated_data = sdid_panel_data[sdid_panel_data["treated"] == 1]
-
-        control_pivot = control_data.pivot(index="period", columns="unit", values="outcome")
-        treated_mean = treated_data.groupby("period")["outcome"].mean()
-
-        pre_periods = [0, 1, 2, 3, 4]
-        post_periods = [5, 6, 7]
-
-        Y_pre_control = control_pivot.loc[pre_periods].values
-        Y_post_control = control_pivot.loc[post_periods].values
-        Y_pre_treated = treated_mean.loc[pre_periods].values
-
-        unit_weights = compute_synthetic_weights(Y_pre_control, Y_pre_treated)
-        time_weights = compute_time_weights(Y_pre_control, Y_post_control, zeta_lambda=1e-6)
-
-        control_units = list(control_pivot.columns)
-
-        with pytest.warns(DeprecationWarning, match="compute_placebo_effects uses legacy"):
-            placebo_effects = compute_placebo_effects(
-                Y_pre_control,
-                Y_post_control,
-                Y_pre_treated,
-                unit_weights,
-                time_weights,
-                control_units,
-                n_placebo=5
-            )
-
-        assert len(placebo_effects) == 5
-
-
 # =============================================================================
 # Additional tests for compute_synthetic_weights
 # =============================================================================