From 974c66009c46220647a07acc3638479ccb108a6e Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sun, 15 Feb 2026 11:45:27 -0500
Subject: [PATCH] Remove Rust outer-loop SDID variance estimation to fix SE
 mismatch and perf regression

The Rust placebo/bootstrap variance paths used a different RNG (Xoshiro256PlusPlus)
producing different permutation sequences than Python, causing SE divergence between
backends. Rayon parallelism across all replications also saturated memory bandwidth
at 1k+ scale (3-10x slower than pure Python). Inner Frank-Wolfe weight calls still
dispatch to Rust for ~18x speedup over R.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                   |  11 +
 CLAUDE.md                      |   3 -
 diff_diff/_backend.py          |  12 -
 diff_diff/synthetic_did.py     | 103 -------
 docs/benchmarks.rst            | 141 +++++-----
 rust/src/lib.rs                |   5 -
 rust/src/sdid_variance.rs      | 498 ---------------------------------
 tests/test_methodology_sdid.py |  88 ++++++
 tests/test_rust_backend.py     | 274 ------------------
 9 files changed, 168 insertions(+), 967 deletions(-)
 delete mode 100644 rust/src/sdid_variance.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5de161c2..4bfcda5c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Changed
+- Remove Rust outer-loop variance estimation for SyntheticDiD (placebo and bootstrap)
+  - Fixes SE mismatch between pure Python and Rust backends (different RNG sequences)
+  - Fixes Rust performance regression at 1k+ scale (memory bandwidth saturation from rayon parallelism)
+  - Inner Frank-Wolfe weight computation still uses Rust when available
+
+### Documentation
+- Re-run SyntheticDiD benchmarks against R after Frank-Wolfe methodology rewrite
+- Updated `docs/benchmarks.rst` SDID validation results, performance tables, and known differences
+- ATT now matches R to < 1e-10 (previously 0.3% diff) since both use Frank-Wolfe optimizer
+
 ## [2.3.0] - 2026-02-09
 
 ### Added
diff --git a/CLAUDE.md b/CLAUDE.md
index e1d76280..a6a62d11 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -167,9 +167,6 @@ cross-platform compilation - no OpenBLAS or Intel MKL installation required.
     - `compute_unit_distance_matrix()` - Parallel pairwise RMSE distance computation (4-8x speedup)
     - `loocv_grid_search()` - Parallel LOOCV across tuning parameters (10-50x speedup)
     - `bootstrap_trop_variance()` - Parallel bootstrap variance estimation (5-15x speedup)
-  - **`rust/src/sdid_variance.rs`** - SDID variance estimation acceleration:
-    - `placebo_variance_sdid()` - Parallel placebo SE computation (~8x speedup)
-    - `bootstrap_variance_sdid()` - Parallel bootstrap SE computation (~6x speedup)
   - Uses pure-Rust `faer` library for linear algebra (no external BLAS/LAPACK dependencies)
   - Cross-platform: builds on Linux, macOS, and Windows without additional setup
   - Provides 4-8x speedup for SyntheticDiD, 5-20x speedup for TROP
diff --git a/diff_diff/_backend.py b/diff_diff/_backend.py
index dd8af93c..954f94c4 100644
--- a/diff_diff/_backend.py
+++ b/diff_diff/_backend.py
@@ -35,9 +35,6 @@
         compute_time_weights as _rust_compute_time_weights,
         compute_noise_level as _rust_compute_noise_level,
         sc_weight_fw as _rust_sc_weight_fw,
-        # SDID variance estimation (parallel placebo and bootstrap)
-        placebo_variance_sdid as _rust_placebo_variance_sdid,
-        bootstrap_variance_sdid as _rust_bootstrap_variance_sdid,
     )
     _rust_available = True
 except ImportError:
@@ -59,9 +56,6 @@
     _rust_compute_time_weights = None
     _rust_compute_noise_level = None
     _rust_sc_weight_fw = None
-    # SDID variance estimation (parallel placebo and bootstrap)
-    _rust_placebo_variance_sdid = None
-    _rust_bootstrap_variance_sdid = None
 
 # Determine final backend based on environment variable and availability
 if _backend_env == 'python':
@@ -84,9 +78,6 @@
     _rust_compute_time_weights = None
     _rust_compute_noise_level = None
     _rust_sc_weight_fw = None
-    # SDID variance estimation (parallel placebo and bootstrap)
-    _rust_placebo_variance_sdid = None
-    _rust_bootstrap_variance_sdid = None
 elif _backend_env == 'rust':
     # Force Rust mode - fail if not available
     if not _rust_available:
@@ -118,7 +109,4 @@
     '_rust_compute_time_weights',
     '_rust_compute_noise_level',
     '_rust_sc_weight_fw',
-    # SDID variance estimation (parallel placebo and bootstrap)
-    '_rust_placebo_variance_sdid',
-    '_rust_bootstrap_variance_sdid',
 ]
diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py
index 47f01438..f0591647 100644
--- a/diff_diff/synthetic_did.py
+++ b/diff_diff/synthetic_did.py
@@ -583,62 +583,6 @@ def _bootstrap_se(
         ])
         n_pre = Y_pre_control.shape[0]
 
-        # Try Rust parallel implementation for ~6x speedup
-        from diff_diff._backend import HAS_RUST_BACKEND, _rust_bootstrap_variance_sdid
-
-        if HAS_RUST_BACKEND and _rust_bootstrap_variance_sdid is not None:
-            # Generate random seed when self.seed is None
-            rust_seed = self.seed if self.seed is not None else int(
-                np.random.default_rng(None).integers(0, 2**63)
-            )
-
-            se, boot_arr, n_failed = _rust_bootstrap_variance_sdid(
-                np.ascontiguousarray(Y_pre_control, dtype=np.float64),
-                np.ascontiguousarray(Y_post_control, dtype=np.float64),
-                np.ascontiguousarray(Y_pre_treated, dtype=np.float64),
-                np.ascontiguousarray(Y_post_treated, dtype=np.float64),
-                np.ascontiguousarray(unit_weights, dtype=np.float64),
-                np.ascontiguousarray(time_weights, dtype=np.float64),
-                self.n_bootstrap, rust_seed,
-            )
-            bootstrap_estimates = np.asarray(boot_arr)
-            n_successful = len(bootstrap_estimates)
-            failure_rate = 1 - (n_successful / self.n_bootstrap)
-
-            # Apply same warning/error logic as Python path
-            if n_successful == 0:
-                raise ValueError(
-                    f"All {self.n_bootstrap} bootstrap iterations failed. "
-                    f"This typically occurs when:\n"
-                    f"  - Sample size is too small for reliable resampling\n"
-                    f"  - Weight matrices are singular or near-singular\n"
-                    f"  - Insufficient pre-treatment periods for weight estimation\n"
-                    f"  - Too few control units relative to treated units\n"
-                    f"Consider using variance_method='placebo' or increasing "
-                    f"the regularization parameters (zeta_omega, zeta_lambda)."
-                )
-            elif n_successful == 1:
-                warnings.warn(
-                    f"Only 1/{self.n_bootstrap} bootstrap iteration succeeded. "
-                    f"Standard error cannot be computed reliably (requires at least 2). "
-                    f"Returning SE=0.0. Consider using variance_method='placebo' or "
-                    f"increasing the regularization (zeta_omega, zeta_lambda).",
-                    UserWarning,
-                    stacklevel=2,
-                )
-                return 0.0, bootstrap_estimates
-            elif failure_rate > 0.05:
-                warnings.warn(
-                    f"Only {n_successful}/{self.n_bootstrap} bootstrap iterations succeeded "
-                    f"({failure_rate:.1%} failure rate). Standard errors may be unreliable. "
-                    f"This can occur with small samples or insufficient pre-treatment periods.",
-                    UserWarning,
-                    stacklevel=2,
-                )
-
-            return se, bootstrap_estimates
-
-        # Python fallback
         bootstrap_estimates = []
 
         for _ in range(self.n_bootstrap):
@@ -796,53 +740,6 @@ def _placebo_variance_se(
             )
             return 0.0, np.array([])
 
-        # Try Rust parallel implementation for ~8x speedup
-        from diff_diff._backend import HAS_RUST_BACKEND, _rust_placebo_variance_sdid
-
-        if HAS_RUST_BACKEND and _rust_placebo_variance_sdid is not None:
-            # Generate random seed when self.seed is None (matching Python's non-reproducible behavior)
-            rust_seed = self.seed if self.seed is not None else int(
-                np.random.default_rng(None).integers(0, 2**63)
-            )
-
-            se, placebo_arr = _rust_placebo_variance_sdid(
-                np.ascontiguousarray(Y_pre_control, dtype=np.float64),
-                np.ascontiguousarray(Y_post_control, dtype=np.float64),
-                np.ascontiguousarray(Y_pre_treated_mean, dtype=np.float64),
-                np.ascontiguousarray(Y_post_treated_mean, dtype=np.float64),
-                n_treated, zeta_omega, zeta_lambda, min_decrease,
-                True,   # intercept
-                100,    # max_iter_pre_sparsify
-                10000,  # max_iter
-                replications, rust_seed,
-            )
-            placebo_estimates = np.asarray(placebo_arr)
-            n_successful = len(placebo_estimates)
-
-            # Apply same warning/error logic as Python path
-            if n_successful < 2:
-                warnings.warn(
-                    f"Only {n_successful} placebo replications completed successfully. "
-                    f"Standard error cannot be estimated reliably. "
-                    f"Consider using variance_method='bootstrap' or increasing "
-                    f"the number of control units.",
-                    UserWarning,
-                    stacklevel=3,
-                )
-                return 0.0, placebo_estimates
-
-            failure_rate = 1 - (n_successful / replications)
-            if failure_rate > 0.05:
-                warnings.warn(
-                    f"Only {n_successful}/{replications} placebo replications succeeded "
-                    f"({failure_rate:.1%} failure rate). Standard errors may be unreliable.",
-                    UserWarning,
-                    stacklevel=3,
-                )
-
-            return se, placebo_estimates
-
-        # Python fallback
         placebo_estimates = []
 
         for _ in range(replications):
diff --git a/docs/benchmarks.rst b/docs/benchmarks.rst
index eee42d90..3c4366a0 100644
--- a/docs/benchmarks.rst
+++ b/docs/benchmarks.rst
@@ -88,8 +88,8 @@ Summary Table
      - Yes
      - **PASS**
    * - SyntheticDiD
-     - 0.011
-     - 3.1%
+     - < 1e-10
+     - 0.3%
      - Yes
      - **PASS**
 
@@ -173,32 +173,36 @@ Synthetic DiD Results
    :header-rows: 1
 
    * - Metric
-     - diff-diff
+     - diff-diff (Pure)
+     - diff-diff (Rust)
      - R synthdid
      - Difference
    * - ATT
-     - 3.851
      - 3.840
-     - 0.011 (0.3%)
+     - 3.840
+     - 3.840
+     - < 1e-10
    * - SE
-     - 0.106
-     - 0.103
-     - 3.1%
+     - 0.105
+     - 0.099
+     - 0.105
+     - 0.3% (pure)
    * - Time (s)
-     - 0.017
-     - 7.49
-     - **433x faster**
-
-**Validation**: PASS - Both ATT and SE estimates match closely. Both implementations
-use placebo-based variance estimation (R's Algorithm 4 from Arkhangelsky et al. 2021).
-
-The small SE difference (3.1%) is due to different unit/time weight optimization
-algorithms:
-
-- diff-diff uses projected gradient descent
-- R synthdid uses Frank-Wolfe optimization with adaptive regularization
-
-This leads to slightly different weights, which propagate to the placebo estimates.
+     - 3.41
+     - 1.65
+     - 8.19
+     - **2.4x faster** (pure)
+
+**Validation**: PASS - ATT estimates are numerically identical across all
+implementations. Both diff-diff and R's synthdid use Frank-Wolfe optimization
+with two-pass sparsification and auto-computed regularization (``zeta_omega``,
+``zeta_lambda``), producing identical unit and time weights. Both use
+placebo-based variance estimation (Algorithm 4 from Arkhangelsky et al. 2021).
+
+The small SE difference (0.3% at small scale, up to ~7% at larger scales) is
+due to Monte Carlo variance in the placebo procedure, which randomly permutes
+control units to construct pseudo-treated groups. Different random seeds across
+implementations produce slightly different placebo samples.
 
 Callaway-Sant'Anna Results
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -362,49 +366,38 @@ Three-Way Performance Summary
      - R (s)
      - Python Pure (s)
      - Python Rust (s)
-     - Rust/R
+     - Pure/R
      - Rust/Pure
    * - small
-     - 7.73
-     - 0.016
-     - 0.004
-     - **2147x**
-     - **4.4x**
+     - 8.19
+     - 3.41
+     - 1.65
+     - **2.4x**
+     - **2.1x**
    * - 1k
-     - 108.1
-     - 0.068
-     - 0.101
-     - **1070x**
-     - 0.7x
+     - 111.7
+     - 24.0
+     - 76.1
+     - **4.7x**
+     - 0.3x
    * - 5k
-     - 504.5
-     - 2.97
-     - 0.69
-     - **734x**
-     - **4.3x**
-   * - 10k
-     - 1107.2
-     - 19.54
-     - 2.58
-     - **429x**
-     - **7.6x**
-   * - 20k
-     - 2451.0
-     - 137.3
-     - 10.9
-     - **225x**
-     - **12.6x**
+     - 524.2
+     - 31.7
+     - 307.5
+     - **16.5x**
+     - 0.1x
 
 .. note::
 
-   **SyntheticDiD Performance**: diff-diff achieves **225x to 2147x speedup** over
-   R's synthdid package. At 10k scale, R takes ~18 minutes while Python Rust
-   completes in 2.6 seconds. At 20k scale, R takes ~41 minutes while Python Rust
-   completes in 11 seconds. The Rust backend provides **4-13x additional speedup**
-   over pure Python for SyntheticDiD due to optimized simplex projection and
-   synthetic weight computation. ATT estimates differ slightly due to different
-   weight optimization algorithms (projected gradient descent vs Frank-Wolfe),
-   but confidence intervals overlap.
+   **SyntheticDiD Performance**: diff-diff's pure Python backend achieves
+   **2.4x to 16.5x speedup** over R's synthdid package using the same
+   Frank-Wolfe optimization algorithm. At 5k scale, R takes ~9 minutes while
+   pure Python completes in 32 seconds. ATT estimates are numerically identical
+   (< 1e-10 difference) since both implementations use the same Frank-Wolfe
+   optimizer with two-pass sparsification. The Rust backend provides a speedup
+   at small scale (2.1x over pure Python) but is slower at larger scales due to
+   overhead in the placebo variance estimation loop; this is a known area for
+   future optimization.
 
 Dataset Sizes
 ~~~~~~~~~~~~~
@@ -457,29 +450,30 @@ Key Observations
 
    - **BasicDiD/TWFE**: 2-17x faster than R at all scales
    - **CallawaySantAnna**: 4-11x faster than R at all scales (vectorized WIF computation)
-   - **SyntheticDiD**: 225-2147x faster than R (R takes 41 minutes at 20k scale!)
+   - **SyntheticDiD**: 2.4-16.5x faster than R (pure Python), with both
+     implementations using the same Frank-Wolfe algorithm
 
 2. **Rust backend benefit depends on the estimator**:
 
-   - **SyntheticDiD**: Rust provides **4-13x speedup** over pure Python due to
-     optimized simplex projection and synthetic weight computation
+   - **SyntheticDiD**: Rust provides speedup at small scale (2.1x) but is
+     slower at larger scales due to placebo variance loop overhead
    - **BasicDiD/CallawaySantAnna**: Rust provides minimal benefit (~1x) since
      these estimators use OLS/variance computations already optimized in NumPy/SciPy
 
 3. **When to use Rust backend**:
 
-   - **SyntheticDiD**: Recommended - provides significant speedup (4-13x)
+   - **SyntheticDiD at small scale**: Rust is ~2x faster than pure Python
    - **Bootstrap inference**: May help with parallelized iterations
    - **BasicDiD/CallawaySantAnna**: Optional - pure Python is equally fast
 
 4. **Scaling behavior**: Python implementations show excellent scaling behavior
-   across all estimators. SyntheticDiD is 225x faster than R at 20k scale.
-   CallawaySantAnna achieves **exact SE accuracy** (0.0% difference) while
-   being 4-11x faster than R through vectorized NumPy operations.
+   across all estimators. SyntheticDiD pure Python is 16.5x faster than R at
+   5k scale. CallawaySantAnna achieves **exact SE accuracy** (0.0% difference)
+   while being 4-11x faster than R through vectorized NumPy operations.
 
 5. **No Rust required for most use cases**: Users without Rust/maturin can
    install diff-diff and get full functionality with excellent performance.
-   Only SyntheticDiD benefits significantly from the Rust backend.
+   Pure Python is the fastest option for SyntheticDiD at 1k+ scales.
 
 6. **CallawaySantAnna accuracy and speed**: As of v2.0.3, CallawaySantAnna
    achieves both exact numerical accuracy (0.0% SE difference from R) AND
@@ -650,9 +644,11 @@ When to Trust Results
   ``treated * time_f | unit`` interaction syntax (unit FE absorbed). Both average
   ATT and all period-level effects match to machine precision. Use with confidence.
 
-- **SyntheticDiD**: Both point estimates (0.3% diff) and standard errors (3.1% diff)
-  match R closely. Use ``variance_method="placebo"`` (default) to match R's
-  inference. Results are fully validated.
+- **SyntheticDiD**: Point estimates are numerically identical (< 1e-10 diff) and
+  standard errors match closely (0.3% diff at small scale). Both implementations
+  use Frank-Wolfe optimization with identical weights. Use
+  ``variance_method="placebo"`` (default) to match R's inference. Results are
+  fully validated.
 
 - **CallawaySantAnna**: Both group-time effects (ATT(g,t)) and overall ATT
   aggregation match R exactly. Standard errors are numerically equivalent
@@ -668,9 +664,10 @@ Known Differences
 2. **Aggregation Weights**: Overall ATT is a weighted average of ATT(g,t).
    Weighting schemes may differ between implementations.
 
-3. **Weight Optimization**: SyntheticDiD uses different optimization algorithms
-   for unit/time weights (projected gradient descent vs Frank-Wolfe). This leads
-   to slightly different weights but equivalent ATT estimates.
+3. **Placebo Variance**: SyntheticDiD SE estimates differ slightly (0.3-7%)
+   across implementations due to Monte Carlo variance in the placebo procedure.
+   Point estimates and unit/time weights are numerically identical since both
+   implementations use the same Frank-Wolfe optimizer.
 
 References
 ----------
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index e7c232c1..15606281 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -7,7 +7,6 @@ use pyo3::prelude::*;
 
 mod bootstrap;
 mod linalg;
-mod sdid_variance;
 mod trop;
 mod weights;
 
@@ -43,10 +42,6 @@ fn _rust_backend(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(trop::loocv_grid_search_joint, m)?)?;
     m.add_function(wrap_pyfunction!(trop::bootstrap_trop_variance_joint, m)?)?;
 
-    // SDID variance estimation (parallel placebo and bootstrap)
-    m.add_function(wrap_pyfunction!(sdid_variance::placebo_variance_sdid, m)?)?;
-    m.add_function(wrap_pyfunction!(sdid_variance::bootstrap_variance_sdid, m)?)?;
-
     // Version info
     m.add("__version__", env!("CARGO_PKG_VERSION"))?;
 
diff --git a/rust/src/sdid_variance.rs b/rust/src/sdid_variance.rs
deleted file mode 100644
index cae15cf4..00000000
--- a/rust/src/sdid_variance.rs
+++ /dev/null
@@ -1,498 +0,0 @@
-//! SDID variance estimation acceleration.
-//!
-//! This module provides parallelized implementations of:
-//! - Placebo-based variance estimation (Algorithm 4, Arkhangelsky et al. 2021)
-//! - Bootstrap variance estimation with fixed weights
-//!
-//! Both functions parallelize across replications using rayon, providing
-//! near-linear speedup on multi-core machines (e.g., ~8x on 8 cores).
-//!
-//! Reference:
-//! Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S.
-//! (2021). Synthetic Difference-in-Differences. American Economic Review,
-//! 111(12), 4088-4118.
-
-use ndarray::{Array1, Array2, Axis};
-use numpy::{PyArray1, PyReadonlyArray1, PyReadonlyArray2, ToPyArray};
-use pyo3::prelude::*;
-use rayon::prelude::*;
-
-use crate::weights;
-
-/// Fisher-Yates permutation of 0..n using the provided RNG.
-fn fisher_yates_permutation(rng: &mut impl rand::Rng, n: usize) -> Vec<usize> {
-    let mut perm: Vec<usize> = (0..n).collect();
-    for i in (1..n).rev() {
-        let j = rng.gen_range(0..=i);
-        perm.swap(i, j);
-    }
-    perm
-}
-
-/// Extract columns from a matrix by index.
-fn extract_submatrix_cols(matrix: &Array2<f64>, col_indices: &[usize]) -> Array2<f64> {
-    let n_rows = matrix.nrows();
-    let n_cols = col_indices.len();
-    let mut result = Array2::zeros((n_rows, n_cols));
-    for (new_j, &old_j) in col_indices.iter().enumerate() {
-        for i in 0..n_rows {
-            result[[i, new_j]] = matrix[[i, old_j]];
-        }
-    }
-    result
-}
-
-/// Compute column means for selected columns.
-fn column_means(matrix: &Array2<f64>, col_indices: &[usize]) -> Array1<f64> {
-    let n_rows = matrix.nrows();
-    let n_cols = col_indices.len();
-    if n_cols == 0 {
-        return Array1::zeros(n_rows);
-    }
-    let mut means = Array1::zeros(n_rows);
-    for &j in col_indices {
-        for i in 0..n_rows {
-            means[i] += matrix[[i, j]];
-        }
-    }
-    means /= n_cols as f64;
-    means
-}
-
-/// Compute the SDID estimator (ATT) from pre/post control/treated data and weights.
-///
-/// Matches `compute_sdid_estimator` in Python's `utils.py:1587-1604`:
-///
-/// ```text
-/// τ̂ = (Ȳ_treated,post - Σ_t λ_t * Y_treated,t)
-///     - Σ_j ω_j * (Ȳ_j,post - Σ_t λ_t * Y_j,t)
-/// ```
-fn sdid_estimator_internal(
-    y_pre_control: &Array2<f64>,
-    y_post_control: &Array2<f64>,
-    y_pre_treated_mean: &Array1<f64>,
-    y_post_treated_mean: &Array1<f64>,
-    unit_weights: &Array1<f64>,
-    time_weights: &Array1<f64>,
-) -> f64 {
-    // Weighted pre-treatment averages
-    // time_weights @ Y_pre_control -> (n_control,)
-    let weighted_pre_control = time_weights.dot(&y_pre_control.view());
-    // time_weights @ Y_pre_treated_mean -> scalar
-    let weighted_pre_treated = time_weights.dot(y_pre_treated_mean);
-
-    // Post-treatment averages
-    let mean_post_control = y_post_control.mean_axis(Axis(0)).unwrap();
-    let mean_post_treated = y_post_treated_mean.mean().unwrap();
-
-    // DiD for treated: post - weighted pre
-    let did_treated = mean_post_treated - weighted_pre_treated;
-
-    // Weighted DiD for controls: sum over j of omega_j * (post_j - weighted_pre_j)
-    let diff_control = &mean_post_control - &weighted_pre_control;
-    let did_control = unit_weights.dot(&diff_control);
-
-    // SDID estimator
-    did_treated - did_control
-}
-
-/// Compute placebo-based variance for SDID in parallel.
-///
-/// Implements Algorithm 4 from Arkhangelsky et al. (2021), matching R's
-/// `synthdid::vcov(method = "placebo")`:
-///
-/// 1. Randomly permute control indices
-/// 2. Split into pseudo-controls and pseudo-treated
-/// 3. Re-estimate both omega and lambda on the permuted data
-/// 4. Compute SDID estimate with re-estimated weights
-/// 5. Repeat `replications` times (in parallel)
-/// 6. SE = sqrt((r-1)/r) * sd(estimates)
-///
-/// # Arguments
-/// * `y_pre_control` - Control outcomes in pre-treatment periods (n_pre, n_control)
-/// * `y_post_control` - Control outcomes in post-treatment periods (n_post, n_control)
-/// * `y_pre_treated_mean` - Mean treated outcomes in pre-treatment (n_pre,)
-/// * `y_post_treated_mean` - Mean treated outcomes in post-treatment (n_post,)
-/// * `n_treated` - Number of treated units in the original estimation
-/// * `zeta_omega` - Regularization parameter for unit weights
-/// * `zeta_lambda` - Regularization parameter for time weights
-/// * `min_decrease` - Convergence threshold for Frank-Wolfe
-/// * `intercept` - Column-center if true (default: true)
-/// * `max_iter_pre_sparsify` - Iterations for first pass (default: 100)
-/// * `max_iter` - Iterations for second pass (default: 10000)
-/// * `replications` - Number of placebo replications
-/// * `seed` - Random seed for reproducibility
-///
-/// # Returns
-/// (se, placebo_estimates) where se is the standard error and
-/// placebo_estimates is the array of successful placebo effects.
-#[pyfunction]
-#[pyo3(signature = (y_pre_control, y_post_control, y_pre_treated_mean, y_post_treated_mean,
-                     n_treated, zeta_omega, zeta_lambda, min_decrease,
-                     intercept=true, max_iter_pre_sparsify=100, max_iter=10000,
-                     replications=200, seed=42))]
-#[allow(clippy::too_many_arguments)]
-pub fn placebo_variance_sdid<'py>(
-    py: Python<'py>,
-    y_pre_control: PyReadonlyArray2<'py, f64>,
-    y_post_control: PyReadonlyArray2<'py, f64>,
-    y_pre_treated_mean: PyReadonlyArray1<'py, f64>,
-    y_post_treated_mean: PyReadonlyArray1<'py, f64>,
-    n_treated: usize,
-    zeta_omega: f64,
-    zeta_lambda: f64,
-    min_decrease: f64,
-    intercept: bool,
-    max_iter_pre_sparsify: usize,
-    max_iter: usize,
-    replications: usize,
-    seed: u64,
-) -> PyResult<(f64, Bound<'py, PyArray1<f64>>)> {
-    // Convert to owned arrays for Send across threads
-    let y_pre_c = y_pre_control.as_array().to_owned();
-    let y_post_c = y_post_control.as_array().to_owned();
-    let _y_pre_t_mean = y_pre_treated_mean.as_array().to_owned();
-    let _y_post_t_mean = y_post_treated_mean.as_array().to_owned();
-
-    let n_control = y_pre_c.ncols();
-
-    // Check if we have enough controls for the split
-    let n_pseudo_control = n_control.saturating_sub(n_treated);
-    if n_pseudo_control < 1 {
-        let empty = Array1::<f64>::zeros(0);
-        return Ok((0.0, empty.to_pyarray_bound(py)));
-    }
-
-    // Parallel loop over replications
-    let placebo_estimates: Vec<f64> = (0..replications)
-        .into_par_iter()
-        .filter_map(|b| {
-            use rand::prelude::*;
-            use rand_xoshiro::Xoshiro256PlusPlus;
-
-            let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed.wrapping_add(b as u64));
-
-            // Generate random permutation of control indices
-            let perm = fisher_yates_permutation(&mut rng, n_control);
-
-            // Split into pseudo-controls and pseudo-treated
-            let pseudo_control_idx = &perm[..n_pseudo_control];
-            let pseudo_treated_idx = &perm[n_pseudo_control..];
-
-            // Extract submatrices
-            let y_pre_pseudo_control = extract_submatrix_cols(&y_pre_c, pseudo_control_idx);
-            let y_post_pseudo_control = extract_submatrix_cols(&y_post_c, pseudo_control_idx);
-            let y_pre_pseudo_treated_mean = column_means(&y_pre_c, pseudo_treated_idx);
-            let y_post_pseudo_treated_mean = column_means(&y_post_c, pseudo_treated_idx);
-
-            // Re-estimate unit weights on permuted data
-            let pseudo_omega = weights::compute_sdid_unit_weights_internal(
-                &y_pre_pseudo_control.view(),
-                &y_pre_pseudo_treated_mean.view(),
-                zeta_omega,
-                intercept,
-                min_decrease,
-                max_iter_pre_sparsify,
-                max_iter,
-            );
-
-            // Re-estimate time weights on permuted data
-            let pseudo_lambda = weights::compute_time_weights_internal(
-                &y_pre_pseudo_control.view(),
-                &y_post_pseudo_control.view(),
-                zeta_lambda,
-                intercept,
-                min_decrease,
-                max_iter_pre_sparsify,
-                max_iter,
-            );
-
-            // Compute placebo SDID estimate
-            let tau = sdid_estimator_internal(
-                &y_pre_pseudo_control,
-                &y_post_pseudo_control,
-                &y_pre_pseudo_treated_mean,
-                &y_post_pseudo_treated_mean,
-                &pseudo_omega,
-                &pseudo_lambda,
-            );
-
-            if tau.is_finite() {
-                Some(tau)
-            } else {
-                None
-            }
-        })
-        .collect();
-
-    let n_successful = placebo_estimates.len();
-
-    // Compute SE: sqrt((r-1)/r) * sd(estimates, ddof=1) matching R's formula
-    let se = if n_successful < 2 {
-        0.0
-    } else {
-        let n = n_successful as f64;
-        let mean = placebo_estimates.iter().sum::<f64>() / n;
-        let variance = placebo_estimates
-            .iter()
-            .map(|x| (x - mean).powi(2))
-            .sum::<f64>()
-            / (n - 1.0);
-        ((n - 1.0) / n).sqrt() * variance.sqrt()
-    };
-
-    let estimates_arr = Array1::from_vec(placebo_estimates);
-    Ok((se, estimates_arr.to_pyarray_bound(py)))
-}
-
-/// Compute bootstrap variance for SDID in parallel.
-///
-/// Resamples all units (control + treated) with replacement, renormalizes
-/// original unit weights for the resampled controls, and computes the
-/// SDID estimator with **fixed** weights (no re-estimation).
-///
-/// This matches R's `synthdid::vcov(method="bootstrap")`.
-///
-/// # Arguments
-/// * `y_pre_control` - Control outcomes in pre-treatment (n_pre, n_control)
-/// * `y_post_control` - Control outcomes in post-treatment (n_post, n_control)
-/// * `y_pre_treated` - Treated outcomes in pre-treatment (n_pre, n_treated)
-/// * `y_post_treated` - Treated outcomes in post-treatment (n_post, n_treated)
-/// * `unit_weights` - Original unit weights (n_control,)
-/// * `time_weights` - Original time weights (n_pre,)
-/// * `n_bootstrap` - Number of bootstrap iterations
-/// * `seed` - Random seed
-///
-/// # Returns
-/// (se, bootstrap_estimates, n_failed)
-#[pyfunction]
-#[pyo3(signature = (y_pre_control, y_post_control, y_pre_treated, y_post_treated,
-                     unit_weights, time_weights, n_bootstrap, seed))]
-#[allow(clippy::too_many_arguments)]
-pub fn bootstrap_variance_sdid<'py>(
-    py: Python<'py>,
-    y_pre_control: PyReadonlyArray2<'py, f64>,
-    y_post_control: PyReadonlyArray2<'py, f64>,
-    y_pre_treated: PyReadonlyArray2<'py, f64>,
-    y_post_treated: PyReadonlyArray2<'py, f64>,
-    unit_weights: PyReadonlyArray1<'py, f64>,
-    time_weights: PyReadonlyArray1<'py, f64>,
-    n_bootstrap: usize,
-    seed: u64,
-) -> PyResult<(f64, Bound<'py, PyArray1<f64>>, usize)> {
-    // Convert to owned arrays for Send across threads
-    let y_pre_c = y_pre_control.as_array().to_owned();
-    let y_post_c = y_post_control.as_array().to_owned();
-    let y_pre_t = y_pre_treated.as_array().to_owned();
-    let y_post_t = y_post_treated.as_array().to_owned();
-    let omega = unit_weights.as_array().to_owned();
-    let lambda = time_weights.as_array().to_owned();
-
-    let n_control = y_pre_c.ncols();
-    let n_treated = y_pre_t.ncols();
-    let n_total = n_control + n_treated;
-    let n_pre = y_pre_c.nrows();
-
-    // Build full panel: (n_pre+n_post, n_control+n_treated)
-    let n_post = y_post_c.nrows();
-    let n_times = n_pre + n_post;
-    let mut y_full = Array2::zeros((n_times, n_total));
-    for t in 0..n_pre {
-        for j in 0..n_control {
-            y_full[[t, j]] = y_pre_c[[t, j]];
-        }
-        for j in 0..n_treated {
-            y_full[[t, n_control + j]] = y_pre_t[[t, j]];
-        }
-    }
-    for t in 0..n_post {
-        for j in 0..n_control {
-            y_full[[n_pre + t, j]] = y_post_c[[t, j]];
-        }
-        for j in 0..n_treated {
-            y_full[[n_pre + t, n_control + j]] = y_post_t[[t, j]];
-        }
-    }
-
-    // Parallel bootstrap loop
-    let bootstrap_estimates: Vec<f64> = (0..n_bootstrap)
-        .into_par_iter()
-        .filter_map(|b| {
-            use rand::prelude::*;
-            use rand_xoshiro::Xoshiro256PlusPlus;
-
-            let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed.wrapping_add(b as u64));
-
-            // Resample all units with replacement
-            let boot_idx: Vec<usize> = (0..n_total)
-                .map(|_| rng.gen_range(0..n_total))
-                .collect();
-
-            // Identify control vs treated in resampled set
-            let mut boot_control_idx = Vec::new();
-            let mut boot_treated_idx = Vec::new();
-            for &idx in &boot_idx {
-                if idx < n_control {
-                    boot_control_idx.push(idx);
-                } else {
-                    boot_treated_idx.push(idx);
-                }
-            }
-
-            // Skip degenerate samples
-            if boot_control_idx.is_empty() || boot_treated_idx.is_empty() {
-                return None;
-            }
-
-            // Renormalize unit weights for resampled controls
-            let boot_omega_raw: Array1<f64> =
-                Array1::from_iter(boot_control_idx.iter().map(|&i| omega[i]));
-            let boot_omega = weights::sum_normalize_internal(&boot_omega_raw);
-
-            let n_boot_control = boot_control_idx.len();
-            let n_boot_treated = boot_treated_idx.len();
-
-            // Extract resampled submatrices
-            let mut y_boot_pre_c = Array2::zeros((n_pre, n_boot_control));
-            let mut y_boot_post_c = Array2::zeros((n_post, n_boot_control));
-            for (new_j, &old_j) in boot_control_idx.iter().enumerate() {
-                for t in 0..n_pre {
-                    y_boot_pre_c[[t, new_j]] = y_full[[t, old_j]];
-                }
-                for t in 0..n_post {
-                    y_boot_post_c[[t, new_j]] = y_full[[n_pre + t, old_j]];
-                }
-            }
-
-            // Compute treated means for resampled treated units
-            let mut y_boot_pre_t_mean = Array1::zeros(n_pre);
-            let mut y_boot_post_t_mean = Array1::zeros(n_post);
-            for &old_j in &boot_treated_idx {
-                for t in 0..n_pre {
-                    y_boot_pre_t_mean[t] += y_full[[t, old_j]];
-                }
-                for t in 0..n_post {
-                    y_boot_post_t_mean[t] += y_full[[n_pre + t, old_j]];
-                }
-            }
-            y_boot_pre_t_mean /= n_boot_treated as f64;
-            y_boot_post_t_mean /= n_boot_treated as f64;
-
-            // Compute ATT with FIXED time_weights and renormalized omega
-            let tau = sdid_estimator_internal(
-                &y_boot_pre_c,
-                &y_boot_post_c,
-                &y_boot_pre_t_mean,
-                &y_boot_post_t_mean,
-                &boot_omega,
-                &lambda,
-            );
-
-            if tau.is_finite() {
-                Some(tau)
-            } else {
-                None
-            }
-        })
-        .collect();
-
-    let n_successful = bootstrap_estimates.len();
-    let n_failed = n_bootstrap - n_successful;
-
-    // Compute SE: std(estimates, ddof=1) — NO sqrt((r-1)/r) for bootstrap
-    let se = if n_successful < 2 {
-        0.0
-    } else {
-        let n = n_successful as f64;
-        let mean = bootstrap_estimates.iter().sum::<f64>() / n;
-        let variance = bootstrap_estimates
-            .iter()
-            .map(|x| (x - mean).powi(2))
-            .sum::<f64>()
-            / (n - 1.0);
-        variance.sqrt()
-    };
-
-    let estimates_arr = Array1::from_vec(bootstrap_estimates);
-    Ok((se, estimates_arr.to_pyarray_bound(py), n_failed))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use ndarray::array;
-
-    #[test]
-    fn test_fisher_yates_permutation_length() {
-        use rand::SeedableRng;
-        use rand_xoshiro::Xoshiro256PlusPlus;
-        let mut rng = Xoshiro256PlusPlus::seed_from_u64(42);
-        let perm = fisher_yates_permutation(&mut rng, 10);
-        assert_eq!(perm.len(), 10);
-        // All elements should be present
-        let mut sorted = perm.clone();
-        sorted.sort();
-        assert_eq!(sorted, (0..10).collect::<Vec<_>>());
-    }
-
-    #[test]
-    fn test_fisher_yates_permutation_different_seeds() {
-        use rand::SeedableRng;
-        use rand_xoshiro::Xoshiro256PlusPlus;
-        let mut rng1 = Xoshiro256PlusPlus::seed_from_u64(42);
-        let mut rng2 = Xoshiro256PlusPlus::seed_from_u64(43);
-        let perm1 = fisher_yates_permutation(&mut rng1, 20);
-        let perm2 = fisher_yates_permutation(&mut rng2, 20);
-        assert_ne!(perm1, perm2);
-    }
-
-    #[test]
-    fn test_extract_submatrix_cols() {
-        let m = array![[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]];
-        let sub = extract_submatrix_cols(&m, &[0, 2]);
-        assert_eq!(sub.nrows(), 2);
-        assert_eq!(sub.ncols(), 2);
-        assert!((sub[[0, 0]] - 1.0).abs() < 1e-10);
-        assert!((sub[[0, 1]] - 3.0).abs() < 1e-10);
-        assert!((sub[[1, 0]] - 5.0).abs() < 1e-10);
-        assert!((sub[[1, 1]] - 7.0).abs() < 1e-10);
-    }
-
-    #[test]
-    fn test_column_means() {
-        let m = array![[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]];
-        let means = column_means(&m, &[1, 3]);
-        // Mean of columns 1 and 3:
-        // Row 0: (2+4)/2 = 3
-        // Row 1: (6+8)/2 = 7
-        assert!((means[0] - 3.0).abs() < 1e-10);
-        assert!((means[1] - 7.0).abs() < 1e-10);
-    }
-
-    #[test]
-    fn test_sdid_estimator_internal_zero_weights() {
-        // When all time weights are on last pre-period and unit weights are uniform,
-        // should reduce to a simpler DiD
-        let y_pre_c = array![[1.0, 2.0], [3.0, 4.0]]; // (2 pre, 2 control)
-        let y_post_c = array![[5.0, 6.0]]; // (1 post, 2 control)
-        let y_pre_t = array![2.0, 4.0]; // (2 pre,) means
-        let y_post_t = array![10.0]; // (1 post,) means
-
-        let omega = array![0.5, 0.5]; // uniform
-        let lambda = array![0.0, 1.0]; // all weight on last pre-period
-
-        let tau = sdid_estimator_internal(
-            &y_pre_c, &y_post_c, &y_pre_t, &y_post_t, &omega, &lambda,
-        );
-
-        // Manual computation:
-        // weighted_pre_control = [0, 1] @ [[1,2],[3,4]] = [3, 4]
-        // weighted_pre_treated = [0, 1] @ [2, 4] = 4
-        // mean_post_control = [5, 6]
-        // mean_post_treated = 10
-        // did_treated = 10 - 4 = 6
-        // did_control = [0.5, 0.5] @ ([5,6] - [3,4]) = [0.5, 0.5] @ [2, 2] = 2
-        // tau = 6 - 2 = 4
-        assert!((tau - 4.0).abs() < 1e-10, "Expected 4.0, got {}", tau);
-    }
-}
diff --git a/tests/test_methodology_sdid.py b/tests/test_methodology_sdid.py
index 3eb86645..dd3cb40d 100644
--- a/tests/test_methodology_sdid.py
+++ b/tests/test_methodology_sdid.py
@@ -1039,6 +1039,94 @@ def test_floor_equivalence(self):
         assert abs(w_floor.sum() - 1.0) < 1e-10, "Weights should sum to 1"
 
 
+class TestBackendSEConsistency:
+    """Test that pure Python and Rust-accelerated backends produce identical SEs."""
+
+    def test_placebo_se_matches_across_backends(self):
+        """SyntheticDiD placebo SE is identical regardless of backend.
+
+        After removing the Rust outer-loop variance fast paths, both backends
+        use the same Python loop for placebo/bootstrap variance. The only
+        difference is that inner Frank-Wolfe weight calls may dispatch to Rust
+        (faer) vs NumPy. With the same seed, RNG sequences are identical, so
+        SEs should match within floating-point tolerance (rtol=1e-4 allows for
+        accumulated faer vs NumPy differences across hundreds of iterations).
+        """
+        import sys
+
+        df = _make_panel(n_control=20, n_treated=3, n_pre=5, n_post=3,
+                         att=5.0, seed=42)
+        post_periods = list(range(5, 8))
+
+        # Run with default backend (Rust-accelerated inner calls if available)
+        sdid_default = SyntheticDiD(
+            variance_method="placebo", n_bootstrap=50, seed=42
+        )
+        results_default = sdid_default.fit(
+            df, "outcome", "treated", "unit", "period", post_periods
+        )
+
+        # Run with pure Python backend
+        utils_mod = sys.modules["diff_diff.utils"]
+        with patch.object(utils_mod, "HAS_RUST_BACKEND", False):
+            sdid_py = SyntheticDiD(
+                variance_method="placebo", n_bootstrap=50, seed=42
+            )
+            results_py = sdid_py.fit(
+                df.copy(), "outcome", "treated", "unit", "period", post_periods
+            )
+
+        # ATT must be identical (same data, same algorithm)
+        np.testing.assert_allclose(
+            results_default.att, results_py.att, rtol=1e-6,
+            err_msg="ATT should match between backends"
+        )
+
+        # SE must match within tolerance (faer vs NumPy in FW inner loop)
+        np.testing.assert_allclose(
+            results_default.se, results_py.se, rtol=1e-4,
+            err_msg="Placebo SE should match between backends"
+        )
+
+    def test_bootstrap_se_matches_across_backends(self):
+        """SyntheticDiD bootstrap SE is identical regardless of backend."""
+        import sys
+
+        df = _make_panel(n_control=20, n_treated=3, n_pre=5, n_post=3,
+                         att=5.0, seed=42)
+        post_periods = list(range(5, 8))
+
+        # Run with default backend
+        sdid_default = SyntheticDiD(
+            variance_method="bootstrap", n_bootstrap=50, seed=42
+        )
+        results_default = sdid_default.fit(
+            df, "outcome", "treated", "unit", "period", post_periods
+        )
+
+        # Run with pure Python backend
+        utils_mod = sys.modules["diff_diff.utils"]
+        with patch.object(utils_mod, "HAS_RUST_BACKEND", False):
+            sdid_py = SyntheticDiD(
+                variance_method="bootstrap", n_bootstrap=50, seed=42
+            )
+            results_py = sdid_py.fit(
+                df.copy(), "outcome", "treated", "unit", "period", post_periods
+            )
+
+        # ATT must match
+        np.testing.assert_allclose(
+            results_default.att, results_py.att, rtol=1e-6,
+            err_msg="ATT should match between backends"
+        )
+
+        # Bootstrap SE must match (same RNG, same loop, only inner FW differs)
+        np.testing.assert_allclose(
+            results_default.se, results_py.se, rtol=1e-4,
+            err_msg="Bootstrap SE should match between backends"
+        )
+
+
 class TestEmptyPostGuard:
     """Test compute_time_weights guard for empty Y_post_control."""
 
diff --git a/tests/test_rust_backend.py b/tests/test_rust_backend.py
index 6bcc2698..376585b6 100644
--- a/tests/test_rust_backend.py
+++ b/tests/test_rust_backend.py
@@ -1871,280 +1871,6 @@ def test_full_sdid_rust_vs_python(self):
         )
 
 
-@pytest.mark.skipif(not HAS_RUST_BACKEND, reason="Rust backend not available")
-class TestSDIDVarianceRustBackend:
-    """Test suite for SDID parallel variance estimation in Rust."""
-
-    @staticmethod
-    def _make_sdid_data(n_control=10, n_treated=2, n_pre=6, n_post=2,
-                        true_effect=3.0, seed=42):
-        """Create well-conditioned SDID test data."""
-        rng = np.random.default_rng(seed)
-        Y_pre_control = rng.standard_normal((n_pre, n_control))
-        Y_post_control = rng.standard_normal((n_post, n_control))
-        Y_pre_treated = rng.standard_normal((n_pre, n_treated))
-        Y_post_treated = Y_pre_treated[:n_post, :] + true_effect + rng.standard_normal((n_post, n_treated)) * 0.3
-
-        Y_pre_treated_mean = np.mean(Y_pre_treated, axis=1)
-        Y_post_treated_mean = np.mean(Y_post_treated, axis=1)
-
-        return (Y_pre_control, Y_post_control, Y_pre_treated,
-                Y_post_treated, Y_pre_treated_mean, Y_post_treated_mean)
-
-    def test_placebo_se_returns_valid(self):
-        """Rust placebo SE produces valid float > 0."""
-        from diff_diff._rust_backend import placebo_variance_sdid
-
-        data = self._make_sdid_data()
-        Y_pre_c, Y_post_c, _, _, Y_pre_t_mean, Y_post_t_mean = data
-
-        se, estimates = placebo_variance_sdid(
-            Y_pre_c, Y_post_c, Y_pre_t_mean, Y_post_t_mean,
-            2,    # n_treated
-            0.5,  # zeta_omega
-            0.01, # zeta_lambda
-            1e-5, # min_decrease
-            True, 100, 10000, 50, 42,
-        )
-
-        assert isinstance(se, float)
-        assert se > 0, f"SE should be > 0, got {se}"
-        assert np.isfinite(se), f"SE should be finite, got {se}"
-        assert len(estimates) > 0, "Should have some estimates"
-
-    def test_placebo_se_reproducible(self):
-        """Same seed produces same SE and estimates."""
-        from diff_diff._rust_backend import placebo_variance_sdid
-
-        data = self._make_sdid_data()
-        Y_pre_c, Y_post_c, _, _, Y_pre_t_mean, Y_post_t_mean = data
-
-        args = (Y_pre_c, Y_post_c, Y_pre_t_mean, Y_post_t_mean,
-                2, 0.5, 0.01, 1e-5, True, 100, 10000, 30, 42)
-
-        se1, est1 = placebo_variance_sdid(*args)
-        se2, est2 = placebo_variance_sdid(*args)
-
-        assert abs(se1 - se2) < 1e-10, f"SEs should match: {se1} vs {se2}"
-        np.testing.assert_array_equal(est1, est2)
-
-    def test_placebo_estimates_correct_count(self):
-        """All replications succeed on well-conditioned data."""
-        from diff_diff._rust_backend import placebo_variance_sdid
-
-        data = self._make_sdid_data()
-        Y_pre_c, Y_post_c, _, _, Y_pre_t_mean, Y_post_t_mean = data
-        replications = 50
-
-        _, estimates = placebo_variance_sdid(
-            Y_pre_c, Y_post_c, Y_pre_t_mean, Y_post_t_mean,
-            2, 0.5, 0.01, 1e-5, True, 100, 10000, replications, 42,
-        )
-
-        assert len(estimates) == replications, \
-            f"Expected {replications} estimates, got {len(estimates)}"
-
-    def test_placebo_se_statistically_matches_python(self):
-        """Rust and Python SEs are within 30% (different RNG sequences)."""
-        from diff_diff._rust_backend import placebo_variance_sdid
-        from diff_diff.utils import (
-            compute_sdid_unit_weights as py_unit_weights,
-            compute_time_weights as py_time_weights,
-            compute_sdid_estimator,
-        )
-        from diff_diff.utils import _sum_normalize
-
-        data = self._make_sdid_data(seed=123)
-        Y_pre_c, Y_post_c, _, _, Y_pre_t_mean, Y_post_t_mean = data
-        n_treated = 2
-        replications = 100
-
-        # Rust SE (average over multiple seeds for stability)
-        rust_ses = []
-        for seed in [42, 43, 44]:
-            se_r, _ = placebo_variance_sdid(
-                Y_pre_c, Y_post_c, Y_pre_t_mean, Y_post_t_mean,
-                n_treated, 0.5, 0.01, 1e-5, True, 100, 10000, replications, seed,
-            )
-            rust_ses.append(se_r)
-        rust_mean_se = np.mean(rust_ses)
-
-        # Python SE (average over multiple seeds)
-        python_ses = []
-        for seed in [42, 43, 44]:
-            rng = np.random.default_rng(seed)
-            n_control = Y_pre_c.shape[1]
-            n_pseudo_control = n_control - n_treated
-            estimates = []
-            for _ in range(replications):
-                perm = rng.permutation(n_control)
-                pc_idx = perm[:n_pseudo_control]
-                pt_idx = perm[n_pseudo_control:]
-                y_pre_pc = Y_pre_c[:, pc_idx]
-                y_post_pc = Y_post_c[:, pc_idx]
-                y_pre_pt_mean = np.mean(Y_pre_c[:, pt_idx], axis=1)
-                y_post_pt_mean = np.mean(Y_post_c[:, pt_idx], axis=1)
-                omega = py_unit_weights(y_pre_pc, y_pre_pt_mean, zeta_omega=0.5, min_decrease=1e-5)
-                lam = py_time_weights(y_pre_pc, y_post_pc, zeta_lambda=0.01, min_decrease=1e-5)
-                tau = compute_sdid_estimator(y_pre_pc, y_post_pc, y_pre_pt_mean, y_post_pt_mean, omega, lam)
-                estimates.append(tau)
-            n = len(estimates)
-            py_se = np.sqrt((n - 1) / n) * np.std(estimates, ddof=1)
-            python_ses.append(py_se)
-        python_mean_se = np.mean(python_ses)
-
-        # SEs should be in the same ballpark (within 30%)
-        ratio = rust_mean_se / python_mean_se if python_mean_se > 0 else float('inf')
-        assert 0.7 < ratio < 1.3, \
-            f"SE ratio {ratio:.2f} out of range: Rust={rust_mean_se:.4f}, Python={python_mean_se:.4f}"
-
-    def test_placebo_insufficient_controls(self):
-        """Returns (0.0, []) when n_control <= n_treated."""
-        from diff_diff._rust_backend import placebo_variance_sdid
-
-        Y_pre_c = np.random.randn(5, 2)
-        Y_post_c = np.random.randn(2, 2)
-        Y_pre_t_mean = np.random.randn(5)
-        Y_post_t_mean = np.random.randn(2)
-
-        se, estimates = placebo_variance_sdid(
-            Y_pre_c, Y_post_c, Y_pre_t_mean, Y_post_t_mean,
-            3,    # n_treated > n_control=2
-            0.5, 0.01, 1e-5, True, 100, 10000, 50, 42,
-        )
-
-        assert se == 0.0
-        assert len(estimates) == 0
-
-    def test_bootstrap_se_returns_valid(self):
-        """Rust bootstrap SE produces valid float > 0."""
-        from diff_diff._rust_backend import bootstrap_variance_sdid
-
-        data = self._make_sdid_data()
-        Y_pre_c, Y_post_c, Y_pre_t, Y_post_t, _, _ = data
-
-        # Need unit/time weights
-        omega = np.ones(Y_pre_c.shape[1]) / Y_pre_c.shape[1]
-        lam = np.ones(Y_pre_c.shape[0]) / Y_pre_c.shape[0]
-
-        se, estimates, n_failed = bootstrap_variance_sdid(
-            Y_pre_c, Y_post_c, Y_pre_t, Y_post_t,
-            omega, lam, 50, 42,
-        )
-
-        assert isinstance(se, float)
-        assert se > 0, f"SE should be > 0, got {se}"
-        assert np.isfinite(se)
-        assert len(estimates) > 0
-
-    def test_bootstrap_se_reproducible(self):
-        """Same seed produces same SE."""
-        from diff_diff._rust_backend import bootstrap_variance_sdid
-
-        data = self._make_sdid_data()
-        Y_pre_c, Y_post_c, Y_pre_t, Y_post_t, _, _ = data
-        omega = np.ones(Y_pre_c.shape[1]) / Y_pre_c.shape[1]
-        lam = np.ones(Y_pre_c.shape[0]) / Y_pre_c.shape[0]
-
-        args = (Y_pre_c, Y_post_c, Y_pre_t, Y_post_t, omega, lam, 30, 42)
-
-        se1, est1, _ = bootstrap_variance_sdid(*args)
-        se2, est2, _ = bootstrap_variance_sdid(*args)
-
-        assert abs(se1 - se2) < 1e-10
-        np.testing.assert_array_equal(est1, est2)
-
-    def test_bootstrap_se_statistically_matches_python(self):
-        """Rust and Python bootstrap SEs are within 30%."""
-        from diff_diff._rust_backend import bootstrap_variance_sdid
-        from diff_diff.utils import compute_sdid_estimator, _sum_normalize
-
-        data = self._make_sdid_data(seed=99)
-        Y_pre_c, Y_post_c, Y_pre_t, Y_post_t, _, _ = data
-        n_control = Y_pre_c.shape[1]
-        n_treated = Y_pre_t.shape[1]
-        n_total = n_control + n_treated
-        n_pre = Y_pre_c.shape[0]
-        n_bootstrap = 200
-
-        omega = np.ones(n_control) / n_control
-        lam = np.ones(n_pre) / n_pre
-
-        # Rust SE
-        se_rust, _, _ = bootstrap_variance_sdid(
-            Y_pre_c, Y_post_c, Y_pre_t, Y_post_t,
-            omega, lam, n_bootstrap, 42,
-        )
-
-        # Python SE
-        Y_full = np.block([
-            [Y_pre_c, Y_pre_t],
-            [Y_post_c, Y_post_t]
-        ])
-        rng = np.random.default_rng(42)
-        py_estimates = []
-        for _ in range(n_bootstrap):
-            boot_idx = rng.choice(n_total, size=n_total, replace=True)
-            boot_is_control = boot_idx < n_control
-            if not np.any(boot_is_control) or np.all(boot_is_control):
-                continue
-            boot_omega = _sum_normalize(omega[boot_idx[boot_is_control]])
-            Y_boot = Y_full[:, boot_idx]
-            Y_boot_pre_t_mean = np.mean(Y_boot[:n_pre, ~boot_is_control], axis=1)
-            Y_boot_post_t_mean = np.mean(Y_boot[n_pre:, ~boot_is_control], axis=1)
-            tau = compute_sdid_estimator(
-                Y_boot[:n_pre, boot_is_control],
-                Y_boot[n_pre:, boot_is_control],
-                Y_boot_pre_t_mean, Y_boot_post_t_mean,
-                boot_omega, lam,
-            )
-            py_estimates.append(tau)
-        se_python = float(np.std(py_estimates, ddof=1))
-
-        ratio = se_rust / se_python if se_python > 0 else float('inf')
-        assert 0.5 < ratio < 2.0, \
-            f"Bootstrap SE ratio {ratio:.2f}: Rust={se_rust:.4f}, Python={se_python:.4f}"
-
-    def test_full_sdid_fit_uses_rust_variance(self):
-        """SyntheticDiD.fit() uses Rust variance when available and produces valid results."""
-        import pandas as pd
-        from diff_diff import SyntheticDiD
-
-        np.random.seed(42)
-        n_control, n_treated, n_pre, n_post = 8, 2, 6, 2
-        true_effect = 3.0
-        data = []
-        for i in range(n_control + n_treated):
-            is_treated = i >= n_control
-            for t in range(n_pre + n_post):
-                post = t >= n_pre
-                y = 1.0 + 0.5 * i + 0.3 * t + np.random.randn() * 0.3
-                if is_treated and post:
-                    y += true_effect
-                data.append({
-                    'unit': i, 'time': t, 'outcome': y,
-                    'treated': 1 if is_treated else 0,
-                })
-        df = pd.DataFrame(data)
-        post_periods = list(range(n_pre, n_pre + n_post))
-
-        # Fit with placebo variance (uses Rust)
-        sdid = SyntheticDiD(variance_method="placebo", n_bootstrap=50, seed=42)
-        results = sdid.fit(df, 'outcome', 'treated', 'unit', 'time', post_periods)
-
-        assert np.isfinite(results.att), f"ATT should be finite: {results.att}"
-        assert np.isfinite(results.se), f"SE should be finite: {results.se}"
-        assert results.se > 0, f"SE should be > 0: {results.se}"
-
-        # Fit with bootstrap variance (uses Rust)
-        sdid_boot = SyntheticDiD(variance_method="bootstrap", n_bootstrap=50, seed=42)
-        results_boot = sdid_boot.fit(df, 'outcome', 'treated', 'unit', 'time', post_periods)
-
-        assert np.isfinite(results_boot.att), f"Bootstrap ATT should be finite: {results_boot.att}"
-        assert np.isfinite(results_boot.se), f"Bootstrap SE should be finite: {results_boot.se}"
-        assert results_boot.se > 0, f"Bootstrap SE should be > 0: {results_boot.se}"
-
-
 class TestFallbackWhenNoRust:
     """Test that pure Python fallback works when Rust is unavailable."""