Skip to content

Commit 0b42b40

Browse files
igerber and claude committed
Fix P0/P1/P2/P3 issues from PR #218 review
P0-1: Fix double-weighting in vcov — solve_ols() now computes vcov on original-scale data with weights applied once (not on sqrt(w)-transformed data with weights applied again). Fix HC1 meat to use X'diag(w*u²)X instead of (X*w*u)'(X*w*u) which gave w² in the meat. P0-2: Replace one-shot weighted within-transformation with iterative alternating projections (max_iter=100, tol=1e-8) for correct weighted FWL residualization in TWFE. P0-3: Add NaN-vcov guard when no stratum contributes variance (all singletons skipped), preventing se=0 → t=±inf instead of NaN. P1: FPC validation now checks against PSU count (not obs count), enforces constancy within stratum, and rejects fpc-only designs. P2: Add survey_metadata to MultiPeriodDiDResults. Replace placeholder R reference tests with exact manual oracle tests. P3: Fix cluster comparison to use partition equivalence. Add docstrings for survey_design/weights/weight_type parameters. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4a54eed commit 0b42b40

File tree

9 files changed

+415
-235
lines changed

9 files changed

+415
-235
lines changed

diff_diff/estimators.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,10 @@ def fit(
181181
List of categorical column names for high-dimensional fixed effects.
182182
Uses within-transformation (demeaning) instead of dummy variables.
183183
More efficient for large numbers of categories (e.g., firm, individual).
184+
survey_design : SurveyDesign, optional
185+
Survey design specification for design-based inference. When provided,
186+
uses Taylor Series Linearization for variance estimation and
187+
applies sampling weights to the regression.
184188
185189
Returns
186190
-------
@@ -787,6 +791,10 @@ def fit( # type: ignore[override]
787791
is detected (suggests CallawaySantAnna instead). Does NOT affect
788792
standard error computation -- use the ``cluster`` parameter for
789793
cluster-robust SEs.
794+
survey_design : SurveyDesign, optional
795+
Survey design specification for design-based inference. When provided,
796+
uses Taylor Series Linearization for variance estimation and
797+
applies sampling weights to the regression.
790798
791799
Returns
792800
-------
@@ -951,8 +959,8 @@ def fit( # type: ignore[override]
951959
# Resolve survey design if provided
952960
from diff_diff.survey import _resolve_effective_cluster, _resolve_survey_for_fit
953961

954-
resolved_survey, survey_weights, survey_weight_type, _ = _resolve_survey_for_fit(
955-
survey_design, data, self.inference
962+
resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
963+
_resolve_survey_for_fit(survey_design, data, self.inference)
956964
)
957965

958966
# Handle absorbed fixed effects (within-transformation)
@@ -1161,6 +1169,7 @@ def fit( # type: ignore[override]
11611169
r_squared=r_squared,
11621170
reference_period=reference_period,
11631171
interaction_indices=interaction_indices,
1172+
survey_metadata=survey_metadata,
11641173
)
11651174

11661175
self._coefficients = coefficients

diff_diff/linalg.py

Lines changed: 78 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -390,6 +390,14 @@ def solve_ols(
390390
rank-deficient matrices. Use only when you know the design matrix is
391391
full rank. If the matrix is actually rank-deficient, results may be
392392
incorrect (minimum-norm solution instead of R-style NA handling).
393+
weights : ndarray of shape (n,), optional
394+
Observation weights for Weighted Least Squares. When provided,
395+
minimizes sum(w_i * (y_i - X_i @ beta)^2). Weights should be
396+
pre-normalized (e.g., mean=1 for pweights).
397+
weight_type : str, default "pweight"
398+
Type of weights: "pweight" (inverse selection probability),
399+
"fweight" (frequency), or "aweight" (inverse variance).
400+
Affects variance estimation but not coefficient computation.
393401
394402
Returns
395403
-------
@@ -497,6 +505,11 @@ def solve_ols(
497505
X = X * sqrt_w[:, np.newaxis]
498506
y = y * sqrt_w
499507

508+
# When weights are present, compute vcov separately on original-scale data
509+
# to avoid double-weighting. The backend only computes point estimates.
510+
_weighted_vcov_external = weights is not None
511+
_backend_return_vcov = return_vcov and not _weighted_vcov_external
512+
500513
# Fast path: skip rank check and use Rust directly when requested
501514
# This saves O(nk²) QR overhead but won't detect rank-deficient matrices
502515
result = None # Will hold the tuple from backend functions
@@ -507,23 +520,20 @@ def solve_ols(
507520
X,
508521
y,
509522
cluster_ids=cluster_ids,
510-
return_vcov=return_vcov,
523+
return_vcov=_backend_return_vcov,
511524
return_fitted=return_fitted,
512525
)
513526
# result is None on numerical instability → fall through
514527
if result is None:
515-
# Fall through to Python without rank check (user guarantees full rank)
516528
result = _solve_ols_numpy(
517529
X,
518530
y,
519531
cluster_ids=cluster_ids,
520-
return_vcov=return_vcov,
532+
return_vcov=_backend_return_vcov,
521533
return_fitted=return_fitted,
522534
rank_deficient_action=rank_deficient_action,
523535
column_names=column_names,
524536
_skip_rank_check=True,
525-
weights=weights,
526-
weight_type=weight_type,
527537
)
528538
else:
529539
# Check for rank deficiency using fast pivoted QR decomposition.
@@ -546,14 +556,13 @@ def solve_ols(
546556
X,
547557
y,
548558
cluster_ids=cluster_ids,
549-
return_vcov=return_vcov,
559+
return_vcov=_backend_return_vcov,
550560
return_fitted=return_fitted,
551561
)
552562

553563
if result is not None:
554-
# Check for NaN vcov: Rust SVD may detect rank-deficiency that QR missed
555564
vcov_check = result[-1]
556-
if return_vcov and vcov_check is not None and np.any(np.isnan(vcov_check)):
565+
if _backend_return_vcov and vcov_check is not None and np.any(np.isnan(vcov_check)):
557566
warnings.warn(
558567
"Rust backend detected ill-conditioned matrix (NaN in variance-covariance). "
559568
"Re-running with Python backend for proper rank detection.",
@@ -563,35 +572,41 @@ def solve_ols(
563572
result = None # Force Python fallback below
564573

565574
if result is None:
566-
# Python backend for: weighted, rank-deficient, Rust instability, no Rust
567575
result = _solve_ols_numpy(
568576
X,
569577
y,
570578
cluster_ids=cluster_ids,
571-
return_vcov=return_vcov,
579+
return_vcov=_backend_return_vcov,
572580
return_fitted=return_fitted,
573581
rank_deficient_action=rank_deficient_action,
574582
column_names=column_names,
575-
_precomputed_rank_info=(
576-
(rank, dropped_cols, pivot)
577-
if not (weights is not None and _original_X is not None)
578-
else None
579-
),
580-
weights=weights,
581-
weight_type=weight_type,
583+
_precomputed_rank_info=(rank, dropped_cols, pivot),
582584
)
583585

584-
# Back-transform residuals to original scale when WLS was applied.
585-
# WLS solves on transformed (X_w, y_w) but residuals should be y - X @ beta.
586+
# Back-transform residuals and compute weighted vcov on original-scale data.
587+
# The WLS transform (sqrt(w) scaling) is for point estimates only. Vcov must
588+
# be computed on original X and residuals with weights applied exactly once.
586589
if _original_X is not None and _original_y is not None:
587590
if return_fitted:
588591
coefficients, _resid_w, _fitted_w, vcov_out = result
589-
fitted_orig = np.dot(_original_X, coefficients)
590-
residuals_orig = _original_y - fitted_orig
591-
result = (coefficients, residuals_orig, fitted_orig, vcov_out)
592592
else:
593593
coefficients, _resid_w, vcov_out = result
594-
residuals_orig = _original_y - np.dot(_original_X, coefficients)
594+
595+
fitted_orig = np.dot(_original_X, coefficients)
596+
residuals_orig = _original_y - fitted_orig
597+
598+
if return_vcov:
599+
vcov_out = _compute_robust_vcov_numpy(
600+
_original_X,
601+
residuals_orig,
602+
cluster_ids,
603+
weights=weights,
604+
weight_type=weight_type,
605+
)
606+
607+
if return_fitted:
608+
result = (coefficients, residuals_orig, fitted_orig, vcov_out)
609+
else:
595610
result = (coefficients, residuals_orig, vcov_out)
596611

597612
return result
@@ -608,8 +623,6 @@ def _solve_ols_numpy(
608623
column_names: Optional[List[str]] = None,
609624
_precomputed_rank_info: Optional[Tuple[int, np.ndarray, np.ndarray]] = None,
610625
_skip_rank_check: bool = False,
611-
weights: Optional[np.ndarray] = None,
612-
weight_type: str = "pweight",
613626
) -> Union[
614627
Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]],
615628
Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]],
@@ -716,8 +729,6 @@ def _solve_ols_numpy(
716729
X_reduced,
717730
residuals,
718731
cluster_ids,
719-
weights=weights,
720-
weight_type=weight_type,
721732
)
722733
vcov = _expand_vcov_with_nan(vcov_reduced, k, kept_cols)
723734
else:
@@ -732,13 +743,7 @@ def _solve_ols_numpy(
732743
# Compute variance-covariance matrix if requested
733744
vcov = None
734745
if return_vcov:
735-
vcov = _compute_robust_vcov_numpy(
736-
X,
737-
residuals,
738-
cluster_ids,
739-
weights=weights,
740-
weight_type=weight_type,
741-
)
746+
vcov = _compute_robust_vcov_numpy(X, residuals, cluster_ids)
742747

743748
if return_fitted:
744749
return coefficients, residuals, fitted, vcov
@@ -892,8 +897,8 @@ def _compute_robust_vcov_numpy(
892897
if weights is not None and weight_type == "fweight":
893898
n_eff = int(np.sum(weights))
894899

895-
# Compute weighted scores: pweight/fweight multiply by w; aweight and
896-
# unweighted use raw residuals (aweight errors are ~homoskedastic after WLS)
900+
# Compute weighted scores for cluster-robust meat (outer product of sums).
901+
# pweight/fweight multiply by w; aweight and unweighted use raw residuals.
897902
_use_weighted_scores = weights is not None and weight_type not in ("aweight",)
898903
if _use_weighted_scores:
899904
scores = X * (weights * residuals)[:, np.newaxis]
@@ -902,8 +907,12 @@ def _compute_robust_vcov_numpy(
902907

903908
if cluster_ids is None:
904909
# HC1 (heteroskedasticity-robust) standard errors
910+
# For HC1, meat = X' diag(w * u²) X (NOT scores'scores which gives w²*u²)
905911
adjustment = n_eff / (n_eff - k)
906-
meat = scores.T @ scores
912+
if _use_weighted_scores:
913+
meat = np.dot(X.T, X * (weights * residuals**2)[:, np.newaxis])
914+
else:
915+
meat = np.dot(X.T, X * (residuals**2)[:, np.newaxis])
907916
else:
908917
# Cluster-robust standard errors (vectorized via groupby)
909918
cluster_ids = np.asarray(cluster_ids)
@@ -1450,22 +1459,42 @@ def fit(
14501459
# Rank-deficient: compute vcov for identified coefficients only
14511460
kept_cols = np.where(~nan_mask)[0]
14521461
X_reduced = X[:, kept_cols]
1453-
mse = np.sum(residuals**2) / (n - k_effective)
1454-
try:
1455-
vcov_reduced = np.linalg.solve(
1456-
X_reduced.T @ X_reduced, mse * np.eye(k_effective)
1457-
)
1458-
except np.linalg.LinAlgError:
1459-
vcov_reduced = np.linalg.pinv(X_reduced.T @ X_reduced) * mse
1462+
if self.weights is not None:
1463+
# Weighted classical vcov: use weighted RSS and X'WX
1464+
w = self.weights
1465+
mse = np.sum(w * residuals**2) / (n - k_effective)
1466+
XtWX_reduced = X_reduced.T @ (X_reduced * w[:, np.newaxis])
1467+
try:
1468+
vcov_reduced = np.linalg.solve(XtWX_reduced, mse * np.eye(k_effective))
1469+
except np.linalg.LinAlgError:
1470+
vcov_reduced = np.linalg.pinv(XtWX_reduced) * mse
1471+
else:
1472+
mse = np.sum(residuals**2) / (n - k_effective)
1473+
try:
1474+
vcov_reduced = np.linalg.solve(
1475+
X_reduced.T @ X_reduced, mse * np.eye(k_effective)
1476+
)
1477+
except np.linalg.LinAlgError:
1478+
vcov_reduced = np.linalg.pinv(X_reduced.T @ X_reduced) * mse
14601479
# Expand to full size with NaN for dropped columns
14611480
vcov = _expand_vcov_with_nan(vcov_reduced, k, kept_cols)
14621481
else:
14631482
# Full rank: standard computation
1464-
mse = np.sum(residuals**2) / (n - k)
1465-
try:
1466-
vcov = np.linalg.solve(X.T @ X, mse * np.eye(k))
1467-
except np.linalg.LinAlgError:
1468-
vcov = np.linalg.pinv(X.T @ X) * mse
1483+
if self.weights is not None:
1484+
# Weighted classical vcov: use weighted RSS and X'WX
1485+
w = self.weights
1486+
mse = np.sum(w * residuals**2) / (n - k)
1487+
XtWX = X.T @ (X * w[:, np.newaxis])
1488+
try:
1489+
vcov = np.linalg.solve(XtWX, mse * np.eye(k))
1490+
except np.linalg.LinAlgError:
1491+
vcov = np.linalg.pinv(XtWX) * mse
1492+
else:
1493+
mse = np.sum(residuals**2) / (n - k)
1494+
try:
1495+
vcov = np.linalg.solve(X.T @ X, mse * np.eye(k))
1496+
except np.linalg.LinAlgError:
1497+
vcov = np.linalg.pinv(X.T @ X) * mse
14691498

14701499
# Compute survey vcov if applicable
14711500
if _use_survey_vcov:

diff_diff/results.py

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -347,6 +347,8 @@ class MultiPeriodDiDResults:
347347
r_squared: Optional[float] = field(default=None)
348348
reference_period: Optional[Any] = field(default=None)
349349
interaction_indices: Optional[Dict[Any, int]] = field(default=None, repr=False)
350+
# Survey design metadata (SurveyMetadata instance from diff_diff.survey)
351+
survey_metadata: Optional[Any] = field(default=None)
350352

351353
def __repr__(self) -> str:
352354
"""Concise string representation."""
@@ -400,6 +402,28 @@ def summary(self, alpha: Optional[float] = None) -> str:
400402
if self.r_squared is not None:
401403
lines.append(f"{'R-squared:':<25} {self.r_squared:>10.4f}")
402404

405+
# Add survey design info
406+
if self.survey_metadata is not None:
407+
sm = self.survey_metadata
408+
lines.extend(
409+
[
410+
"",
411+
"-" * 80,
412+
"Survey Design".center(80),
413+
"-" * 80,
414+
f"{'Weight type:':<25} {sm.weight_type:>10}",
415+
]
416+
)
417+
if sm.n_strata is not None:
418+
lines.append(f"{'Strata:':<25} {sm.n_strata:>10}")
419+
if sm.n_psu is not None:
420+
lines.append(f"{'PSU/Cluster:':<25} {sm.n_psu:>10}")
421+
lines.append(f"{'Effective sample size:':<25} {sm.effective_n:>10.1f}")
422+
lines.append(f"{'Design effect (DEFF):':<25} {sm.design_effect:>10.2f}")
423+
if sm.df_survey is not None:
424+
lines.append(f"{'Survey d.f.:':<25} {sm.df_survey:>10}")
425+
lines.append("-" * 80)
426+
403427
# Pre-period effects (parallel trends test)
404428
pre_effects = {p: pe for p, pe in self.period_effects.items() if p in self.pre_periods}
405429
if pre_effects:
@@ -548,6 +572,17 @@ def to_dict(self) -> Dict[str, Any]:
548572
result[f"se_period_{period}"] = pe.se
549573
result[f"pval_period_{period}"] = pe.p_value
550574

575+
# Add survey metadata if present
576+
if self.survey_metadata is not None:
577+
sm = self.survey_metadata
578+
result["weight_type"] = sm.weight_type
579+
result["effective_n"] = sm.effective_n
580+
result["design_effect"] = sm.design_effect
581+
result["sum_weights"] = sm.sum_weights
582+
result["n_strata"] = sm.n_strata
583+
result["n_psu"] = sm.n_psu
584+
result["df_survey"] = sm.df_survey
585+
551586
return result
552587

553588
def to_dataframe(self) -> pd.DataFrame:

0 commit comments

Comments (0)