Guard W_max==0 division in twostep nuclear norm solver + update docstrings

igerber · claude · igerber · commit 44124ca15f9c · 2026-03-08T17:46:17.000-04:00
- Add conditional threshold when W_max==0 to prevent ZeroDivisionError,
  matching Rust backend behavior (trop.rs:665)
- Update Python and Rust docstrings to state the Lipschitz constant and step
  size actually used by the solver (L_f = 2·max(W), η = 1/(2·max(W)))
- Add regression test for all-zero weights edge case

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/diff_diff/trop.py b/diff_diff/trop.py
@@ -1990,10 +1990,11 @@ def _weighted_nuclear_norm_solve(
         paper's Equation 2 (page 7). The full objective is:
             min_L Σ W_{ti}(R_{ti} - L_{ti})² + λ_nn||L||_*
 
-        This uses a proximal gradient / soft-impute approach (Mazumder et al. 2010):
-            L_{k+1} = prox_{λ||·||_*}(L_k + W ⊙ (R - L_k))
-
-        where W ⊙ denotes element-wise multiplication with normalized weights.
+        This uses proximal gradient descent (Mazumder et al. 2010) with
+        FISTA/Nesterov acceleration. Lipschitz constant L_f = 2·max(W),
+        step size η = 1/(2·max(W)), proximal threshold η·λ_nn:
+            G_k = L_k + (W/max(W)) ⊙ (R - L_k)
+            L_{k+1} = prox_{η·λ_nn·||·||_*}(G_k)
 
         IMPORTANT: For observations with W=0 (treated observations), we keep
         L values from the previous iteration rather than setting L = R, which
@@ -2068,7 +2069,8 @@ def _weighted_nuclear_norm_solve(
 
             # Proximal step: soft-threshold singular values
             L_prev = L.copy()
-            L = self._soft_threshold_svd(gradient_step, lambda_nn / (2.0 * W_max))
+            threshold = lambda_nn / (2.0 * W_max) if W_max > 0 else lambda_nn / 2.0
+            L = self._soft_threshold_svd(gradient_step, threshold)
             t_fista = t_fista_new
 
             # Check convergence
diff --git a/rust/src/trop.rs b/rust/src/trop.rs
@@ -620,9 +620,10 @@ fn compute_weight_matrix(
 ///
 /// Minimizes: Σ W_{ti}(Y_{ti} - α_i - β_t - L_{ti})² + λ_nn||L||_*
 ///
-/// Paper alignment: Uses weighted proximal gradient for L update:
-///   L ← prox_{η·λ_nn·||·||_*}(L + η·(W ⊙ (R - L)))
-/// where η ≤ 1/max(W) for convergence.
+/// Paper alignment: Uses weighted proximal gradient for L update with
+/// Lipschitz constant L_f = 2·max(W), step size η = 1/(2·max(W)):
+///   G = L + (W/max(W)) ⊙ (R - L)
+///   L ← prox_{η·λ_nn·||·||_*}(G)
 ///
 /// Returns None if estimation fails due to numerical issues.
 #[allow(clippy::too_many_arguments)]
diff --git a/tests/test_trop.py b/tests/test_trop.py
@@ -2693,6 +2693,26 @@ def test_twostep_nonuniform_weights_objective(self):
             f"Nuclear norm not reduced: {nuclear_norm_L} >= {nuclear_norm_R}"
         )
 
+    def test_zero_weights_no_division_error(self):
+        """Verify solver handles all-zero weights without ZeroDivisionError."""
+        rng = np.random.default_rng(99)  # fixed seed keeps the random inputs reproducible
+        Y = rng.normal(0, 1, (6, 4))
+        W = np.zeros((6, 4))  # every weight is zero; presumably drives W_max to 0 in the solver
+        L_init = rng.normal(0, 1, (6, 4))
+
+        trop_est = TROP(method="twostep", n_bootstrap=2)
+        result = trop_est._weighted_nuclear_norm_solve(
+            Y=Y,
+            W=W,
+            L_init=L_init,
+            alpha=np.zeros(4),  # length matches Y's second axis
+            beta=np.zeros(6),  # length matches Y's first axis
+            lambda_nn=0.3,
+        )
+
+        assert np.isfinite(result).all(), "Result contains NaN or Inf"  # no NaN/Inf in the output
+        assert result.shape == (6, 4), f"Expected (6, 4), got {result.shape}"  # same shape as Y
+
 
 class TestTROPJointMethod:
     """Tests for TROP method='joint'.