Skip to content

Commit b732007

Browse files
igerber and claude committed
Address AI review: runtime warning, docstring, summary refactor, edge-case test
- Add UserWarning in dCDH HonestDiD extraction about placebo-based pre-periods - Update REGISTRY.md to explicitly document library extension semantics - Update fit() docstring for honest_did (was "Reserved for Phase 3") - Include exception class name in HonestDiD failure warning - Factor summary() Phase 3 blocks into 5 private helper methods - Add test_dcdh_emits_placebo_warning and test_dcdh_empty_consecutive_block_raises Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent cb58478 commit b732007

File tree

5 files changed

+202
-124
lines changed

5 files changed

+202
-124
lines changed

diff_diff/chaisemartin_dhaultfoeuille.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,12 @@ def fit(
538538
pool to groups in the same set (Web Appendix Section 1.4).
539539
Requires ``L_max >= 1`` and time-invariant values per group.
540540
honest_did : bool, default=False
541-
**Reserved for Phase 3** (HonestDiD integration on placebos).
541+
Run HonestDiD sensitivity analysis (Rambachan & Roth 2023) on
542+
the placebo + event study surface. Requires ``L_max >= 1``.
543+
Default: relative magnitudes (DeltaRM, Mbar=1.0). Results
544+
stored on ``results.honest_did_results``; ``None`` with a
545+
warning if the solver fails. For custom parameters, call
546+
``compute_honest_did(results, ...)`` post-hoc instead.
542547
heterogeneity : str, optional
543548
Column name for a time-invariant covariate to test for
544549
heterogeneous effects (Web Appendix Section 1.5, Lemma 7).
@@ -2413,8 +2418,8 @@ def fit(
24132418
)
24142419
except (ValueError, np.linalg.LinAlgError) as exc:
24152420
warnings.warn(
2416-
f"HonestDiD computation failed: {exc}. "
2417-
f"results.honest_did_results will be None. "
2421+
f"HonestDiD computation failed ({type(exc).__name__}): "
2422+
f"{exc}. results.honest_did_results will be None. "
24182423
f"You can retry with compute_honest_did(results, ...) "
24192424
f"using different parameters.",
24202425
UserWarning,

diff_diff/chaisemartin_dhaultfoeuille_results.py

Lines changed: 148 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -806,126 +806,12 @@ def summary(self, alpha: Optional[float] = None) -> str:
806806

807807
lines.extend([""])
808808

809-
# --- Covariate adjustment diagnostics (DID^X) ---
810-
if self.covariate_residuals is not None:
811-
cov_df = self.covariate_residuals
812-
control_names = sorted(cov_df["covariate"].unique())
813-
n_baselines = cov_df["baseline_treatment"].nunique()
814-
failed = int((cov_df.groupby("baseline_treatment")["theta_hat"].first().isna()).sum())
815-
lines.extend(
816-
[
817-
thin,
818-
"Covariate Adjustment (DID^X) Diagnostics".center(width),
819-
thin,
820-
f"{'Controls:':<35} {', '.join(control_names):>10}",
821-
f"{'Baselines residualized:':<35} {n_baselines:>10}",
822-
f"{'Failed strata:':<35} {failed:>10}",
823-
thin,
824-
"",
825-
]
826-
)
827-
828-
# --- Linear trends cumulated level effects ---
829-
if self.linear_trends_effects is not None:
830-
lines.extend(
831-
[
832-
thin,
833-
"Cumulated Level Effects (DID^{fd}, trends_linear)".center(width),
834-
thin,
835-
header_row,
836-
thin,
837-
]
838-
)
839-
for l_h in sorted(self.linear_trends_effects.keys()):
840-
entry = self.linear_trends_effects[l_h]
841-
lines.append(
842-
_format_inference_row(
843-
f"Level_{l_h}",
844-
entry["effect"],
845-
entry["se"],
846-
entry["t_stat"],
847-
entry["p_value"],
848-
)
849-
)
850-
lines.extend([thin, ""])
851-
852-
# --- Heterogeneity test ---
853-
if self.heterogeneity_effects is not None:
854-
lines.extend(
855-
[
856-
thin,
857-
"Heterogeneity Test (Section 1.5, partial)".center(width),
858-
thin,
859-
f"{'Horizon':<15} {'beta^het':>12} {'Std. Err.':>12} "
860-
f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}",
861-
thin,
862-
]
863-
)
864-
for l_h in sorted(self.heterogeneity_effects.keys()):
865-
entry = self.heterogeneity_effects[l_h]
866-
lines.append(
867-
_format_inference_row(
868-
f"l={l_h}",
869-
entry["beta"],
870-
entry["se"],
871-
entry["t_stat"],
872-
entry["p_value"],
873-
)
874-
)
875-
lines.extend(
876-
[
877-
thin,
878-
"Note: Post-treatment regressions only (no placebo/joint test).",
879-
"",
880-
]
881-
)
882-
883-
# --- Design-2 switch-in / switch-out ---
884-
if self.design2_effects is not None:
885-
d2 = self.design2_effects
886-
si = d2.get("switch_in", {})
887-
so = d2.get("switch_out", {})
888-
lines.extend(
889-
[
890-
thin,
891-
"Design-2: Switch-In / Switch-Out (Section 1.6)".center(width),
892-
thin,
893-
f"{'Join-then-leave groups:':<35} {d2.get('n_design2_groups', 0):>10}",
894-
f"{'Switch-in effect (mean):':<35} "
895-
f"{_fmt_float(si.get('mean_effect', float('nan'))):>10}"
896-
f" (N={si.get('n_groups', 0)})",
897-
f"{'Switch-out effect (mean):':<35} "
898-
f"{_fmt_float(so.get('mean_effect', float('nan'))):>10}"
899-
f" (N={so.get('n_groups', 0)})",
900-
thin,
901-
"",
902-
]
903-
)
904-
905-
# --- HonestDiD sensitivity ---
906-
if self.honest_did_results is not None:
907-
hd = self.honest_did_results
908-
method_label = hd.method.replace("_", " ").title()
909-
m_val = hd.M
910-
sig_label = "Yes" if hd.is_significant else "No"
911-
conf_pct = int((1 - hd.alpha) * 100)
912-
lines.extend(
913-
[
914-
thin,
915-
"HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width),
916-
thin,
917-
f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})",
918-
f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}",
919-
f"{'Identified set:':<35} "
920-
f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]",
921-
f"{'Robust ' + str(conf_pct) + '% CI:':<35} "
922-
f"[{_fmt_float(hd.ci_lb)}, {_fmt_float(hd.ci_ub)}]",
923-
f"{'Significant at ' + str(int(hd.alpha * 100)) + '%:':<35} "
924-
f"{sig_label:>10}",
925-
thin,
926-
"",
927-
]
928-
)
809+
# --- Phase 3 extension blocks (factored into helpers) ---
810+
self._render_covariate_section(lines, width, thin)
811+
self._render_linear_trends_section(lines, width, thin, header_row)
812+
self._render_heterogeneity_section(lines, width, thin)
813+
self._render_design2_section(lines, width, thin)
814+
self._render_honest_did_section(lines, width, thin)
929815

930816
# --- TWFE diagnostic ---
931817
if self.twfe_beta_fe is not None:
@@ -971,6 +857,148 @@ def print_summary(self, alpha: Optional[float] = None) -> None:
971857
"""Print the formatted summary to stdout."""
972858
print(self.summary(alpha))
973859

860+
# ------------------------------------------------------------------
861+
# Summary section helpers (Phase 3 blocks)
862+
# ------------------------------------------------------------------
863+
864+
def _render_covariate_section(
865+
self, lines: List[str], width: int, thin: str
866+
) -> None:
867+
if self.covariate_residuals is None:
868+
return
869+
cov_df = self.covariate_residuals
870+
control_names = sorted(cov_df["covariate"].unique())
871+
n_baselines = cov_df["baseline_treatment"].nunique()
872+
failed = int(
873+
(cov_df.groupby("baseline_treatment")["theta_hat"].first().isna()).sum()
874+
)
875+
lines.extend(
876+
[
877+
thin,
878+
"Covariate Adjustment (DID^X) Diagnostics".center(width),
879+
thin,
880+
f"{'Controls:':<35} {', '.join(control_names):>10}",
881+
f"{'Baselines residualized:':<35} {n_baselines:>10}",
882+
f"{'Failed strata:':<35} {failed:>10}",
883+
thin,
884+
"",
885+
]
886+
)
887+
888+
def _render_linear_trends_section(
889+
self, lines: List[str], width: int, thin: str, header_row: str
890+
) -> None:
891+
if self.linear_trends_effects is None:
892+
return
893+
lines.extend(
894+
[
895+
thin,
896+
"Cumulated Level Effects (DID^{fd}, trends_linear)".center(width),
897+
thin,
898+
header_row,
899+
thin,
900+
]
901+
)
902+
for l_h in sorted(self.linear_trends_effects.keys()):
903+
entry = self.linear_trends_effects[l_h]
904+
lines.append(
905+
_format_inference_row(
906+
f"Level_{l_h}",
907+
entry["effect"],
908+
entry["se"],
909+
entry["t_stat"],
910+
entry["p_value"],
911+
)
912+
)
913+
lines.extend([thin, ""])
914+
915+
def _render_heterogeneity_section(
916+
self, lines: List[str], width: int, thin: str
917+
) -> None:
918+
if self.heterogeneity_effects is None:
919+
return
920+
lines.extend(
921+
[
922+
thin,
923+
"Heterogeneity Test (Section 1.5, partial)".center(width),
924+
thin,
925+
f"{'Horizon':<15} {'beta^het':>12} {'Std. Err.':>12} "
926+
f"{'t-stat':>10} {'P>|t|':>10} {'Sig.':>6}",
927+
thin,
928+
]
929+
)
930+
for l_h in sorted(self.heterogeneity_effects.keys()):
931+
entry = self.heterogeneity_effects[l_h]
932+
lines.append(
933+
_format_inference_row(
934+
f"l={l_h}",
935+
entry["beta"],
936+
entry["se"],
937+
entry["t_stat"],
938+
entry["p_value"],
939+
)
940+
)
941+
lines.extend(
942+
[
943+
thin,
944+
"Note: Post-treatment regressions only (no placebo/joint test).",
945+
"",
946+
]
947+
)
948+
949+
def _render_design2_section(
950+
self, lines: List[str], width: int, thin: str
951+
) -> None:
952+
if self.design2_effects is None:
953+
return
954+
d2 = self.design2_effects
955+
si = d2.get("switch_in", {})
956+
so = d2.get("switch_out", {})
957+
lines.extend(
958+
[
959+
thin,
960+
"Design-2: Switch-In / Switch-Out (Section 1.6)".center(width),
961+
thin,
962+
f"{'Join-then-leave groups:':<35} {d2.get('n_design2_groups', 0):>10}",
963+
f"{'Switch-in effect (mean):':<35} "
964+
f"{_fmt_float(si.get('mean_effect', float('nan'))):>10}"
965+
f" (N={si.get('n_groups', 0)})",
966+
f"{'Switch-out effect (mean):':<35} "
967+
f"{_fmt_float(so.get('mean_effect', float('nan'))):>10}"
968+
f" (N={so.get('n_groups', 0)})",
969+
thin,
970+
"",
971+
]
972+
)
973+
974+
def _render_honest_did_section(
975+
self, lines: List[str], width: int, thin: str
976+
) -> None:
977+
if self.honest_did_results is None:
978+
return
979+
hd = self.honest_did_results
980+
method_label = hd.method.replace("_", " ").title()
981+
m_val = hd.M
982+
sig_label = "Yes" if hd.is_significant else "No"
983+
conf_pct = int((1 - hd.alpha) * 100)
984+
lines.extend(
985+
[
986+
thin,
987+
"HonestDiD Sensitivity (Rambachan-Roth 2023)".center(width),
988+
thin,
989+
f"{'Method:':<35} {method_label} (M={_fmt_float(m_val)})",
990+
f"{'Original estimate:':<35} {_fmt_float(hd.original_estimate):>10}",
991+
f"{'Identified set:':<35} "
992+
f"[{_fmt_float(hd.lb)}, {_fmt_float(hd.ub)}]",
993+
f"{'Robust ' + str(conf_pct) + '% CI:':<35} "
994+
f"[{_fmt_float(hd.ci_lb)}, {_fmt_float(hd.ci_ub)}]",
995+
f"{'Significant at ' + str(int(hd.alpha * 100)) + '%:':<35} "
996+
f"{sig_label:>10}",
997+
thin,
998+
"",
999+
]
1000+
)
1001+
9741002
# ------------------------------------------------------------------
9751003
# to_dataframe
9761004
# ------------------------------------------------------------------

diff_diff/honest_did.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,20 @@ def _extract_event_study_params(
824824
)
825825

826826
if isinstance(results, ChaisemartinDHaultfoeuilleResults):
827+
import warnings
828+
829+
warnings.warn(
830+
"HonestDiD on dCDH results uses DID^{pl}_l placebo "
831+
"estimates as pre-period coefficients, not standard "
832+
"event-study pre-treatment coefficients. The Rambachan-"
833+
"Roth restrictions bound violations of the parallel "
834+
"trends assumption underlying the dCDH placebo "
835+
"estimand. This is a library extension; interpretation "
836+
"differs from canonical event-study HonestDiD.",
837+
UserWarning,
838+
stacklevel=3,
839+
)
840+
827841
if results.placebo_event_study is None:
828842
raise ValueError(
829843
"ChaisemartinDHaultfoeuilleResults must have placebo_event_study "

docs/methodology/REGISTRY.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param
617617

618618
- **Note (Phase 3 heterogeneity testing - partial implementation):** Partial implementation of the heterogeneity test from Web Appendix Section 1.5 (Assumption 15, Lemma 7). Computes post-treatment saturated OLS regressions of `S_g * (Y_{g, F_g-1+l} - Y_{g, F_g-1})` on a time-invariant covariate `X_g` plus cohort indicator dummies. Standard OLS inference is valid (paper shows no DID error correction needed). **Deviation from R `predict_het`:** R's full `predict_het` option additionally computes placebo regressions and a joint null test, and disallows combination with `controls`. This implementation provides only post-treatment regressions. **Rejected combinations:** `controls` (matching R), `trends_linear` (heterogeneity test uses raw level changes, incompatible with second-differenced outcomes), and `trends_nonparam` (heterogeneity test does not thread state-set control-pool restrictions). Results stored in `results.heterogeneity_effects`. Activated via `heterogeneity="covariate_column"` in `fit()`.
619619

620-
- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning.
620+
- **Note (HonestDiD integration):** HonestDiD sensitivity analysis (Rambachan & Roth 2023) is available on the placebo + event study surface via `honest_did=True` in `fit()` or `compute_honest_did(results)` post-hoc. **Library extension:** dCDH HonestDiD uses `DID^{pl}_l` placebo estimates as pre-period coefficients rather than standard event-study pre-treatment coefficients. The Rambachan-Roth restrictions bound violations of the parallel trends assumption underlying the dCDH placebo estimand; interpretation differs from canonical event-study HonestDiD. A `UserWarning` is emitted at runtime. Uses diagonal variance (no full VCV available for dCDH). Relative magnitudes (DeltaRM) with Mbar=1.0 is the default when called from `fit()`. When `trends_linear=True`, bounds apply to the second-differenced estimand (parallel trends in first differences). Requires `L_max >= 1` for multi-horizon placebos. Gaps in the horizon grid from `trends_nonparam` support-trimming are handled by filtering to the largest consecutive block and warning.
621621

622622
- **Note (Phase 3 Design-2 switch-in/switch-out):** Convenience wrapper for Web Appendix Section 1.6 (Assumption 16). Identifies groups with exactly 2 treatment changes (join then leave), reports switch-in and switch-out mean effects. This is a descriptive summary, not a full re-estimation with specialized control pools as described in the paper. **Always uses raw (unadjusted) outcomes** regardless of active `controls`, `trends_linear`, or `trends_nonparam` options - those adjustments apply to the main estimator surface but not to the Design-2 descriptive block. For full adjusted Design-2 estimation with proper control pools, the paper recommends "running the command on a restricted subsample and using `trends_nonparam` for the entry-timing grouping." Activated via `design2=True` in `fit()`, requires `drop_larger_lower=False` to retain 2-switch groups.
623623

tests/test_honest_did.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1398,3 +1398,34 @@ def test_dcdh_no_placebos_raises(self):
13981398
)
13991399
with pytest.raises(ValueError, match="placebo_event_study"):
14001400
compute_honest_did(r)
1401+
1402+
def test_dcdh_emits_placebo_warning(self):
1403+
"""compute_honest_did on dCDH emits warning about placebo-based pre-periods."""
1404+
import warnings
1405+
1406+
results = self._fit_dcdh()
1407+
with warnings.catch_warnings(record=True) as w:
1408+
warnings.simplefilter("always")
1409+
compute_honest_did(results)
1410+
placebo_warnings = [
1411+
x for x in w
1412+
if "placebo" in str(x.message).lower()
1413+
and "pre-period" in str(x.message).lower()
1414+
]
1415+
assert len(placebo_warnings) >= 1, (
1416+
"Expected a UserWarning about placebo-based pre-period inputs"
1417+
)
1418+
1419+
def test_dcdh_empty_consecutive_block_raises(self):
1420+
"""ValueError when all placebos have NaN SE (no valid pre-periods)."""
1421+
import warnings
1422+
1423+
# Fit real results, then corrupt placebo SEs to NaN
1424+
results = self._fit_dcdh()
1425+
for h in results.placebo_event_study:
1426+
results.placebo_event_study[h]["se"] = float("nan")
1427+
1428+
with warnings.catch_warnings():
1429+
warnings.simplefilter("ignore")
1430+
with pytest.raises(ValueError, match="No placebo horizons with finite SEs"):
1431+
compute_honest_did(results)

0 commit comments

Comments (0)