From ccf9e7f38add945053cd3d737f994faf8f6f0e5c Mon Sep 17 00:00:00 2001 From: igerber Date: Sat, 21 Mar 2026 11:10:28 -0400 Subject: [PATCH 1/6] Fix mypy errors, add notebook CI, clean up TODO - Add TYPE_CHECKING-guarded method stubs to 3 bootstrap mixin classes, resolving all 9 mypy attr-defined errors (staggered_bootstrap.py, two_stage_bootstrap.py, imputation_bootstrap.py) - Add GitHub Actions workflow to execute 15 tutorial notebooks in CI via nbmake (triggered on PR/push/weekly schedule) - Add nbmake>=1.5 to dev dependencies - Clean up TODO.md: remove completed/crossed-out items, correct Sphinx warning diagnosis (376 from manual pages, not 1460 from stubs), mark CallawaySantAnna HonestDiD support as done, add C-LF implementation note Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/notebooks.yml | 52 +++++++++++++++++++++++++++++++ TODO.md | 29 +++++------------ diff_diff/imputation_bootstrap.py | 38 +++++++++++++++++++++- diff_diff/staggered_bootstrap.py | 20 +++++++++++- diff_diff/two_stage_bootstrap.py | 23 +++++++++++++- pyproject.toml | 1 + 6 files changed, 139 insertions(+), 24 deletions(-) create mode 100644 .github/workflows/notebooks.yml diff --git a/.github/workflows/notebooks.yml b/.github/workflows/notebooks.yml new file mode 100644 index 00000000..7cbf67bc --- /dev/null +++ b/.github/workflows/notebooks.yml @@ -0,0 +1,52 @@ +name: Tutorial Notebooks + +on: + push: + branches: [main] + paths: + - 'docs/tutorials/**' + - 'diff_diff/**' + - 'pyproject.toml' + - '.github/workflows/notebooks.yml' + pull_request: + branches: [main] + paths: + - 'docs/tutorials/**' + - 'diff_diff/**' + - 'pyproject.toml' + - '.github/workflows/notebooks.yml' + schedule: + # Weekly Sunday 6am UTC — catches external breakage (dataset URLs, etc.) 
+ - cron: '0 6 * * 0' + +jobs: + execute-notebooks: + name: Execute tutorial notebooks + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install numpy pandas scipy matplotlib nbmake pytest + + - name: Execute notebooks + run: | + DIFF_DIFF_BACKEND=python PYTHONPATH=. pytest --nbmake docs/tutorials/ \ + --nbmake-timeout=300 \ + -v \ + --tb=short + + - name: Upload failed notebook outputs + if: failure() + uses: actions/upload-artifact@v4 + with: + name: failed-notebook-outputs + path: docs/tutorials/*.ipynb + retention-days: 7 diff --git a/TODO.md b/TODO.md index 71770031..e69e4bbe 100644 --- a/TODO.md +++ b/TODO.md @@ -65,11 +65,9 @@ Deferred items from PR reviews that were not addressed before merge. | Issue | Location | PR | Priority | |-------|----------|----|----------| -| Tutorial notebooks not executed in CI | `docs/tutorials/*.ipynb` | #159 | Low | | R comparison tests spawn separate `Rscript` per test (slow CI) | `tests/test_methodology_twfe.py:294` | #139 | Low | | CS R helpers hard-code `xformla = ~ 1`; no covariate-adjusted R benchmark for IRLS path | `tests/test_methodology_callaway.py` | #202 | Low | -| ~~Context-dependent doc snippets pass via blanket NameError~~ | `tests/test_doc_snippets.py` | #206 | ~~Low~~ — resolved: allow-list replaces blanket catch | -| ~1,460 `duplicate object description` Sphinx warnings — each class attribute is documented in both module API pages and autosummary stubs; fix by adding `:no-index:` to one location or restructuring API docs to avoid overlap | `docs/api/*.rst`, `docs/api/_autosummary/` | — | Low | +| ~376 `duplicate object description` Sphinx warnings — caused by autodoc `:members:` on dataclass attributes within manual API pages (not from autosummary stubs); fix requires restructuring `docs/api/*.rst` pages to avoid documenting the same attribute via both 
`:members:` and inline `autosummary` tables | `docs/api/*.rst` | — | Low | --- @@ -88,29 +86,15 @@ Different estimators compute SEs differently. Consider unified interface. ### Type Annotations -Mypy reports 9 errors (down from 81 before spring cleanup). All remaining are -mixin `attr-defined` errors — methods accessed via `self` that live on the -concrete class, not the mixin. Fixing these requires Protocol classes, which is -low priority. - -| Category | Count | Notes | -|----------|-------|-------| -| attr-defined (mixin methods) | 9 | Structural — requires Protocol refactor | - -**Resolved in spring cleanup:** -- [x] `@overload` on `solve_ols` / `_solve_ols_numpy` — eliminated all unpacking mismatches -- [x] `assert X is not None` guards — eliminated all Optional indexing errors -- [x] Mixin scalar attribute stubs — eliminated 26 mixin attr-defined errors -- [x] Matplotlib `tab10` lookup fix +Mypy reports 0 errors. All mixin `attr-defined` errors resolved via +`TYPE_CHECKING`-guarded method stubs in bootstrap mixin classes. 
## Deprecated Code Deprecated parameters still present for backward compatibility: -- [x] `bootstrap_weight_type` in `CallawaySantAnna` (`staggered.py`) +- `bootstrap_weight_type` in `CallawaySantAnna` (`staggered.py`) - Deprecated in favor of `bootstrap_weights` parameter - - ✅ Deprecation warning updated to say "removed in v3.0" - - ✅ README.md and tutorial 02 updated to use `bootstrap_weights` - Remove in next major version (v3.0) --- @@ -126,7 +110,10 @@ Deprecated parameters still present for backward compatibility: Enhancements for `honest_did.py`: - [ ] Improved C-LF implementation with direct optimization instead of grid search -- [ ] Support for CallawaySantAnnaResults (currently only MultiPeriodDiDResults) + (current implementation uses simplified FLCI approach with estimation uncertainty + adjustment; see `honest_did.py:947`) +- [x] Support for CallawaySantAnnaResults (implemented in `honest_did.py:612-653`; + requires `aggregate='event_study'` when calling `CallawaySantAnna.fit()`) - [ ] Event-study-specific bounds for each post-period - [ ] Hybrid inference methods - [ ] Simulation-based power analysis for honest bounds diff --git a/diff_diff/imputation_bootstrap.py b/diff_diff/imputation_bootstrap.py index 436ffc12..3c14316b 100644 --- a/diff_diff/imputation_bootstrap.py +++ b/diff_diff/imputation_bootstrap.py @@ -6,7 +6,7 @@ """ import warnings -from typing import Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple import numpy as np import pandas as pd @@ -68,6 +68,42 @@ class ImputationDiDBootstrapMixin: anticipation: int horizon_max: Optional[int] + if TYPE_CHECKING: + + def _compute_cluster_psi_sums( + self, + df: pd.DataFrame, + outcome: str, + unit: str, + time: str, + first_treat: str, + covariates: Optional[List[str]], + omega_0_mask: pd.Series, + omega_1_mask: pd.Series, + unit_fe: Dict[Any, float], + time_fe: Dict[Any, float], + grand_mean: float, + delta_hat: Optional[np.ndarray], + weights: 
np.ndarray, + cluster_var: str, + kept_cov_mask: Optional[np.ndarray] = None, + ) -> Tuple[np.ndarray, np.ndarray]: ... + + @staticmethod + def _build_cohort_rel_times( + df: pd.DataFrame, + first_treat: str, + ) -> Dict[Any, Set[int]]: ... + + @staticmethod + def _compute_balanced_cohort_mask( + df_treated: pd.DataFrame, + first_treat: str, + all_horizons: List[int], + balance_e: int, + cohort_rel_times: Dict[Any, Set[int]], + ) -> np.ndarray: ... + def _precompute_bootstrap_psi( self, df: pd.DataFrame, diff --git a/diff_diff/staggered_bootstrap.py b/diff_diff/staggered_bootstrap.py index de037cf1..4a220474 100644 --- a/diff_diff/staggered_bootstrap.py +++ b/diff_diff/staggered_bootstrap.py @@ -29,7 +29,9 @@ ) if TYPE_CHECKING: - pass + import pandas as pd + + from diff_diff.staggered_aggregation import PrecomputedData # ============================================================================= @@ -117,6 +119,22 @@ class CallawaySantAnnaBootstrapMixin: seed: Optional[int] anticipation: int + if TYPE_CHECKING: + + def _compute_combined_influence_function( + self, + gt_pairs: List[Tuple[Any, Any]], + weights: np.ndarray, + effects: np.ndarray, + groups_for_gt: np.ndarray, + influence_func_info: Dict, + df: "pd.DataFrame", + unit: str, + precomputed: Optional["PrecomputedData"] = None, + global_unit_to_idx: Optional[Dict[Any, int]] = None, + n_global_units: Optional[int] = None, + ) -> Tuple[np.ndarray, Optional[List]]: ... 
+ def _run_multiplier_bootstrap( self, group_time_effects: Dict[Tuple[Any, Any], Dict[str, Any]], diff --git a/diff_diff/two_stage_bootstrap.py b/diff_diff/two_stage_bootstrap.py index 0932c68e..56899621 100644 --- a/diff_diff/two_stage_bootstrap.py +++ b/diff_diff/two_stage_bootstrap.py @@ -7,7 +7,7 @@ """ import warnings -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple import numpy as np import pandas as pd @@ -41,6 +41,27 @@ class TwoStageDiDBootstrapMixin: seed: Optional[int] horizon_max: Optional[int] + if TYPE_CHECKING: + from scipy import sparse + + def _build_fe_design( + self, + df: pd.DataFrame, + unit: str, + time: str, + covariates: Optional[List[str]], + omega_0_mask: pd.Series, + ) -> Tuple[ + "sparse.csr_matrix", "sparse.csr_matrix", Dict[Any, int], Dict[Any, int] + ]: ... + + @staticmethod + def _compute_gmm_scores( + c_by_cluster: np.ndarray, + gamma_hat: np.ndarray, + s2_by_cluster: np.ndarray, + ) -> np.ndarray: ... + def _compute_cluster_S_scores( self, df: pd.DataFrame, diff --git a/pyproject.toml b/pyproject.toml index 710a1be3..3e8fa05b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ dev = [ "mypy>=1.0", "maturin>=1.4,<2.0", "matplotlib>=3.5", + "nbmake>=1.5", ] docs = [ "sphinx>=6.0", From 07fe1fb8300f24e1439da5a53b3cc2845fc58618 Mon Sep 17 00:00:00 2001 From: igerber Date: Sat, 21 Mar 2026 11:32:18 -0400 Subject: [PATCH 2/6] Fix notebook CI: add repo root to kernel Python path PYTHONPATH=. only affects the shell, not the Jupyter kernel spawned by nbmake. Write a .pth file into site-packages so the kernel can import diff_diff. Also add ipykernel dependency and set DIFF_DIFF_BACKEND via env block. 
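For reference, the mechanism this relies on is CPython's `.pth` support: any
`*.pth` file sitting in a site directory is read at interpreter startup, and
each line naming an existing directory is appended to `sys.path` — which is why
the kernel can import `diff_diff` without an editable install. A minimal sketch
of that behavior (temporary directories stand in for site-packages and the repo
checkout; `site.addsitedir` triggers the same `.pth` processing that happens at
startup):

```python
import site
import sys
import tempfile
from pathlib import Path

# Stand-ins: one dir plays the role of site-packages, one the repo root.
site_dir = Path(tempfile.mkdtemp())
repo_root = Path(tempfile.mkdtemp())

# Each non-comment line of a .pth file that names an existing directory
# gets appended to sys.path when the site directory is processed.
(site_dir / "diff_diff_dev.pth").write_text(f"{repo_root}\n")

# Simulate interpreter startup's .pth scan for this one directory.
site.addsitedir(str(site_dir))

print(str(repo_root) in sys.path)  # → True
```

The same trick is what the workflow's `echo "$PWD" > {}/diff_diff_dev.pth` line
does for the real site-packages directory.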
Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/notebooks.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/notebooks.yml b/.github/workflows/notebooks.yml index 7cbf67bc..c279a3ff 100644 --- a/.github/workflows/notebooks.yml +++ b/.github/workflows/notebooks.yml @@ -34,11 +34,15 @@ jobs: - name: Install dependencies run: | - pip install numpy pandas scipy matplotlib nbmake pytest + pip install numpy pandas scipy matplotlib nbmake pytest ipykernel + # Add repo root to Python path so Jupyter kernels can import diff_diff + python -c "import site; print(site.getsitepackages()[0])" | xargs -I{} sh -c 'echo "$PWD" > {}/diff_diff_dev.pth' - name: Execute notebooks + env: + DIFF_DIFF_BACKEND: python run: | - DIFF_DIFF_BACKEND=python PYTHONPATH=. pytest --nbmake docs/tutorials/ \ + pytest --nbmake docs/tutorials/ \ --nbmake-timeout=300 \ -v \ --tb=short From b731c338420e22eb20c444b44265bfe0d4e26860 Mon Sep 17 00:00:00 2001 From: igerber Date: Sat, 21 Mar 2026 11:43:04 -0400 Subject: [PATCH 3/6] Address P3 review feedback on notebook workflow - Reword schedule comment: "smoke test" instead of overstating dataset URL breakage detection (loaders have fallbacks) - Add "Keep in sync" comment on manual dep list, matching the convention in rust-test.yml python-fallback job - Note why pip install -e . isn't used (requires Rust toolchain) Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/notebooks.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/notebooks.yml b/.github/workflows/notebooks.yml index c279a3ff..01e8fba1 100644 --- a/.github/workflows/notebooks.yml +++ b/.github/workflows/notebooks.yml @@ -16,7 +16,7 @@ on: - 'pyproject.toml' - '.github/workflows/notebooks.yml' schedule: - # Weekly Sunday 6am UTC — catches external breakage (dataset URLs, etc.) 
+ # Weekly Sunday 6am UTC — smoke test that notebooks still execute cleanly - cron: '0 6 * * 0' jobs: @@ -33,9 +33,11 @@ jobs: python-version: '3.11' - name: Install dependencies + # Keep in sync with pyproject.toml [project.dependencies] and [project.optional-dependencies.dev] run: | pip install numpy pandas scipy matplotlib nbmake pytest ipykernel # Add repo root to Python path so Jupyter kernels can import diff_diff + # (pip install -e . requires the Rust/maturin toolchain; .pth avoids that) python -c "import site; print(site.getsitepackages()[0])" | xargs -I{} sh -c 'echo "$PWD" > {}/diff_diff_dev.pth' - name: Execute notebooks From 0dc7e2c92826b1954e43dc29e12749526768dbd0 Mon Sep 17 00:00:00 2001 From: igerber Date: Sat, 21 Mar 2026 12:24:15 -0400 Subject: [PATCH 4/6] Fix notebook CI: increase timeout, exclude slow TROP notebook - Increase per-notebook timeout from 300s to 600s (pure Python mode without Rust backend is significantly slower for Monte Carlo and optimization-heavy notebooks) - Exclude 10_trop.ipynb (LOOCV grid search exceeds 600s in pure Python mode) Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/notebooks.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/notebooks.yml b/.github/workflows/notebooks.yml index 01e8fba1..44d10fbd 100644 --- a/.github/workflows/notebooks.yml +++ b/.github/workflows/notebooks.yml @@ -45,9 +45,12 @@ jobs: DIFF_DIFF_BACKEND: python run: | pytest --nbmake docs/tutorials/ \ - --nbmake-timeout=300 \ + --nbmake-timeout=600 \ + --ignore=docs/tutorials/10_trop.ipynb \ -v \ --tb=short + # Excluded notebooks: + # 10_trop — LOOCV grid search too slow for pure-Python CI (>600s) - name: Upload failed notebook outputs if: failure() From 1c6bb7d77fe948d2fa0f3591f9c57317b0a22fe7 Mon Sep 17 00:00:00 2001 From: igerber Date: Sat, 21 Mar 2026 12:55:48 -0400 Subject: [PATCH 5/6] Fix tutorial 07: use correct post_periods for event study The notebook was passing all periods (pre 
and post) as post_periods to MultiPeriodDiD, causing HonestDiD to fail with "No pre-period effects found" since the results had no pre-period classification. Fix: pass only actual post-treatment periods [5-9] to post_periods. MultiPeriodDiD automatically estimates pre-period coefficients for the event study, and HonestDiD can now correctly identify them. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/tutorials/07_pretrends_power.ipynb | 85 ++----------------------- 1 file changed, 4 insertions(+), 81 deletions(-) diff --git a/docs/tutorials/07_pretrends_power.ipynb b/docs/tutorials/07_pretrends_power.ipynb index 45f8fe81..6654afc6 100644 --- a/docs/tutorials/07_pretrends_power.ipynb +++ b/docs/tutorials/07_pretrends_power.ipynb @@ -131,28 +131,7 @@ "id": "cell-6", "metadata": {}, "outputs": [], - "source": [ - "# Fit event study with ALL periods (pre and post) relative to reference period\n", - "# For pre-trends power analysis, we need coefficients for pre-periods too\n", - "mp_did = MultiPeriodDiD()\n", - "\n", - "# Use period 4 as the reference period (last pre-period, excluded from estimation)\n", - "# Estimate coefficients for all other periods: 0, 1, 2, 3 (pre) and 5, 6, 7, 8, 9 (post)\n", - "all_estimation_periods = [0, 1, 2, 3, 5, 6, 7, 8, 9] # All except reference period 4\n", - "\n", - "event_results = mp_did.fit(\n", - " df,\n", - " outcome='outcome',\n", - " treatment='treated',\n", - " time='period',\n", - " post_periods=all_estimation_periods # Include all periods for full event study\n", - ")\n", - "\n", - "# Note: For standard DiD analysis, we'd normally use post_periods=[5,6,7,8,9]\n", - "# But for pre-trends power analysis, we need pre-period coefficients too\n", - "\n", - "print(event_results.summary())" - ] + "source": "# Fit event study with ALL periods (pre and post) relative to reference period\n# For pre-trends power analysis, we need coefficients for pre-periods too\nmp_did = MultiPeriodDiD()\n\n# Use period 4 as the reference period 
(last pre-period, excluded from estimation)\n# Specify post_periods as the actual post-treatment periods; MultiPeriodDiD\n# automatically estimates pre-period coefficients for the event study.\nevent_results = mp_did.fit(\n df,\n outcome='outcome',\n treatment='treated',\n time='period',\n post_periods=[5, 6, 7, 8, 9]\n)\n\nprint(event_results.summary())" }, { "cell_type": "code", @@ -199,24 +178,7 @@ "id": "cell-10", "metadata": {}, "outputs": [], - "source": [ - "# Create a PreTrendsPower object\n", - "pt = PreTrendsPower(\n", - " alpha=0.05, # Significance level for pre-trends test\n", - " power=0.80, # Target power for MDV calculation\n", - " violation_type='linear' # Type of violation to consider\n", - ")\n", - "\n", - "# Define the actual pre-treatment periods (those before treatment starts at period 5)\n", - "# These are the periods we want to analyze for pre-trends power\n", - "pre_treatment_periods = [0, 1, 2, 3]\n", - "\n", - "# Fit to the event study results, specifying which periods are pre-treatment\n", - "# This is needed because we estimated all periods as post_periods in the event study\n", - "pt_results = pt.fit(event_results, pre_periods=pre_treatment_periods)\n", - "\n", - "print(pt_results.summary())" - ] + "source": "# Create a PreTrendsPower object\npt = PreTrendsPower(\n alpha=0.05, # Significance level for pre-trends test\n power=0.80, # Target power for MDV calculation\n violation_type='linear' # Type of violation to consider\n)\n\n# Define the actual pre-treatment periods (those before treatment starts at period 5)\n# These are the periods we want to analyze for pre-trends power\npre_treatment_periods = [0, 1, 2, 3]\n\n# Fit to the event study results, specifying which periods are pre-treatment\npt_results = pt.fit(event_results, pre_periods=pre_treatment_periods)\n\nprint(pt_results.summary())" }, { "cell_type": "markdown", @@ -558,46 +520,7 @@ "id": "cell-30", "metadata": {}, "outputs": [], - "source": [ - "# Typical workflow for 
pre-trends power analysis\n", - "\n", - "# Step 1: Estimate event study with ALL periods (pre and post) relative to reference\n", - "# For pre-trends power analysis, we need pre-period coefficients\n", - "mp_did = MultiPeriodDiD()\n", - "\n", - "# Reference period is 4 (last pre-period)\n", - "# Estimate coefficients for periods 0, 1, 2, 3 (pre) and 5, 6, 7, 8, 9 (post)\n", - "all_estimation_periods = [0, 1, 2, 3, 5, 6, 7, 8, 9]\n", - "pre_treatment_periods = [0, 1, 2, 3] # Define which are pre-treatment\n", - "\n", - "results = mp_did.fit(\n", - " df, \n", - " outcome='outcome',\n", - " treatment='treated', \n", - " time='period',\n", - " post_periods=all_estimation_periods\n", - ")\n", - "\n", - "# Step 2: Assess power of the pre-trends test \n", - "print(\"Step 2: Pre-Trends Power Analysis\")\n", - "pt = PreTrendsPower(alpha=0.05, power=0.80, violation_type='linear')\n", - "pt_results = pt.fit(results, pre_periods=pre_treatment_periods)\n", - "print(f\"MDV (80% power): {pt_results.mdv:.3f}\")\n", - "print(\"\")\n", - "\n", - "# Step 3: Interpret\n", - "print(\"Step 3: Interpretation\")\n", - "print(f\"Your pre-trends test could only detect violations >= {pt_results.mdv:.3f}\")\n", - "print(f\"Violations smaller than this would likely go undetected.\")\n", - "print(\"\")\n", - "\n", - "# Step 4: Connect to Honest DiD for robust inference\n", - "print(\"Step 4: Robust Inference with Honest DiD\")\n", - "honest = HonestDiD(method='smoothness', M=pt_results.mdv)\n", - "honest_results = honest.fit(results)\n", - "print(f\"Robust 95% CI (M=MDV): [{honest_results.ci_lb:.3f}, {honest_results.ci_ub:.3f}]\")\n", - "print(f\"Conclusion: {'Effect is robust' if honest_results.is_significant else 'Effect may not be robust'}\")" - ] + "source": "# Typical workflow for pre-trends power analysis\n\n# Step 1: Estimate event study with proper pre/post period classification\nmp_did = MultiPeriodDiD()\n\n# Specify actual post-treatment periods; pre-period coefficients are\n# 
estimated automatically by MultiPeriodDiD for the event study\npre_treatment_periods = [0, 1, 2, 3] # Define which are pre-treatment\n\nresults = mp_did.fit(\n df, \n outcome='outcome',\n treatment='treated', \n time='period',\n post_periods=[5, 6, 7, 8, 9]\n)\n\n# Step 2: Assess power of the pre-trends test \nprint(\"Step 2: Pre-Trends Power Analysis\")\npt = PreTrendsPower(alpha=0.05, power=0.80, violation_type='linear')\npt_results = pt.fit(results, pre_periods=pre_treatment_periods)\nprint(f\"MDV (80% power): {pt_results.mdv:.3f}\")\nprint(\"\")\n\n# Step 3: Interpret\nprint(\"Step 3: Interpretation\")\nprint(f\"Your pre-trends test could only detect violations >= {pt_results.mdv:.3f}\")\nprint(f\"Violations smaller than this would likely go undetected.\")\nprint(\"\")\n\n# Step 4: Connect to Honest DiD for robust inference\nprint(\"Step 4: Robust Inference with Honest DiD\")\nhonest = HonestDiD(method='smoothness', M=pt_results.mdv)\nhonest_results = honest.fit(results)\nprint(f\"Robust 95% CI (M=MDV): [{honest_results.ci_lb:.3f}, {honest_results.ci_ub:.3f}]\")\nprint(f\"Conclusion: {'Effect is robust' if honest_results.is_significant else 'Effect may not be robust'}\")" }, { "cell_type": "markdown", @@ -693,4 +616,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file From fd8b192bbd3b0398b8b4ec202b054e9c250891c1 Mon Sep 17 00:00:00 2001 From: igerber Date: Sat, 21 Mar 2026 14:01:13 -0400 Subject: [PATCH 6/6] Exclude power analysis notebook from CI (>600s timeout) 06_power_analysis.ipynb runs SyntheticDiD simulate_power which is a Monte Carlo simulation too slow for pure-Python CI without the Rust backend. Same category as the already-excluded TROP notebook. 
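For context on why this category of notebook blows the timeout: Monte Carlo
power analysis re-runs the full estimator once per simulated dataset, so
runtime is roughly n_sims times the single-fit cost — and a pure-Python fit is
the slow part. Illustrative toy only (a plain two-sample z-test, not the
SyntheticDiD `simulate_power` API):

```python
import numpy as np

rng = np.random.default_rng(0)

def toy_simulate_power(effect, n=200, n_sims=500, alpha_z=1.96):
    """Estimate power by simulating data and re-testing n_sims times."""
    rejections = 0
    for _ in range(n_sims):
        control = rng.normal(0.0, 1.0, n)
        treated = rng.normal(effect, 1.0, n)
        diff = treated.mean() - control.mean()
        se = np.sqrt(control.var(ddof=1) / n + treated.var(ddof=1) / n)
        if abs(diff / se) > alpha_z:  # two-sided test at ~5%
            rejections += 1
    return rejections / n_sims

# Even this trivial estimator needs n_sims full re-fits; swap in a real
# SyntheticDiD fit per iteration and the cost dominates CI wall-clock time.
print(round(toy_simulate_power(effect=0.3), 2))  # power well above alpha
```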
Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/notebooks.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/notebooks.yml b/.github/workflows/notebooks.yml index 44d10fbd..97c20446 100644 --- a/.github/workflows/notebooks.yml +++ b/.github/workflows/notebooks.yml @@ -46,11 +46,13 @@ jobs: run: | pytest --nbmake docs/tutorials/ \ --nbmake-timeout=600 \ + --ignore=docs/tutorials/06_power_analysis.ipynb \ --ignore=docs/tutorials/10_trop.ipynb \ -v \ --tb=short - # Excluded notebooks: - # 10_trop — LOOCV grid search too slow for pure-Python CI (>600s) + # Excluded notebooks (too slow for pure-Python CI without Rust backend): + # 06_power_analysis — SyntheticDiD simulate_power Monte Carlo (>600s) + # 10_trop — LOOCV grid search (>600s) - name: Upload failed notebook outputs if: failure()