Skip to content

Commit 4d63845

Browse files
authored
Merge pull request #29 from DoubleML/jh_logistic
Coverage simulations for LPLR
2 parents 6e0f15c + 1275beb commit 4d63845

20 files changed

+506
-116
lines changed

.github/workflows/plr_sim.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ jobs:
2121
'scripts/plm/plr_ate_sensitivity.py',
2222
'scripts/plm/plr_cate.py',
2323
'scripts/plm/plr_gate.py',
24+
'scripts/plm/lplr_ate.py',
2425
]
2526

2627
steps:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
__pycache__/
2+
.idea/
23

34
# Logs
45
monte-cover/logs/

doc/_quarto-dev.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ website:
2121
- plm/plr_gate.qmd
2222
- plm/plr_cate.qmd
2323
- plm/pliv.qmd
24+
- plm/lplr.qmd
2425
# DID
2526
- did/did_pa.qmd
2627
- did/did_cs.qmd

doc/_website.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ website:
2424
- plm/plr.qmd
2525
- plm/plr_gate.qmd
2626
- plm/plr_cate.qmd
27+
- plm/lplr.qmd
2728
- plm/pliv.qmd
2829
- text: "DID"
2930
menu:

doc/plm/lplr.qmd

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
---
2+
title: "Logistic PLR Models"
3+
4+
jupyter: python3
5+
---
6+
7+
```{python}
8+
#| echo: false
9+
10+
import numpy as np
11+
import pandas as pd
12+
from itables import init_notebook_mode
13+
import os
14+
import sys
15+
16+
doc_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
17+
if doc_dir not in sys.path:
18+
sys.path.append(doc_dir)
19+
20+
from utils.style_tables import generate_and_show_styled_table
21+
22+
init_notebook_mode(all_interactive=True)
23+
```
24+
25+
## ATE Coverage
26+
27+
The simulations are based on the [make_lplr_LZZ2020](https://docs.doubleml.org/stable/api/generated/doubleml.plm.datasets.make_lplr_LZZ2020.html)-DGP with $500$ observations.
28+
29+
::: {.callout-note title="Metadata" collapse="true"}
30+
31+
```{python}
32+
#| echo: false
33+
metadata_file = '../../results/plm/lplr_ate_metadata.csv'
34+
metadata_df = pd.read_csv(metadata_file)
35+
print(metadata_df.T.to_string(header=False))
36+
```
37+
38+
:::
39+
40+
```{python}
41+
#| echo: false
42+
43+
# set up data and rename columns
44+
df_coverage = pd.read_csv("../../results/plm/lplr_ate_coverage.csv", index_col=None)
45+
46+
if "repetition" in df_coverage.columns and df_coverage["repetition"].nunique() == 1:
47+
n_rep_coverage = df_coverage["repetition"].unique()[0]
48+
elif "n_rep" in df_coverage.columns and df_coverage["n_rep"].nunique() == 1:
49+
n_rep_coverage = df_coverage["n_rep"].unique()[0]
50+
else:
51+
n_rep_coverage = "N/A" # Fallback if n_rep cannot be determined
52+
53+
display_columns_coverage = ["Learner m", "Learner M", "Learner t", "Bias", "CI Length", "Coverage"]
54+
```
55+
56+
### Nuisance space
57+
58+
```{python}
59+
# | echo: false
60+
61+
generate_and_show_styled_table(
62+
main_df=df_coverage,
63+
filters={"level": 0.95, "Score": "nuisance_space"},
64+
display_cols=display_columns_coverage,
65+
n_rep=n_rep_coverage,
66+
level_col="level",
67+
# rename_map={"Learner g": "Learner l"},
68+
coverage_highlight_cols=["Coverage"]
69+
)
70+
```
71+
72+
```{python}
73+
#| echo: false
74+
75+
generate_and_show_styled_table(
76+
main_df=df_coverage,
77+
filters={"level": 0.9, "Score": "nuisance_space"},
78+
display_cols=display_columns_coverage,
79+
n_rep=n_rep_coverage,
80+
level_col="level",
81+
# rename_map={"Learner g": "Learner l"},
82+
coverage_highlight_cols=["Coverage"]
83+
)
84+
```
85+
86+
### Instrument
87+
88+
89+
```{python}
90+
#| echo: false
91+
92+
generate_and_show_styled_table(
93+
main_df=df_coverage,
94+
filters={"level": 0.95, "Score": "instrument"},
95+
display_cols=display_columns_coverage,
96+
n_rep=n_rep_coverage,
97+
level_col="level",
98+
coverage_highlight_cols=["Coverage"]
99+
)
100+
```
101+
102+
```{python}
103+
#| echo: false
104+
105+
generate_and_show_styled_table(
106+
main_df=df_coverage,
107+
filters={"level": 0.9, "Score": "instrument"},
108+
display_cols=display_columns_coverage,
109+
n_rep=n_rep_coverage,
110+
level_col="level",
111+
coverage_highlight_cols=["Coverage"]
112+
)
113+
```

monte-cover/src/montecover/plm/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55
from montecover.plm.plr_ate_sensitivity import PLRATESensitivityCoverageSimulation
66
from montecover.plm.plr_cate import PLRCATECoverageSimulation
77
from montecover.plm.plr_gate import PLRGATECoverageSimulation
8+
from montecover.plm.lplr_ate import LPLRATECoverageSimulation
89

910
__all__ = [
1011
"PLRATECoverageSimulation",
1112
"PLIVLATECoverageSimulation",
1213
"PLRGATECoverageSimulation",
1314
"PLRCATECoverageSimulation",
1415
"PLRATESensitivityCoverageSimulation",
16+
"LPLRATECoverageSimulation",
1517
]
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import warnings
2+
from typing import Any, Dict, Optional
3+
4+
import doubleml as dml
5+
from doubleml.plm.datasets import make_lplr_LZZ2020
6+
7+
from montecover.base import BaseSimulation
8+
from montecover.utils import create_learner_from_config
9+
10+
11+
class LPLRATECoverageSimulation(BaseSimulation):
    """Simulation class for coverage properties of DoubleMLLPLR for ATE estimation.

    Each repetition fits a ``dml.DoubleMLLPLR`` model on data generated by
    ``make_lplr_LZZ2020`` and records coverage results for every configured
    confidence level. The target value is the ``theta`` entry of the DGP
    parameters.
    """

    def __init__(
        self,
        config_file: str,
        suppress_warnings: bool = True,
        log_level: str = "INFO",
        log_file: Optional[str] = None,
        use_failed_scores: bool = False,
    ):
        """Set up the simulation.

        Parameters
        ----------
        config_file : str
            Forwarded unchanged to ``BaseSimulation``.
        suppress_warnings : bool
            Forwarded unchanged to ``BaseSimulation``.
        log_level : str
            Forwarded unchanged to ``BaseSimulation``.
        log_file : str, optional
            Forwarded unchanged to ``BaseSimulation``.
        use_failed_scores : bool
            The model is built with
            ``error_on_convergence_failure=not use_failed_scores``.
            NOTE(review): presumably ``True`` keeps estimates from fits that
            did not converge instead of raising -- confirm against the
            DoubleML documentation.
        """
        super().__init__(
            config_file=config_file,
            suppress_warnings=suppress_warnings,
            log_level=log_level,
            log_file=log_file,
        )

        # Store the flag before any further setup so the instance is never
        # observed without it once the base initialisation has finished.
        self._use_failed_scores = use_failed_scores

        # Calculate oracle values
        self._calculate_oracle_values()

    def _process_config_parameters(self):
        """Check that the config defines every learner the model needs.

        Raises
        ------
        AssertionError
            If ``dml_parameters`` has no ``"learners"`` entry, or if any
            learner entry lacks one of ``ml_m``, ``ml_M`` or ``ml_t``.
        """
        # Process ML models in parameter grid
        assert "learners" in self.dml_parameters, "No learners specified in the config file"

        required_learners = ["ml_m", "ml_M", "ml_t"]
        for learner in self.dml_parameters["learners"]:
            for ml in required_learners:
                assert ml in learner, f"No {ml} specified in the config file"

    def _calculate_oracle_values(self):
        """Store the oracle ``theta`` taken from the DGP parameters."""
        self.logger.info("Calculating oracle values")

        self.oracle_values = {"theta": self.dgp_parameters["theta"]}

    def run_single_rep(self, dml_data, dml_params) -> Optional[Dict[str, Any]]:
        """Run a single repetition with the given parameters.

        Parameters
        ----------
        dml_data
            Data object passed to ``dml.DoubleMLLPLR`` as ``obj_dml_data``.
        dml_params
            Mapping with a ``"learners"`` entry (holding the ``ml_m``,
            ``ml_M`` and ``ml_t`` learner configs) and a ``"score"`` entry.

        Returns
        -------
        dict or None
            ``{"coverage": [...]}`` with one entry per confidence level, each
            extended with the learner names, the score and the level.
            ``None`` if fitting raised a ``RuntimeError``.
        """
        # Extract parameters
        learner_config = dml_params["learners"]
        learner_m_name, ml_m = create_learner_from_config(learner_config["ml_m"])
        learner_M_name, ml_M = create_learner_from_config(learner_config["ml_M"])
        learner_t_name, ml_t = create_learner_from_config(learner_config["ml_t"])
        score = dml_params["score"]

        # Model
        dml_model = dml.DoubleMLLPLR(
            obj_dml_data=dml_data,
            ml_m=ml_m,
            ml_M=ml_M,
            ml_t=ml_t,
            score=score,
            error_on_convergence_failure=not self._use_failed_scores,
        )

        try:
            dml_model.fit()
        except RuntimeError as e:
            # A failed fit yields no result for this repetition; it is
            # logged and reported to the caller as None rather than re-raised.
            self.logger.info(f"Exception during fit: {e}")
            return None

        result = {
            "coverage": [],
        }
        for level in self.confidence_parameters["level"]:
            level_result = {
                "coverage": self._compute_coverage(
                    thetas=dml_model.coef,
                    oracle_thetas=self.oracle_values["theta"],
                    confint=dml_model.confint(level=level),
                    joint_confint=None,
                ),
            }

            # add parameters to the result
            for res in level_result.values():
                res.update(
                    {
                        "Learner m": learner_m_name,
                        "Learner M": learner_M_name,
                        "Learner t": learner_t_name,
                        "Score": score,
                        "level": level,
                    }
                )
            for key, res in level_result.items():
                result[key].append(res)

        return result

    def summarize_results(self):
        """Summarize the simulation results.

        Returns
        -------
        dict
            One aggregated DataFrame per entry of ``self.results``, grouped by
            learner names, score and level. ``Coverage``, ``CI Length`` and
            ``Bias`` are averaged; ``repetition`` is counted, so it holds the
            number of rows that contributed to each group.
        """
        self.logger.info("Summarizing simulation results")

        # Group by parameter combinations
        groupby_cols = ["Learner m", "Learner M", "Learner t", "Score", "level"]
        aggregation_dict = {
            "Coverage": "mean",
            "CI Length": "mean",
            "Bias": "mean",
            "repetition": "count",
        }

        # Aggregate results (possibly multiple result dfs)
        result_summary = dict()
        for result_name, result_df in self.results.items():
            result_summary[result_name] = result_df.groupby(groupby_cols).agg(aggregation_dict).reset_index()
            self.logger.debug(f"Summarized {result_name} results")

        return result_summary

    def _generate_dml_data(self, dgp_params) -> dml.DoubleMLData:
        """Generate data for the simulation via ``make_lplr_LZZ2020(**dgp_params)``."""
        return make_lplr_LZZ2020(**dgp_params)

results/plm/lplr_ate_config.yml

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
simulation_parameters:
2+
repetitions: 500
3+
max_runtime: 19800
4+
random_seed: 42
5+
n_jobs: -2
6+
dgp_parameters:
7+
theta:
8+
- 0.5
9+
n_obs:
10+
- 500
11+
dim_x:
12+
- 20
13+
learner_definitions:
14+
lasso: &id001
15+
name: LassoCV
16+
logistic: &id002
17+
name: Logistic
18+
rf: &id003
19+
name: RF Regr.
20+
params:
21+
n_estimators: 100
22+
max_features: sqrt
23+
rf-class: &id004
24+
name: RF Clas.
25+
params:
26+
n_estimators: 100
27+
max_features: sqrt
28+
lgbm: &id005
29+
name: LGBM Regr.
30+
params:
31+
n_estimators: 500
32+
learning_rate: 0.01
33+
lgbm-class: &id006
34+
name: LGBM Clas.
35+
params:
36+
n_estimators: 500
37+
learning_rate: 0.01
38+
dml_parameters:
39+
learners:
40+
- ml_m: *id001
41+
ml_M: *id002
42+
ml_t: *id001
43+
- ml_m: *id003
44+
ml_M: *id004
45+
ml_t: *id003
46+
- ml_m: *id005
47+
ml_M: *id006
48+
ml_t: *id005
49+
score:
50+
- nuisance_space
51+
- instrument
52+
confidence_parameters:
53+
level:
54+
- 0.95
55+
- 0.9

results/plm/lplr_ate_coverage.csv

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Learner m,Learner M,Learner t,Score,level,Coverage,CI Length,Bias,repetition
2+
LGBM Regr.,LGBM Clas.,LGBM Regr.,instrument,0.9,0.872,0.6540916267945179,0.17501445022837125,500
3+
LGBM Regr.,LGBM Clas.,LGBM Regr.,instrument,0.95,0.928,0.7793982455949509,0.17501445022837125,500
4+
LGBM Regr.,LGBM Clas.,LGBM Regr.,nuisance_space,0.9,0.88,0.598241346108922,0.15586913796966942,500
5+
LGBM Regr.,LGBM Clas.,LGBM Regr.,nuisance_space,0.95,0.946,0.7128485314583201,0.15586913796966942,500
6+
LassoCV,Logistic,LassoCV,instrument,0.9,0.856,0.5890452894815547,0.16482024691605957,500
7+
LassoCV,Logistic,LassoCV,instrument,0.95,0.924,0.7018907541253692,0.16482024691605957,500
8+
LassoCV,Logistic,LassoCV,nuisance_space,0.9,0.868,0.5820699058557912,0.1507959338822808,500
9+
LassoCV,Logistic,LassoCV,nuisance_space,0.95,0.93,0.6935790718815301,0.1507959338822808,500
10+
RF Regr.,RF Clas.,RF Regr.,instrument,0.9,0.884,0.39484117997902796,0.09883032061915417,500
11+
RF Regr.,RF Clas.,RF Regr.,instrument,0.95,0.95,0.4704822846799266,0.09883032061915417,500
12+
RF Regr.,RF Clas.,RF Regr.,nuisance_space,0.9,0.886,0.38499391911236014,0.09772003875711463,500
13+
RF Regr.,RF Clas.,RF Regr.,nuisance_space,0.95,0.94,0.45874854963578754,0.09772003875711463,500

results/plm/lplr_ate_metadata.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
DoubleML Version,Script,Date,Total Runtime (minutes),Python Version,Config File
2+
0.11.dev0,LPLRATECoverageSimulation,2025-11-18 03:13,39.79484195311864,3.12.9,scripts/plm/lplr_ate_config.yml

0 commit comments

Comments
 (0)