1 change: 1 addition & 0 deletions .gitignore
@@ -1,4 +1,5 @@
__pycache__/
.idea/

# Logs
monte-cover/logs/
1 change: 1 addition & 0 deletions doc/_quarto-dev.yml
@@ -21,6 +21,7 @@ website:
- plm/plr_gate.qmd
- plm/plr_cate.qmd
- plm/pliv.qmd
- plm/lplr.qmd
# DID
- did/did_pa.qmd
- did/did_cs.qmd
1 change: 1 addition & 0 deletions doc/_website.yml
@@ -25,6 +25,7 @@ website:
- plm/plr_gate.qmd
- plm/plr_cate.qmd
- plm/pliv.qmd
- plm/lplr.qmd
- text: "DID"
menu:
- did/did_pa_multi.qmd
113 changes: 113 additions & 0 deletions doc/plm/lplr.qmd
@@ -0,0 +1,113 @@
---
title: "Logistic Partial Linear Regression Models"

jupyter: python3
---

```{python}
#| echo: false

import numpy as np
import pandas as pd
from itables import init_notebook_mode
import os
import sys

doc_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
if doc_dir not in sys.path:
    sys.path.append(doc_dir)

from utils.style_tables import generate_and_show_styled_table

init_notebook_mode(all_interactive=True)
```

## ATE Coverage

The simulations are based on the [make_lplr_LZZ2020](https://docs.doubleml.org/stable/api/generated/doubleml.datasets.make_lplr_LZZ2020.html)-DGP with $500$ observations.
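For orientation, the following sketch shows what a single repetition of this study boils down to: draw a dataset from the DGP and fit a `DoubleMLLPLR` model with one of the configured learner combinations. The mapping of the config entries (e.g. "RF Regr.", "RF Clas.") to concrete scikit-learn estimators is an assumption made for illustration here.

```python
# Minimal sketch of one repetition (illustrative learner choices).
import doubleml as dml
from doubleml.plm.datasets import make_lplr_LZZ2020
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# DGP parameters as in the benchmark config: theta=0.5, n_obs=500, dim_x=20
dml_data = make_lplr_LZZ2020(theta=0.5, n_obs=500, dim_x=20)

dml_lplr = dml.DoubleMLLPLR(
    obj_dml_data=dml_data,
    ml_m=RandomForestRegressor(n_estimators=100, max_features="sqrt"),   # assumed "RF Regr."
    ml_M=RandomForestClassifier(n_estimators=100, max_features="sqrt"),  # assumed "RF Clas."
    ml_t=RandomForestRegressor(n_estimators=100, max_features="sqrt"),   # assumed "RF Regr."
    score="nuisance_space",  # the tables below also cover the "instrument" score
)
dml_lplr.fit()
# Coverage is the fraction of repetitions whose confidence interval contains theta = 0.5
print(dml_lplr.confint(level=0.95))
```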

::: {.callout-note title="Metadata" collapse="true"}

```{python}
#| echo: false
metadata_file = '../../results/plm/lplr_ate_metadata.csv'
metadata_df = pd.read_csv(metadata_file)
print(metadata_df.T.to_string(header=False))
```

:::

```{python}
#| echo: false

# set up data and rename columns
df_coverage = pd.read_csv("../../results/plm/lplr_ate_coverage.csv", index_col=None)

if "repetition" in df_coverage.columns and df_coverage["repetition"].nunique() == 1:
n_rep_coverage = df_coverage["repetition"].unique()[0]
elif "n_rep" in df_coverage.columns and df_coverage["n_rep"].nunique() == 1:
n_rep_coverage = df_coverage["n_rep"].unique()[0]
else:
n_rep_coverage = "N/A" # Fallback if n_rep cannot be determined

display_columns_coverage = ["Learner m", "Learner M", "Learner t", "Bias", "CI Length", "Coverage"]
```

### Nuisance space

```{python}
#| echo: false

generate_and_show_styled_table(
    main_df=df_coverage,
    filters={"level": 0.95, "Score": "nuisance_space"},
    display_cols=display_columns_coverage,
    n_rep=n_rep_coverage,
    level_col="level",
    # rename_map={"Learner g": "Learner l"},
    coverage_highlight_cols=["Coverage"]
)
```

```{python}
#| echo: false

generate_and_show_styled_table(
    main_df=df_coverage,
    filters={"level": 0.9, "Score": "nuisance_space"},
    display_cols=display_columns_coverage,
    n_rep=n_rep_coverage,
    level_col="level",
    # rename_map={"Learner g": "Learner l"},
    coverage_highlight_cols=["Coverage"]
)
```

### Instrument


```{python}
#| echo: false

generate_and_show_styled_table(
    main_df=df_coverage,
    filters={"level": 0.95, "Score": "instrument"},
    display_cols=display_columns_coverage,
    n_rep=n_rep_coverage,
    level_col="level",
    coverage_highlight_cols=["Coverage"]
)
```

```{python}
#| echo: false

generate_and_show_styled_table(
    main_df=df_coverage,
    filters={"level": 0.9, "Score": "instrument"},
    display_cols=display_columns_coverage,
    n_rep=n_rep_coverage,
    level_col="level",
    coverage_highlight_cols=["Coverage"]
)
```
2 changes: 2 additions & 0 deletions monte-cover/src/montecover/plm/__init__.py
@@ -5,11 +5,13 @@
from montecover.plm.plr_ate_sensitivity import PLRATESensitivityCoverageSimulation
from montecover.plm.plr_cate import PLRCATECoverageSimulation
from montecover.plm.plr_gate import PLRGATECoverageSimulation
from montecover.plm.lplr_ate import LPLRATECoverageSimulation

__all__ = [
"PLRATECoverageSimulation",
"PLIVLATECoverageSimulation",
"PLRGATECoverageSimulation",
"PLRCATECoverageSimulation",
"PLRATESensitivityCoverageSimulation",
"LPLRATECoverageSimulation",
]
126 changes: 126 additions & 0 deletions monte-cover/src/montecover/plm/lplr_ate.py
@@ -0,0 +1,126 @@
import warnings
from typing import Any, Dict, Optional

import doubleml as dml
from doubleml.plm.datasets import make_lplr_LZZ2020

from montecover.base import BaseSimulation
from montecover.utils import create_learner_from_config


class LPLRATECoverageSimulation(BaseSimulation):
    """Simulation class for coverage properties of DoubleMLLPLR for ATE estimation."""

    def __init__(
        self,
        config_file: str,
        suppress_warnings: bool = True,
        log_level: str = "INFO",
        log_file: Optional[str] = None,
        use_failed_scores: bool = False,
    ):
        super().__init__(
            config_file=config_file,
            suppress_warnings=suppress_warnings,
            log_level=log_level,
            log_file=log_file,
        )

        # Calculate oracle values
        self._calculate_oracle_values()

        self._use_failed_scores = use_failed_scores

    def _process_config_parameters(self):
        """Process simulation-specific parameters from config."""
        # Process ML models in parameter grid
        assert "learners" in self.dml_parameters, "No learners specified in the config file"

        required_learners = ["ml_m", "ml_M", "ml_t"]
        for learner in self.dml_parameters["learners"]:
            for ml in required_learners:
                assert ml in learner, f"No {ml} specified in the config file"

    def _calculate_oracle_values(self):
        """Calculate oracle values for the simulation."""
        self.logger.info("Calculating oracle values")

        self.oracle_values = dict()
        self.oracle_values["theta"] = self.dgp_parameters["theta"]

    def run_single_rep(self, dml_data, dml_params) -> Optional[Dict[str, Any]]:
        """Run a single repetition with the given parameters."""
        # Extract parameters
        learner_config = dml_params["learners"]
        learner_m_name, ml_m = create_learner_from_config(learner_config["ml_m"])
        learner_M_name, ml_M = create_learner_from_config(learner_config["ml_M"])
        learner_t_name, ml_t = create_learner_from_config(learner_config["ml_t"])
        score = dml_params["score"]

        # Model
        dml_model = dml.DoubleMLLPLR(
            obj_dml_data=dml_data,
            ml_m=ml_m,
            ml_M=ml_M,
            ml_t=ml_t,
            score=score,
            error_on_convergence_failure=not self._use_failed_scores,
        )

        try:
            dml_model.fit()
        except RuntimeError as e:
            self.logger.info(f"Exception during fit: {e}")
            return None

        result = {
            "coverage": [],
        }
        for level in self.confidence_parameters["level"]:
            level_result = dict()
            level_result["coverage"] = self._compute_coverage(
                thetas=dml_model.coef,
                oracle_thetas=self.oracle_values["theta"],
                confint=dml_model.confint(level=level),
                joint_confint=None,
            )

            # add parameters to the result
            for res in level_result.values():
                res.update(
                    {
                        "Learner m": learner_m_name,
                        "Learner M": learner_M_name,
                        "Learner t": learner_t_name,
                        "Score": score,
                        "level": level,
                    }
                )
            for key, res in level_result.items():
                result[key].append(res)

        return result

    def summarize_results(self):
        """Summarize the simulation results."""
        self.logger.info("Summarizing simulation results")

        # Group by parameter combinations
        groupby_cols = ["Learner m", "Learner M", "Learner t", "Score", "level"]
        aggregation_dict = {
            "Coverage": "mean",
            "CI Length": "mean",
            "Bias": "mean",
            "repetition": "count",
        }

        # Aggregate results (possibly multiple result dfs)
        result_summary = dict()
        for result_name, result_df in self.results.items():
            result_summary[result_name] = result_df.groupby(groupby_cols).agg(aggregation_dict).reset_index()
            self.logger.debug(f"Summarized {result_name} results")

        return result_summary

    def _generate_dml_data(self, dgp_params) -> dml.DoubleMLData:
        """Generate data for the simulation."""
        return make_lplr_LZZ2020(**dgp_params)
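A hypothetical usage sketch for the new simulation class follows; the `run_simulation()` entry point is assumed to be provided by `BaseSimulation` and is not part of this diff.

```python
# Hypothetical driver for the LPLR ATE coverage study (entry-point name assumed).
from montecover.plm import LPLRATECoverageSimulation

sim = LPLRATECoverageSimulation(
    config_file="results/plm/logistic_ate_config.yml",
    log_level="INFO",
    use_failed_scores=False,  # with False, convergence failures raise and the repetition is dropped
)
sim.run_simulation()  # assumed BaseSimulation method that loops over repetitions and collects results
```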
91 changes: 91 additions & 0 deletions results/plm/logistic_ate_config.yml
@@ -0,0 +1,91 @@
simulation_parameters:
  repetitions: 1000
  max_runtime: 86400
  random_seed: 42
  n_jobs: -2
dgp_parameters:
  theta:
    - 0.5
  n_obs:
    - 500
  dim_x:
    - 20
learner_definitions:
  lasso: &id001
    name: LassoCV
  logistic: &id002
    name: Logistic
  rf: &id003
    name: RF Regr.
    params:
      n_estimators: 100
      max_features: sqrt
  rf-class: &id004
    name: RF Clas.
    params:
      n_estimators: 100
      max_features: sqrt
  lgbm: &id005
    name: LGBM Regr.
    params:
      n_estimators: 500
      learning_rate: 0.01
  lgbm-class: &id006
    name: LGBM Clas.
    params:
      n_estimators: 500
      learning_rate: 0.01
dml_parameters:
  learners:
    - ml_m: *id001
      ml_M: *id002
      ml_t: *id001
    - ml_m: *id003
      ml_M: *id004
      ml_t: *id003
    - ml_m: *id005
      ml_M: *id006
      ml_t: *id005
    - ml_m: *id003
      ml_M: *id006
      ml_t: *id005
    - ml_m: *id005
      ml_M: *id004
      ml_t: *id005
    - ml_m: *id005
      ml_M: *id006
      ml_t: *id003
    - ml_m: *id005
      ml_M: *id004
      ml_t: *id003
    - ml_m: *id003
      ml_M: *id006
      ml_t: *id003
    - ml_m: *id003
      ml_M: *id004
      ml_t: *id005
    - ml_m: *id001
      ml_M: *id006
      ml_t: *id005
    - ml_m: *id005
      ml_M: *id002
      ml_t: *id005
    - ml_m: *id005
      ml_M: *id006
      ml_t: *id001
    - ml_m: *id001
      ml_M: *id004
      ml_t: *id003
    - ml_m: *id003
      ml_M: *id002
      ml_t: *id003
    - ml_m: *id003
      ml_M: *id004
      ml_t: *id001
  score:
    - nuisance_space
    - instrument
confidence_parameters:
  level:
    - 0.95
    - 0.9
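The `&id00x`/`*id00x` markers are plain YAML anchors and aliases: each learner is defined once under `learner_definitions` and then reused across the fifteen `ml_m`/`ml_M`/`ml_t` combinations. A small sketch (assuming PyYAML is available) showing how the aliases resolve when the config is loaded:

```python
# Load the config and confirm that aliased learner entries resolve to the anchored definition.
import yaml

with open("results/plm/logistic_ate_config.yml") as f:
    config = yaml.safe_load(f)

rf_def = config["learner_definitions"]["rf"]        # anchored as &id003
rf_combo = config["dml_parameters"]["learners"][1]  # second combination: rf / rf-class / rf
print(rf_def)                      # {'name': 'RF Regr.', 'params': {...}}
print(rf_combo["ml_m"] == rf_def)  # True: *id003 expands to the same learner definition
```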