
Commit 9575229

Merge pull request #368 from DoubleML/j-optuna
Hyperparameter Tuning using Optuna
2 parents: 0c04ef6 + 80c731f

54 files changed: +4614 −37 lines

.github/workflows/pytest.yml

Lines changed: 1 addition & 0 deletions
@@ -58,6 +58,7 @@ jobs:
           matrix.config.os != 'ubuntu-latest' ||
           matrix.config.python-version != '3.12'
         run: |
+          pytest --doctest-modules --ignore-glob="doubleml/**/tests/*" --ignore-glob="doubleml/tests/*"
           pytest -m ci
           pytest -m ci_rdd

doubleml/data/tests/test_dml_data.py

Lines changed: 1 addition & 0 deletions
@@ -569,6 +569,7 @@ def test_dml_data_w_missings(generate_data_irm_w_missings):
     assert dml_data.force_all_x_finite == "allow-nan"


+@pytest.mark.ci
 def test_dml_data_w_missing_d(generate_data1):
     data = generate_data1
     np.random.seed(3141)

doubleml/did/datasets/dgp_did_CS2021.py

Lines changed: 34 additions & 7 deletions
@@ -105,11 +105,35 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ

     6. Treatment assignment:

-    For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is:
+    For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is computed as follows:

-    .. math::
+    - Compute group-specific logits for each observation:
+
+      .. math::
+
+          \\text{logit}_{i,g} = f_{ps,g}(W_{ps})
+
+      The logits are clipped to the range [-2.5, 2.5] for numerical stability.
+
+    - Convert logits to uncapped probabilities via softmax:
+
+      .. math::
+
+          p^{\\text{uncapped}}_{i,g} = \\frac{\\exp(\\text{logit}_{i,g})}{\\sum_{g'} \\exp(\\text{logit}_{i,g'})}
+
+    - Clip uncapped probabilities to the range [0.05, 0.95]:
+
+      .. math::
+
+          p^{\\text{clipped}}_{i,g} = \\min(\\max(p^{\\text{uncapped}}_{i,g}, 0.05), 0.95)
+
+    - Renormalize clipped probabilities so they sum to 1 for each observation:
+
+      .. math::
+
+          p_{i,g} = \\frac{p^{\\text{clipped}}_{i,g}}{\\sum_{g'} p^{\\text{clipped}}_{i,g'}}

-        P(G_i = g) = \\frac{\\exp(f_{ps,g}(W_{ps}))}{\\sum_{g'} \\exp(f_{ps,g'}(W_{ps}))}
+    - Assign each observation to a treatment group by sampling from the categorical distribution defined by :math:`p_{i,g}`.

     For experimental settings (DGP 5-6), each treatment group (including never-treated) has equal probability:

@@ -159,7 +183,7 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ
     `dim_x` (int, default=4):
         Dimension of feature vectors.

-    `xi` (float, default=0.9):
+    `xi` (float, default=0.5):
         Scale parameter for the propensity score function.

     `n_periods` (int, default=5):

@@ -188,7 +212,7 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ

     c = kwargs.get("c", 0.0)
     dim_x = kwargs.get("dim_x", 4)
-    xi = kwargs.get("xi", 0.9)
+    xi = kwargs.get("xi", 0.75)
     n_periods = kwargs.get("n_periods", 5)
     anticipation_periods = kwargs.get("anticipation_periods", 0)
     n_pre_treat_periods = kwargs.get("n_pre_treat_periods", 2)

@@ -228,8 +252,11 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ
         p = np.ones(n_treatment_groups) / n_treatment_groups
         d_index = np.random.choice(n_treatment_groups, size=n_obs, p=p)
     else:
-        unnormalized_p = np.exp(_f_ps_groups(features_ps, xi, n_groups=n_treatment_groups))
-        p = unnormalized_p / unnormalized_p.sum(1, keepdims=True)
+        logits = np.clip(_f_ps_groups(features_ps, xi, n_groups=n_treatment_groups), a_min=-2.5, a_max=2.5)
+        unnormalized_p = np.exp(logits)
+        p_uncapped = unnormalized_p / unnormalized_p.sum(1, keepdims=True)
+        p_clipped = np.clip(p_uncapped, a_min=0.05, a_max=0.95)
+        p = p_clipped / p_clipped.sum(1, keepdims=True)
         d_index = np.array([np.random.choice(n_treatment_groups, p=p_row) for p_row in p])

     # fixed effects (shape (n_obs, n_time_periods))
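
For reference, the assignment procedure the new docstring describes (clip the logits, apply a softmax, clip the probabilities, renormalize, then sample) can be summarized in a short NumPy sketch. The helper name assign_treatment_groups and the randomly drawn logits below are illustrative stand-ins, not part of the package:

import numpy as np

def assign_treatment_groups(logits, rng, logit_clip=2.5, p_min=0.05, p_max=0.95):
    # Clip group-specific logits for numerical stability.
    logits = np.clip(logits, -logit_clip, logit_clip)
    # Row-wise softmax -> uncapped group probabilities.
    p_uncapped = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
    # Clip probabilities away from 0 and 1, then renormalize each row to sum to 1.
    p_clipped = np.clip(p_uncapped, p_min, p_max)
    p = p_clipped / p_clipped.sum(axis=1, keepdims=True)
    # Sample one treatment group per observation from the categorical distribution.
    return np.array([rng.choice(p.shape[1], p=p_row) for p_row in p])

rng = np.random.default_rng(42)
logits = rng.normal(size=(1000, 4))  # placeholder for f_{ps,g}(W_ps)
d_index = assign_treatment_groups(logits, rng)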

doubleml/did/datasets/dgp_did_cs_CS2021.py

Lines changed: 28 additions & 4 deletions
@@ -85,11 +85,35 @@ def make_did_cs_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, lambd

     6. Treatment assignment:

-    For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is:
+    For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is computed as follows:

-    .. math::
+    - Compute group-specific logits for each observation:
+
+      .. math::
+
+          \\text{logit}_{i,g} = f_{ps,g}(W_{ps})
+
+      The logits are clipped to the range [-2.5, 2.5] for numerical stability.
+
+    - Convert logits to uncapped probabilities via softmax:
+
+      .. math::
+
+          p^{\\text{uncapped}}_{i,g} = \\frac{\\exp(\\text{logit}_{i,g})}{\\sum_{g'} \\exp(\\text{logit}_{i,g'})}
+
+    - Clip uncapped probabilities to the range [0.05, 0.95]:
+
+      .. math::
+
+          p^{\\text{clipped}}_{i,g} = \\min(\\max(p^{\\text{uncapped}}_{i,g}, 0.05), 0.95)
+
+    - Renormalize clipped probabilities so they sum to 1 for each observation:
+
+      .. math::
+
+          p_{i,g} = \\frac{p^{\\text{clipped}}_{i,g}}{\\sum_{g'} p^{\\text{clipped}}_{i,g'}}

-        P(G_i = g) = \\frac{\\exp(f_{ps,g}(W_{ps}))}{\\sum_{g'} \\exp(f_{ps,g'}(W_{ps}))}
+    - Assign each observation to a treatment group by sampling from the categorical distribution defined by :math:`p_{i,g}`.

     For experimental settings (DGP 5-6), each treatment group (including never-treated) has equal probability:

@@ -148,7 +172,7 @@ def make_did_cs_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, lambd
     `dim_x` (int, default=4):
         Dimension of feature vectors.

-    `xi` (float, default=0.9):
+    `xi` (float, default=0.5):
         Scale parameter for the propensity score function.

     `n_periods` (int, default=5):

doubleml/did/did.py

Lines changed: 78 additions & 0 deletions
@@ -9,6 +9,7 @@
 from doubleml.double_ml_score_mixins import LinearScoreMixin
 from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score
 from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls
+from doubleml.utils._tune_optuna import _dml_tune_optuna


 # TODO: Remove DoubleMLDIDData with version 0.12.0

@@ -427,6 +428,83 @@ def _nuisance_tuning(

         return res

+    def _nuisance_tuning_optuna(
+        self,
+        optuna_params,
+        scoring_methods,
+        cv,
+        optuna_settings,
+    ):
+        """
+        Optuna-based hyperparameter tuning for DID nuisance models.
+
+        Performs tuning once on the whole dataset using cross-validation,
+        returning the same optimal parameters for all folds.
+        """
+
+        x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
+        x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
+
+        if scoring_methods is None:
+            if self.score == "observational":
+                scoring_methods = {"ml_g0": None, "ml_g1": None, "ml_m": None}
+            else:
+                scoring_methods = {"ml_g0": None, "ml_g1": None}
+
+        # Separate data by treatment status for conditional mean tuning
+        mask_d0 = d == 0
+        mask_d1 = d == 1
+
+        x_d0 = x[mask_d0, :]
+        y_d0 = y[mask_d0]
+        g0_tune_res = _dml_tune_optuna(
+            y_d0,
+            x_d0,
+            self._learner["ml_g"],
+            optuna_params["ml_g0"],
+            scoring_methods["ml_g0"],
+            cv,
+            optuna_settings,
+            learner_name="ml_g",
+            params_name="ml_g0",
+        )
+
+        x_d1 = x[mask_d1, :]
+        y_d1 = y[mask_d1]
+        g1_tune_res = _dml_tune_optuna(
+            y_d1,
+            x_d1,
+            self._learner["ml_g"],
+            optuna_params["ml_g1"],
+            scoring_methods["ml_g1"],
+            cv,
+            optuna_settings,
+            learner_name="ml_g",
+            params_name="ml_g1",
+        )
+
+        # Tune propensity score on full dataset for observational score
+        m_tune_res = None
+        if self.score == "observational":
+            m_tune_res = _dml_tune_optuna(
+                d,
+                x,
+                self._learner["ml_m"],
+                optuna_params["ml_m"],
+                scoring_methods["ml_m"],
+                cv,
+                optuna_settings,
+                learner_name="ml_m",
+                params_name="ml_m",
+            )
+
+        if self.score == "observational":
+            results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res, "ml_m": m_tune_res}
+        else:
+            results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res}
+
+        return results
+
     def sensitivity_benchmark(self, benchmarking_set, fit_args=None):
         """
         Computes a benchmark for a given set of features.
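
The helper _dml_tune_optuna used above is imported from doubleml.utils._tune_optuna, a module not shown in this excerpt. As a hedged sketch of the general pattern such a helper can follow (an Optuna study that maximizes a cross-validated score over a sampled search space), the function tune_with_optuna and the sample_params search space below are illustrative assumptions, not the package's actual implementation or API:

# Hedged sketch (not the package's implementation): a generic Optuna +
# cross-validation tuning loop for a scikit-learn style learner.
import optuna
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score


def tune_with_optuna(y, x, learner, sample_params, scoring=None, cv=5, n_trials=50):
    def objective(trial):
        # Draw a candidate hyperparameter configuration for this trial.
        params = sample_params(trial)
        estimator = clone(learner).set_params(**params)
        # Cross-validated score on the provided data; Optuna maximizes its mean.
        return cross_val_score(estimator, x, y, scoring=scoring, cv=cv).mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials)
    return study.best_params


# Illustrative search space for a random forest nuisance learner.
def sample_params(trial):
    return {
        "n_estimators": trial.suggest_int("n_estimators", 50, 300),
        "max_depth": trial.suggest_int("max_depth", 2, 10),
    }

# e.g. best_params = tune_with_optuna(y_d0, x_d0, RandomForestRegressor(), sample_params)

Under this pattern, _nuisance_tuning_optuna calls the helper once per nuisance part (ml_g0, ml_g1 and, for the observational score, ml_m) and reuses the resulting parameters for every fold, matching the docstring in the diff above.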

doubleml/did/did_binary.py

Lines changed: 74 additions & 0 deletions
@@ -23,6 +23,7 @@
     _check_score,
 )
 from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls
+from doubleml.utils._tune_optuna import _dml_tune_optuna
 from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor



@@ -666,6 +667,79 @@ def _nuisance_tuning(

         return res

+    def _nuisance_tuning_optuna(
+        self,
+        optuna_params,
+        scoring_methods,
+        cv,
+        optuna_settings,
+    ):
+
+        x, y = check_X_y(self._x_data_subset, self._y_data_subset, ensure_all_finite=False)
+        x, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False)
+
+        if scoring_methods is None:
+            if self.score == "observational":
+                scoring_methods = {"ml_g0": None, "ml_g1": None, "ml_m": None}
+            else:
+                scoring_methods = {"ml_g0": None, "ml_g1": None}
+
+        mask_d0 = d == 0
+        mask_d1 = d == 1
+
+        x_d0 = x[mask_d0, :]
+        y_d0 = y[mask_d0]
+        g0_param_grid = optuna_params["ml_g0"]
+        g0_scoring = scoring_methods["ml_g0"]
+        g0_tune_res = _dml_tune_optuna(
+            y_d0,
+            x_d0,
+            self._learner["ml_g"],
+            g0_param_grid,
+            g0_scoring,
+            cv,
+            optuna_settings,
+            learner_name="ml_g",
+            params_name="ml_g0",
+        )
+
+        x_d1 = x[mask_d1, :]
+        y_d1 = y[mask_d1]
+        g1_param_grid = optuna_params["ml_g1"]
+        g1_scoring = scoring_methods["ml_g1"]
+        g1_tune_res = _dml_tune_optuna(
+            y_d1,
+            x_d1,
+            self._learner["ml_g"],
+            g1_param_grid,
+            g1_scoring,
+            cv,
+            optuna_settings,
+            learner_name="ml_g",
+            params_name="ml_g1",
+        )
+
+        m_tune_res = None
+        if self.score == "observational":
+            m_tune_res = _dml_tune_optuna(
+                d,
+                x,
+                self._learner["ml_m"],
+                optuna_params["ml_m"],
+                scoring_methods["ml_m"],
+                cv,
+                optuna_settings,
+                learner_name="ml_m",
+                params_name="ml_m",
+            )
+
+        if self.score == "observational":
+            results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res, "ml_m": m_tune_res}
+        else:
+            results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res}
+
+        return results
+
     def _sensitivity_element_est(self, preds):
         y = self._y_data_subset
         d = self._g_data_subset
