Skip to content
48 changes: 0 additions & 48 deletions ddtrace/llmobs/_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1561,50 +1561,12 @@ def _set_dict_attribute(span: Span, key, value: Dict[str, Any]) -> None:
existing_value.update(value)
span._set_ctx_item(key, existing_value)

@classmethod
def submit_evaluation_for(
    cls,
    label: str,
    metric_type: str,
    value: Union[str, int, float, bool],
    span: Optional[dict] = None,
    span_with_tag_value: Optional[Dict[str, str]] = None,
    tags: Optional[Dict[str, str]] = None,
    ml_app: Optional[str] = None,
    timestamp_ms: Optional[int] = None,
    metadata: Optional[Dict[str, object]] = None,
    assessment: Optional[str] = None,
    reasoning: Optional[str] = None,
) -> None:
    """
    Deprecated alias for :meth:`LLMObs.submit_evaluation`.

    Emits a deprecation warning, then forwards every argument unchanged to
    ``submit_evaluation``. Scheduled for removal in ddtrace 4.0; callers
    should migrate to ``LLMObs.submit_evaluation()``.
    """
    log.warning(
        "LLMObs.submit_evaluation_for() is deprecated and will be removed in the next major "
        "version of ddtrace (4.0). Please use LLMObs.submit_evaluation() instead."
    )
    # Collect the forwarded arguments explicitly so the delegation is a
    # single, auditable pass-through to the replacement API.
    forwarded = {
        "label": label,
        "metric_type": metric_type,
        "value": value,
        "span": span,
        "span_with_tag_value": span_with_tag_value,
        "tags": tags,
        "ml_app": ml_app,
        "timestamp_ms": timestamp_ms,
        "metadata": metadata,
        "assessment": assessment,
        "reasoning": reasoning,
    }
    return cls.submit_evaluation(**forwarded)

@classmethod
def submit_evaluation(
cls,
label: str,
metric_type: str,
value: Union[str, int, float, bool],
span_context: Optional[Dict[str, str]] = None,
span: Optional[dict] = None,
span_with_tag_value: Optional[Dict[str, str]] = None,
tags: Optional[Dict[str, str]] = None,
Expand All @@ -1621,9 +1583,6 @@ def submit_evaluation(
:param str metric_type: The type of the evaluation metric. One of "categorical", "score", "boolean".
:param value: The value of the evaluation metric.
Must be a string (categorical), integer (score), float (score), or boolean (boolean).
:param dict span_context: A dictionary containing the span_id and trace_id of interest. This is a
deprecated parameter and will be removed in the next major version of
ddtrace (4.0). Please use `span` or `span_with_tag_value` instead.
:param dict span: A dictionary of shape {'span_id': str, 'trace_id': str} uniquely identifying
the span associated with this evaluation.
:param dict span_with_tag_value: A dictionary with the format {'tag_key': str, 'tag_value': str}
Expand All @@ -1637,13 +1596,6 @@ def submit_evaluation(
:param str assessment: An assessment of this evaluation. Must be either "pass" or "fail".
:param str reasoning: An explanation of the evaluation result.
"""
if span_context is not None:
log.warning(
"The `span_context` parameter is deprecated and will be removed in the next major version of "
"ddtrace (4.0). Please use `span` or `span_with_tag_value` instead."
)
span = span or span_context

if cls.enabled is False:
log.debug(
"LLMObs.submit_evaluation() called when LLMObs is not enabled. ",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
upgrade:
- |
LLM Observability: ``LLMObs.submit_evaluation_for()`` has been removed. Please use ``LLMObs.submit_evaluation()`` instead for submitting evaluations.
To migrate:
- ``LLMObs.submit_evaluation_for(...)`` users: rename to ``LLMObs.submit_evaluation(...)``
- ``LLMObs.submit_evaluation(...)`` users: rename the ``span_context`` argument to ``span``, i.e.
``LLMObs.submit_evaluation(span_context={"span_id": ..., "trace_id": ...}, ...)`` to ``LLMObs.submit_evaluation(span={"span_id": ..., "trace_id": ...}, ...)``
6 changes: 3 additions & 3 deletions tests/llmobs/test_llmobs_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -1930,8 +1930,8 @@ def test_submit_evaluation_invalid_reasoning_raises_warning(llmobs, mock_llmobs_
mock_llmobs_logs.warning.assert_called_once_with("Failed to parse reasoning. reasoning must be a string.")


def test_submit_evaluation_for_enqueues_writer_with_reasoning(llmobs, mock_llmobs_eval_metric_writer):
llmobs.submit_evaluation_for(
def test_submit_evaluation_enqueues_writer_with_reasoning(llmobs, mock_llmobs_eval_metric_writer):
llmobs.submit_evaluation(
span={"span_id": "123", "trace_id": "456"},
label="toxicity",
metric_type="categorical",
Expand All @@ -1955,7 +1955,7 @@ def test_submit_evaluation_for_enqueues_writer_with_reasoning(llmobs, mock_llmob
)
)
mock_llmobs_eval_metric_writer.reset()
llmobs.submit_evaluation_for(
llmobs.submit_evaluation(
span={"span_id": "123", "trace_id": "456"},
label="toxicity",
metric_type="categorical",
Expand Down
Loading