From 17b019256ad91b6fdd9f4583676e3b170e0049bd Mon Sep 17 00:00:00 2001
From: Tobias Wochinger <tobias.wochinger@clickhouse.com>
Date: Wed, 1 Apr 2026 18:47:15 +0200
Subject: [PATCH 1/3] feat(scores): add TEXT type to score overloads and
 docstrings

Extend string-value overloads in create_score, score_current_span,
score_current_trace, score, and score_trace to accept TEXT alongside
CATEGORICAL. Update all related docstrings. Add ExperimentScoreType
to exclude TEXT from experiments/evals. Add integration test for TEXT
scores.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 langfuse/_client/client.py | 18 ++++++------
 langfuse/_client/span.py   | 12 ++++----
 langfuse/experiment.py     |  5 ++--
 langfuse/types.py          |  5 +++-
 tests/test_core_sdk.py     | 58 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 80 insertions(+), 18 deletions(-)

diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py
index 85ec83a4e..45b4ede5a 100644
--- a/langfuse/_client/client.py
+++ b/langfuse/_client/client.py
@@ -1747,7 +1747,7 @@ def create_score(
         trace_id: Optional[str] = None,
         score_id: Optional[str] = None,
         observation_id: Optional[str] = None,
-        data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
+        data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL",
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
         metadata: Optional[Any] = None,
@@ -1777,13 +1777,13 @@ def create_score(
 
         Args:
             name: Name of the score (e.g., "relevance", "accuracy")
-            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
+            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT)
             session_id: ID of the Langfuse session to associate the score with
             dataset_run_id: ID of the Langfuse dataset run to associate the score with
             trace_id: ID of the Langfuse trace to associate the score with
             observation_id: Optional ID of the specific observation to score. Trace ID must be provided too.
             score_id: Optional custom ID for the score (auto-generated if not provided)
-            data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
+            data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT)
             comment: Optional comment or explanation for the score
             config_id: Optional ID of a score config defined in Langfuse
             metadata: Optional metadata to be attached to the score
@@ -1907,7 +1907,7 @@ def score_current_span(
         name: str,
         value: str,
         score_id: Optional[str] = None,
-        data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
+        data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL",
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
         metadata: Optional[Any] = None,
@@ -1931,9 +1931,9 @@ def score_current_span(
 
         Args:
             name: Name of the score (e.g., "relevance", "accuracy")
-            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
+            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT)
             score_id: Optional custom ID for the score (auto-generated if not provided)
-            data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
+            data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT)
             comment: Optional comment or explanation for the score
             config_id: Optional ID of a score config defined in Langfuse
             metadata: Optional metadata to be attached to the score
@@ -1997,7 +1997,7 @@ def score_current_trace(
         name: str,
         value: str,
         score_id: Optional[str] = None,
-        data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
+        data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL",
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
         metadata: Optional[Any] = None,
@@ -2022,9 +2022,9 @@ def score_current_trace(
 
         Args:
             name: Name of the score (e.g., "user_satisfaction", "overall_quality")
-            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
+            value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT)
             score_id: Optional custom ID for the score (auto-generated if not provided)
-            data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
+            data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT)
             comment: Optional comment or explanation for the score
             config_id: Optional ID of a score config defined in Langfuse
             metadata: Optional metadata to be attached to the score
diff --git a/langfuse/_client/span.py b/langfuse/_client/span.py
index 2590262ce..599ca21f3 100644
--- a/langfuse/_client/span.py
+++ b/langfuse/_client/span.py
@@ -308,7 +308,7 @@ def score(
         value: str,
         score_id: Optional[str] = None,
         data_type: Optional[
-            Literal[ScoreDataType.CATEGORICAL]
+            Literal[ScoreDataType.CATEGORICAL, ScoreDataType.TEXT]
         ] = ScoreDataType.CATEGORICAL,
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
@@ -335,9 +335,9 @@ def score(
 
         Args:
             name: Name of the score (e.g., "relevance", "accuracy")
-            value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL)
+            value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL/TEXT)
             score_id: Optional custom ID for the score (auto-generated if not provided)
-            data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
+            data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT)
             comment: Optional comment or explanation for the score
             config_id: Optional ID of a score config defined in Langfuse
             timestamp: Optional timestamp for the score (defaults to current UTC time)
@@ -395,7 +395,7 @@ def score_trace(
         value: str,
         score_id: Optional[str] = None,
         data_type: Optional[
-            Literal[ScoreDataType.CATEGORICAL]
+            Literal[ScoreDataType.CATEGORICAL, ScoreDataType.TEXT]
         ] = ScoreDataType.CATEGORICAL,
         comment: Optional[str] = None,
         config_id: Optional[str] = None,
@@ -423,9 +423,9 @@ def score_trace(
 
         Args:
             name: Name of the score (e.g., "user_satisfaction", "overall_quality")
-            value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL)
+            value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL/TEXT)
             score_id: Optional custom ID for the score (auto-generated if not provided)
-            data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
+            data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT)
             comment: Optional comment or explanation for the score
             config_id: Optional ID of a score config defined in Langfuse
             timestamp: Optional timestamp for the score (defaults to current UTC time)
diff --git a/langfuse/experiment.py b/langfuse/experiment.py
index 6e4b32e10..67b50a900 100644
--- a/langfuse/experiment.py
+++ b/langfuse/experiment.py
@@ -17,8 +17,9 @@
     Union,
 )
 
-from langfuse.api import DatasetItem, ScoreDataType
+from langfuse.api import DatasetItem
 from langfuse.logger import langfuse_logger as logger
+from langfuse.types import ExperimentScoreType
 
 
 class LocalExperimentItem(TypedDict, total=False):
@@ -184,7 +185,7 @@ def __init__(
         value: Union[int, float, str, bool],
         comment: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
-        data_type: Optional[ScoreDataType] = None,
+        data_type: Optional[ExperimentScoreType] = None,
         config_id: Optional[str] = None,
     ):
         """Initialize an Evaluation with the provided data.
diff --git a/langfuse/types.py b/langfuse/types.py
index 067088e40..39d4a1630 100644
--- a/langfuse/types.py
+++ b/langfuse/types.py
@@ -35,7 +35,10 @@ def my_evaluator(*, output: str, **kwargs) -> Evaluation:
 
 SpanLevel = Literal["DEBUG", "DEFAULT", "WARNING", "ERROR"]
 
-ScoreDataType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN"]
+ScoreDataType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN", "TEXT"]
+
+# Score types accepted by evals and experiments; TEXT scores are not supported there
+ExperimentScoreType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN"]
 
 
 class MaskFunction(Protocol):
diff --git a/tests/test_core_sdk.py b/tests/test_core_sdk.py
index 91064de23..b05d5e8a5 100644
--- a/tests/test_core_sdk.py
+++ b/tests/test_core_sdk.py
@@ -321,6 +321,64 @@ def test_create_categorical_score():
     assert created_score["stringValue"] == "high score"
 
 
+def test_create_text_score():
+    langfuse = Langfuse()
+    api_wrapper = LangfuseAPI()
+
+    # Create a span and set trace properties (name, user ID, metadata)
+    with langfuse.start_as_current_observation(name="test-span") as span:
+        with propagate_attributes(
+            trace_name="this-is-so-great-new",
+            user_id="test",
+            metadata={"test": "test"},
+        ):
+            # Remember the trace ID; it is used after the span has closed
+            trace_id = span.trace_id
+
+    # Flush the span and pause briefly (presumably to let the backend ingest it)
+    langfuse.flush()
+    sleep(2)
+
+    # Create a TEXT score (string value) on the trace under a known score ID
+    score_id = create_uuid()
+    langfuse.create_score(
+        score_id=score_id,
+        trace_id=trace_id,
+        name="this-is-a-score",
+        value="This is a detailed text evaluation of the output quality.",
+        data_type="TEXT",
+    )
+
+    # Add a generation to the same trace via trace_context
+    generation = langfuse.start_observation(
+        as_type="generation",
+        name="yet another child",
+        metadata="test",
+        trace_context={"trace_id": trace_id},
+    )
+    generation.end()
+
+    # Flush the score and generation, then pause again before reading back
+    langfuse.flush()
+    sleep(2)
+
+    # Fetch the trace through the API wrapper
+    trace = api_wrapper.get_trace(trace_id)
+
+    # Locate the score by name, then check its ID, data type and stored values
+    created_score = next(
+        (s for s in trace["scores"] if s["name"] == "this-is-a-score"), None
+    )
+    assert created_score is not None, "Score not found in trace"
+    assert created_score["id"] == score_id
+    assert created_score["dataType"] == "TEXT"
+    assert created_score["value"] is None
+    assert (
+        created_score["stringValue"]
+        == "This is a detailed text evaluation of the output quality."
+    )
+
 def test_create_score_with_custom_timestamp():
     langfuse = Langfuse()
     api_wrapper = LangfuseAPI()

From 25e446f3ed999e2b9b9d0dc61df20fbc829575a7 Mon Sep 17 00:00:00 2001
From: Tobias Wochinger <tobias.wochinger@clickhouse.com>
Date: Wed, 1 Apr 2026 18:53:13 +0200
Subject: [PATCH 2/3] docs: simplify openapi spec update instructions in
 CONTRIBUTING.md

The spec update is now automated via PR from the langfuse repo.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CONTRIBUTING.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 53fede752..946400a5d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -45,9 +45,7 @@ uv run mypy langfuse --no-error-summary
 
 ### Update openapi spec
 
-1. Generate Fern Python SDK in [langfuse](https://github.com/langfuse/langfuse) and copy the files generated in `generated/python` into the `langfuse/api` folder in this repo.
-2. Execute the linter by running `uv run ruff format .`
-3. Rebuild and deploy the package to PyPi.
+A PR with the changes is automatically created whenever the OpenAPI spec changes in the [langfuse](https://github.com/langfuse/langfuse) repo.
 
 ### Publish release
 

From b039ae75a612baf2794d4993b2ba9aec400f93e0 Mon Sep 17 00:00:00 2001
From: Tobias Wochinger <tobias.wochinger@clickhouse.com>
Date: Wed, 1 Apr 2026 19:02:09 +0200
Subject: [PATCH 3/3] fix(scores): update stale casts and export
 ExperimentScoreType

Update cast(Literal["CATEGORICAL"], ...) to include "TEXT" in
score/score_trace/score_current_span/score_current_trace impl bodies.
Add ExperimentScoreType to __all__ in types.py.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 langfuse/_client/client.py | 4 ++--
 langfuse/_client/span.py   | 4 ++--
 langfuse/types.py          | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py
index 45b4ede5a..04d8fae2c 100644
--- a/langfuse/_client/client.py
+++ b/langfuse/_client/client.py
@@ -1971,7 +1971,7 @@ def score_current_span(
                 name=name,
                 value=cast(str, value),
                 score_id=score_id,
-                data_type=cast(Literal["CATEGORICAL"], data_type),
+                data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type),
                 comment=comment,
                 config_id=config_id,
                 metadata=metadata,
@@ -2060,7 +2060,7 @@ def score_current_trace(
                 name=name,
                 value=cast(str, value),
                 score_id=score_id,
-                data_type=cast(Literal["CATEGORICAL"], data_type),
+                data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type),
                 comment=comment,
                 config_id=config_id,
                 metadata=metadata,
diff --git a/langfuse/_client/span.py b/langfuse/_client/span.py
index 599ca21f3..bd0c638a7 100644
--- a/langfuse/_client/span.py
+++ b/langfuse/_client/span.py
@@ -364,7 +364,7 @@ def score(
             trace_id=self.trace_id,
             observation_id=self.id,
             score_id=score_id,
-            data_type=cast(Literal["CATEGORICAL"], data_type),
+            data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type),
             comment=comment,
             config_id=config_id,
             timestamp=timestamp,
@@ -451,7 +451,7 @@ def score_trace(
             value=cast(str, value),
             trace_id=self.trace_id,
             score_id=score_id,
-            data_type=cast(Literal["CATEGORICAL"], data_type),
+            data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type),
             comment=comment,
             config_id=config_id,
             timestamp=timestamp,
diff --git a/langfuse/types.py b/langfuse/types.py
index 39d4a1630..c3029e713 100644
--- a/langfuse/types.py
+++ b/langfuse/types.py
@@ -76,6 +76,7 @@ class TraceContext(TypedDict):
 __all__ = [
     "SpanLevel",
     "ScoreDataType",
+    "ExperimentScoreType",
     "MaskFunction",
     "ParsedMediaReference",
     "TraceContext",