From 17b019256ad91b6fdd9f4583676e3b170e0049bd Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 1 Apr 2026 18:47:15 +0200 Subject: [PATCH 1/3] feat(scores): add TEXT type to score overloads and docstrings Extend string-value overloads in create_score, score_current_span, score_current_trace, score, and score_trace to accept TEXT alongside CATEGORICAL. Update all related docstrings. Add ExperimentScoreType to exclude TEXT from experiments/evals. Add integration test for TEXT scores. Co-Authored-By: Claude Opus 4.6 (1M context) --- langfuse/_client/client.py | 18 ++++++------ langfuse/_client/span.py | 12 ++++---- langfuse/experiment.py | 5 ++-- langfuse/types.py | 5 +++- tests/test_core_sdk.py | 58 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 80 insertions(+), 18 deletions(-) diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py index 85ec83a4e..45b4ede5a 100644 --- a/langfuse/_client/client.py +++ b/langfuse/_client/client.py @@ -1747,7 +1747,7 @@ def create_score( trace_id: Optional[str] = None, score_id: Optional[str] = None, observation_id: Optional[str] = None, - data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", + data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL", comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None, @@ -1777,13 +1777,13 @@ def create_score( Args: name: Name of the score (e.g., "relevance", "accuracy") - value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL) + value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT) session_id: ID of the Langfuse session to associate the score with dataset_run_id: ID of the Langfuse dataset run to associate the score with trace_id: ID of the Langfuse trace to associate the score with observation_id: Optional ID of the specific observation to score. Trace ID must be provided too. 
score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse metadata: Optional metadata to be attached to the score @@ -1907,7 +1907,7 @@ def score_current_span( name: str, value: str, score_id: Optional[str] = None, - data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", + data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL", comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None, @@ -1931,9 +1931,9 @@ def score_current_span( Args: name: Name of the score (e.g., "relevance", "accuracy") - value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL) + value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse metadata: Optional metadata to be attached to the score @@ -1997,7 +1997,7 @@ def score_current_trace( name: str, value: str, score_id: Optional[str] = None, - data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", + data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL", comment: Optional[str] = None, config_id: Optional[str] = None, metadata: Optional[Any] = None, @@ -2022,9 +2022,9 @@ def score_current_trace( Args: name: Name of the score (e.g., "user_satisfaction", "overall_quality") - value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL) + value: Score value (can be numeric for NUMERIC/BOOLEAN types or string 
for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse metadata: Optional metadata to be attached to the score diff --git a/langfuse/_client/span.py b/langfuse/_client/span.py index 2590262ce..599ca21f3 100644 --- a/langfuse/_client/span.py +++ b/langfuse/_client/span.py @@ -308,7 +308,7 @@ def score( value: str, score_id: Optional[str] = None, data_type: Optional[ - Literal[ScoreDataType.CATEGORICAL] + Literal[ScoreDataType.CATEGORICAL, ScoreDataType.TEXT] ] = ScoreDataType.CATEGORICAL, comment: Optional[str] = None, config_id: Optional[str] = None, @@ -335,9 +335,9 @@ def score( Args: name: Name of the score (e.g., "relevance", "accuracy") - value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL) + value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse timestamp: Optional timestamp for the score (defaults to current UTC time) @@ -395,7 +395,7 @@ def score_trace( value: str, score_id: Optional[str] = None, data_type: Optional[ - Literal[ScoreDataType.CATEGORICAL] + Literal[ScoreDataType.CATEGORICAL, ScoreDataType.TEXT] ] = ScoreDataType.CATEGORICAL, comment: Optional[str] = None, config_id: Optional[str] = None, @@ -423,9 +423,9 @@ def score_trace( Args: name: Name of the score (e.g., "user_satisfaction", "overall_quality") - value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL) + value: Score value 
(numeric for NUMERIC/BOOLEAN, string for CATEGORICAL/TEXT) score_id: Optional custom ID for the score (auto-generated if not provided) - data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL) + data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT) comment: Optional comment or explanation for the score config_id: Optional ID of a score config defined in Langfuse timestamp: Optional timestamp for the score (defaults to current UTC time) diff --git a/langfuse/experiment.py b/langfuse/experiment.py index 6e4b32e10..67b50a900 100644 --- a/langfuse/experiment.py +++ b/langfuse/experiment.py @@ -17,8 +17,9 @@ Union, ) -from langfuse.api import DatasetItem, ScoreDataType +from langfuse.api import DatasetItem from langfuse.logger import langfuse_logger as logger +from langfuse.types import ExperimentScoreType class LocalExperimentItem(TypedDict, total=False): @@ -184,7 +185,7 @@ def __init__( value: Union[int, float, str, bool], comment: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, - data_type: Optional[ScoreDataType] = None, + data_type: Optional[ExperimentScoreType] = None, config_id: Optional[str] = None, ): """Initialize an Evaluation with the provided data. 
diff --git a/langfuse/types.py b/langfuse/types.py index 067088e40..39d4a1630 100644 --- a/langfuse/types.py +++ b/langfuse/types.py @@ -35,7 +35,10 @@ def my_evaluator(*, output: str, **kwargs) -> Evaluation: SpanLevel = Literal["DEBUG", "DEFAULT", "WARNING", "ERROR"] -ScoreDataType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN"] +ScoreDataType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN", "TEXT"] + +# Text scores are not supported for evals and experiments +ExperimentScoreType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN"] class MaskFunction(Protocol): diff --git a/tests/test_core_sdk.py b/tests/test_core_sdk.py index 91064de23..b05d5e8a5 100644 --- a/tests/test_core_sdk.py +++ b/tests/test_core_sdk.py @@ -321,6 +321,64 @@ def test_create_categorical_score(): assert created_score["stringValue"] == "high score" +def test_create_text_score(): + langfuse = Langfuse() + api_wrapper = LangfuseAPI() + + # Create a span and set trace properties + with langfuse.start_as_current_observation(name="test-span") as span: + with propagate_attributes( + trace_name="this-is-so-great-new", + user_id="test", + metadata={"test": "test"}, + ): + # Get trace ID for later use + trace_id = span.trace_id + + # Ensure data is sent + langfuse.flush() + sleep(2) + + # Create a text score + score_id = create_uuid() + langfuse.create_score( + score_id=score_id, + trace_id=trace_id, + name="this-is-a-score", + value="This is a detailed text evaluation of the output quality.", + data_type="TEXT", + ) + + # Create a generation in the same trace + generation = langfuse.start_observation( + as_type="generation", + name="yet another child", + metadata="test", + trace_context={"trace_id": trace_id}, + ) + generation.end() + + # Ensure data is sent + langfuse.flush() + sleep(2) + + # Retrieve and verify + trace = api_wrapper.get_trace(trace_id) + + # Find the score we created by name + created_score = next( + (s for s in trace["scores"] if s["name"] == "this-is-a-score"), None + ) + assert 
created_score is not None, "Score not found in trace" + assert created_score["id"] == score_id + assert created_score["dataType"] == "TEXT" + assert created_score["value"] is None + assert ( + created_score["stringValue"] + == "This is a detailed text evaluation of the output quality." + ) + + def test_create_score_with_custom_timestamp(): langfuse = Langfuse() api_wrapper = LangfuseAPI() From 25e446f3ed999e2b9b9d0dc61df20fbc829575a7 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 1 Apr 2026 18:53:13 +0200 Subject: [PATCH 2/3] docs: simplify openapi spec update instructions in CONTRIBUTING.md The spec update is now automated via PR from the langfuse repo. Co-Authored-By: Claude Opus 4.6 (1M context) --- CONTRIBUTING.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 53fede752..946400a5d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -45,9 +45,7 @@ uv run mypy langfuse --no-error-summary ### Update openapi spec -1. Generate Fern Python SDK in [langfuse](https://github.com/langfuse/langfuse) and copy the files generated in `generated/python` into the `langfuse/api` folder in this repo. -2. Execute the linter by running `uv run ruff format .` -3. Rebuild and deploy the package to PyPi. +A PR with the changes is created automatically whenever the OpenAPI spec changes in the [langfuse](https://github.com/langfuse/langfuse) repo. ### Publish release From b039ae75a612baf2794d4993b2ba9aec400f93e0 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 1 Apr 2026 19:02:09 +0200 Subject: [PATCH 3/3] fix(scores): update stale casts and export ExperimentScoreType Update cast(Literal["CATEGORICAL"], ...) to include "TEXT" in score/score_trace/score_current_span/score_current_trace impl bodies. Add ExperimentScoreType to __all__ in types.py. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- langfuse/_client/client.py | 4 ++-- langfuse/_client/span.py | 4 ++-- langfuse/types.py | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py index 45b4ede5a..04d8fae2c 100644 --- a/langfuse/_client/client.py +++ b/langfuse/_client/client.py @@ -1971,7 +1971,7 @@ def score_current_span( name=name, value=cast(str, value), score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, metadata=metadata, @@ -2060,7 +2060,7 @@ def score_current_trace( name=name, value=cast(str, value), score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, metadata=metadata, diff --git a/langfuse/_client/span.py b/langfuse/_client/span.py index 599ca21f3..bd0c638a7 100644 --- a/langfuse/_client/span.py +++ b/langfuse/_client/span.py @@ -364,7 +364,7 @@ def score( trace_id=self.trace_id, observation_id=self.id, score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, timestamp=timestamp, @@ -451,7 +451,7 @@ def score_trace( value=cast(str, value), trace_id=self.trace_id, score_id=score_id, - data_type=cast(Literal["CATEGORICAL"], data_type), + data_type=cast(Literal["CATEGORICAL", "TEXT"], data_type), comment=comment, config_id=config_id, timestamp=timestamp, diff --git a/langfuse/types.py b/langfuse/types.py index 39d4a1630..c3029e713 100644 --- a/langfuse/types.py +++ b/langfuse/types.py @@ -76,6 +76,7 @@ class TraceContext(TypedDict): __all__ = [ "SpanLevel", "ScoreDataType", + "ExperimentScoreType", "MaskFunction", "ParsedMediaReference", "TraceContext",