Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 66 additions & 7 deletions src/galileo/__future__/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,21 @@
create_code_scorer_version_scorers_scorer_id_version_code_post,
create_scorers_post,
get_validate_code_scorer_task_result_scorers_code_validate_task_id_get,
update_scorers_scorer_id_patch,
validate_code_scorer_scorers_code_validate_post,
)
from galileo.resources.models import (
BodyCreateCodeScorerVersionScorersScorerIdVersionCodePost,
BodyValidateCodeScorerScorersCodeValidatePost,
CreateScorerRequest,
HTTPValidationError,
OutputTypeEnum,
ScorerTypes,
TaskResultStatus,
UpdateScorerRequest,
)
from galileo.resources.models.invalid_result import InvalidResult
from galileo.resources.types import File, Unset
from galileo.resources.types import UNSET, File, Unset
from galileo.schema.metrics import GalileoMetrics, LocalMetricConfig
from galileo.schema.metrics import Metric as LegacyMetric
from galileo.scorers import Scorers
Expand Down Expand Up @@ -408,21 +411,77 @@ def _populate_from_scorer_response(self, scorer_response: Any) -> None:
else:
self.node_level = None

def update(self, **kwargs: Any) -> Metric:
    """
    Update this metric's properties on the server.

    Accepts keyword arguments for any combination of the supported fields.
    Only the fields explicitly passed are sent to the API; omitted fields
    are left unchanged.

    Parameters
    ----------
        name (str): New name for the metric.
        description (str): New description for the metric.
        tags (list[str]): New tags for the metric.

    Returns
    -------
        Metric: This metric instance with updated attributes.

    Raises
    ------
        ValidationError: If this is a local metric (not server-side).
        ValueError: If the metric has no ID set, the metric has been deleted,
            unknown fields are passed, the API reports a validation error,
            or the API returns no response.
        Exception: If the API call fails.

    Examples
    --------
        metric = Metric.get(name="my-metric")
        metric.update(name="renamed-metric", tags=["evaluation", "prod"])
    """
    # Precondition checks run before the try block on purpose: a rejected
    # call must not flip the metric's sync state to FAILED_SYNC.
    if isinstance(self, LocalMetric):
        raise ValidationError("Local metrics don't exist on the server and can't be updated.")

    if self.id is None:
        raise ValueError("Metric ID is not set. Cannot update a local-only metric.")

    if self.sync_state == SyncState.DELETED:
        raise ValueError("Cannot update a deleted metric.")

    # Only expose fields that are safe for user-facing updates.
    # The API supports additional fields (model_type, ground_truth, output_type, etc.)
    # but those are managed through dedicated methods or at creation time.
    valid_fields = {"name", "description", "tags"}
    invalid_fields = set(kwargs) - valid_fields
    if invalid_fields:
        raise ValueError(f"Invalid update fields: {sorted(invalid_fields)!r}. Valid fields: {sorted(valid_fields)}")

    # Fields the caller did not pass are set to UNSET rather than None.
    body = UpdateScorerRequest(
        name=kwargs.get("name", UNSET),
        description=kwargs.get("description", UNSET),
        tags=kwargs.get("tags", UNSET),
    )

    try:
        logger.info(f"Metric.update: id='{self.id}' name='{self.name}' - started")
        config = GalileoPythonConfig.get()
        response = update_scorers_scorer_id_patch.sync(scorer_id=self.id, client=config.api_client, body=body)

        if isinstance(response, HTTPValidationError):
            raise ValueError(f"Failed to update metric: {response.detail}")

        if response is None:
            raise ValueError(f"Unable to update metric: {self.id}")

        # Refresh local attributes from the server's response.
        self._populate_from_scorer_response(response)
        self._set_state(SyncState.SYNCED)
        logger.info(f"Metric.update: id='{self.id}' - completed")
        return self
    except Exception as e:
        # Record the failure on the instance, then let the caller see the original error.
        self._set_state(SyncState.FAILED_SYNC, error=e)
        logger.error(f"Metric.update: id='{self.id}' - failed: {e}")
        raise

def delete(self) -> None:
"""
Delete this metric.
Expand Down
95 changes: 91 additions & 4 deletions tests/future/test_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import pytest

from galileo.__future__ import CodeMetric, LlmMetric, Metric
from galileo.__future__ import CodeMetric, LlmMetric, LocalMetric, Metric
from galileo.__future__.shared.base import SyncState
from galileo.__future__.shared.exceptions import ValidationError
from galileo.resources.models import OutputTypeEnum, ScorerTypes
Expand Down Expand Up @@ -554,13 +554,100 @@ def test_refresh_raises_error_when_metric_no_longer_exists(
class TestMetricUpdate:
    """Test suite for Metric.update() method."""

    def test_update_local_metric_raises_validation_error(self, reset_configuration: None) -> None:
        """update() on a LocalMetric is rejected with ValidationError."""

        # Given: a local (non-server-side) metric
        def scorer_fn(trace):
            return 1.0

        metric = LocalMetric(name="local-metric", scorer_fn=scorer_fn)

        # When/Then: update() raises ValidationError because local metrics have no server record
        with pytest.raises(ValidationError, match="Local metrics don't exist on the server"):
            metric.update(name="New Name")

    def test_update_without_id_raises_value_error(self, reset_configuration: None) -> None:
        """update() on a metric without an ID is rejected with ValueError."""
        # Given: an LlmMetric that has never been synced (no ID)
        metric = LlmMetric(name="Test Metric", prompt="Test prompt")

        # When/Then: update() raises ValueError because there is no server ID to update
        with pytest.raises(ValueError, match="Metric ID is not set"):
            metric.update(name="New Name")

    def test_update_deleted_metric_raises_value_error(self, reset_configuration: None) -> None:
        """update() on a metric in the DELETED state is rejected with ValueError."""
        # Given: a metric that has been deleted
        metric = LlmMetric(name="Test Metric", prompt="Test prompt")
        metric.id = str(uuid4())
        metric._set_state(SyncState.DELETED)

        # When/Then: update() raises ValueError because the metric has been deleted
        with pytest.raises(ValueError, match="Cannot update a deleted metric"):
            metric.update(name="New Name")

    def test_update_with_invalid_fields_raises_value_error(self, reset_configuration: None) -> None:
        """update() with an unsupported keyword is rejected with ValueError."""
        # Given: a synced metric with an unknown kwarg
        metric = LlmMetric(name="Test Metric", prompt="Test prompt")
        metric.id = str(uuid4())
        metric._set_state(SyncState.SYNCED)

        # When/Then: update() raises ValueError listing the invalid field
        with pytest.raises(ValueError, match="Invalid update fields"):
            metric.update(unknown_field="value")

    @patch("galileo.__future__.metric.GalileoPythonConfig.get")
    @patch("galileo.__future__.metric.update_scorers_scorer_id_patch")
    def test_update_calls_api_and_syncs_attributes(
        self, mock_patch_api: MagicMock, mock_config_get: MagicMock, reset_configuration: None
    ) -> None:
        """A successful update() calls the PATCH endpoint and syncs the instance."""
        # Given: a synced metric and a mocked API response
        metric_id = str(uuid4())
        mock_config_get.return_value.api_client = MagicMock()

        mock_response = MagicMock()
        mock_response.id = metric_id
        mock_response.name = "Renamed Metric"
        mock_response.scorer_type = MagicMock()
        mock_response.tags = ["eval"]
        mock_response.description = "updated"
        mock_response.created_at = MagicMock()
        mock_response.updated_at = MagicMock()
        mock_response.defaults = None
        mock_patch_api.sync.return_value = mock_response

        metric = LlmMetric(name="Test Metric", prompt="Test prompt")
        metric.id = metric_id
        metric._set_state(SyncState.SYNCED)

        # When: update() is called with a new name and tags
        result = metric.update(name="Renamed Metric", tags=["eval"])

        # Then: the API is called with the correct body and attributes are updated
        mock_patch_api.sync.assert_called_once()
        call_kwargs = mock_patch_api.sync.call_args.kwargs
        assert call_kwargs["scorer_id"] == metric_id
        assert call_kwargs["body"].name == "Renamed Metric"
        assert call_kwargs["body"].tags == ["eval"]
        assert result.name == "Renamed Metric"
        assert result.is_synced()

    @patch("galileo.__future__.metric.GalileoPythonConfig.get")
    @patch("galileo.__future__.metric.update_scorers_scorer_id_patch")
    def test_update_handles_api_failure(
        self, mock_patch_api: MagicMock, mock_config_get: MagicMock, reset_configuration: None
    ) -> None:
        """An exception from the API propagates and marks the metric FAILED_SYNC."""
        # Given: a synced metric and an API that raises an error
        mock_config_get.return_value.api_client = MagicMock()
        mock_patch_api.sync.side_effect = RuntimeError("API error")

        metric = LlmMetric(name="Test Metric", prompt="Test prompt")
        metric.id = str(uuid4())
        metric._set_state(SyncState.SYNCED)

        # When/Then: the exception propagates and state is FAILED_SYNC
        with pytest.raises(RuntimeError, match="API error"):
            metric.update(name="New Name")

        assert metric.sync_state == SyncState.FAILED_SYNC


class TestMetricMethods:
"""Test suite for other Metric methods."""
Expand Down
Loading