From aab3120c8d4ea09d4e7864ca650fa2e6adf463d7 Mon Sep 17 00:00:00 2001 From: thiagobomfin-galileo Date: Fri, 13 Mar 2026 13:44:31 -0300 Subject: [PATCH 1/3] feat(future): implement Metric.update() --- src/galileo/__future__/metric.py | 67 ++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/src/galileo/__future__/metric.py b/src/galileo/__future__/metric.py index 430c2195..cf052d33 100644 --- a/src/galileo/__future__/metric.py +++ b/src/galileo/__future__/metric.py @@ -21,18 +21,21 @@ create_code_scorer_version_scorers_scorer_id_version_code_post, create_scorers_post, get_validate_code_scorer_task_result_scorers_code_validate_task_id_get, + update_scorers_scorer_id_patch, validate_code_scorer_scorers_code_validate_post, ) from galileo.resources.models import ( BodyCreateCodeScorerVersionScorersScorerIdVersionCodePost, BodyValidateCodeScorerScorersCodeValidatePost, CreateScorerRequest, + HTTPValidationError, OutputTypeEnum, ScorerTypes, TaskResultStatus, + UpdateScorerRequest, ) from galileo.resources.models.invalid_result import InvalidResult -from galileo.resources.types import File, Unset +from galileo.resources.types import UNSET, File, Unset from galileo.schema.metrics import GalileoMetrics, LocalMetricConfig from galileo.schema.metrics import Metric as LegacyMetric from galileo.scorers import Scorers @@ -408,21 +411,71 @@ def _populate_from_scorer_response(self, scorer_response: Any) -> None: else: self.node_level = None - def update(self, **kwargs: Any) -> None: + def update(self, **kwargs: Any) -> Metric: """ Update this metric's properties. - Currently not implemented as the API doesn't support updating scorers. + Accepts keyword arguments for any combination of the supported fields. + Only the fields explicitly passed are sent to the API; omitted fields + are left unchanged. + + Parameters + ---------- + name (str): New name for the metric. + description (str): New description for the metric. 
+ tags (list[str]): New tags for the metric. + + Returns + ------- + Metric: This metric instance with updated attributes. Raises ------ - NotImplementedError: Always raised as updates are not supported. + ValidationError: If this is a local metric (not server-side). + ValueError: If the metric has no ID set, unknown fields are passed, + or the API returns no response. + Exception: If the API call fails. + + Examples + -------- + metric = Metric.get(name="my-metric") + metric.update(name="renamed-metric", tags=["evaluation", "prod"]) """ - raise NotImplementedError( - "Updating metrics is not yet supported by the API. " - "Consider creating a new metric with the desired properties instead." + if isinstance(self, LocalMetric): + raise ValidationError("Local metrics don't exist on the server and can't be updated.") + + if self.id is None: + raise ValueError("Metric ID is not set. Cannot update a local-only metric.") + + valid_fields = {"name", "description", "tags"} + invalid_fields = set(kwargs) - valid_fields + if invalid_fields: + raise ValueError(f"Invalid update fields: {sorted(invalid_fields)!r}. 
Valid fields: {sorted(valid_fields)}") + + body = UpdateScorerRequest( + name=kwargs.get("name", UNSET), description=kwargs.get("description", UNSET), tags=kwargs.get("tags", UNSET) ) + try: + logger.info(f"Metric.update: id='{self.id}' name='{self.name}' - started") + config = GalileoPythonConfig.get() + response = update_scorers_scorer_id_patch.sync(scorer_id=self.id, client=config.api_client, body=body) + + if isinstance(response, HTTPValidationError): + raise ValueError(f"Failed to update metric: {response.detail}") + + if response is None: + raise ValueError(f"Unable to update metric: {self.id}") + + self._populate_from_scorer_response(response) + self._set_state(SyncState.SYNCED) + logger.info(f"Metric.update: id='{self.id}' - completed") + return self + except Exception as e: + self._set_state(SyncState.FAILED_SYNC, error=e) + logger.error(f"Metric.update: id='{self.id}' - failed: {e}") + raise + def delete(self) -> None: """ Delete this metric. From 4df5469aabcac7a0ce61721e1ab4322395ab17ac Mon Sep 17 00:00:00 2001 From: thiagobomfin-galileo Date: Fri, 13 Mar 2026 13:45:12 -0300 Subject: [PATCH 2/3] test(future): add TestMetricUpdate covering all guard conditions and the update flow --- tests/future/test_metric.py | 85 +++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 4 deletions(-) diff --git a/tests/future/test_metric.py b/tests/future/test_metric.py index e79891ad..072d4d5d 100644 --- a/tests/future/test_metric.py +++ b/tests/future/test_metric.py @@ -5,7 +5,7 @@ import pytest -from galileo.__future__ import CodeMetric, LlmMetric, Metric +from galileo.__future__ import CodeMetric, LlmMetric, LocalMetric, Metric from galileo.__future__.shared.base import SyncState from galileo.__future__.shared.exceptions import ValidationError from galileo.resources.models import OutputTypeEnum, ScorerTypes @@ -554,13 +554,90 @@ def test_refresh_raises_error_when_metric_no_longer_exists( class TestMetricUpdate: """Test suite for Metric.update() 
method.""" - def test_update_raises_not_implemented_error(self, reset_configuration: None) -> None: - """Test update() raises NotImplementedError.""" + def test_update_local_metric_raises_validation_error(self, reset_configuration: None) -> None: + # Given: a local (non-server-side) metric + def scorer_fn(trace): + return 1.0 + + metric = LocalMetric(name="local-metric", scorer_fn=scorer_fn) + + # When/Then: update() raises ValidationError because local metrics have no server record + with pytest.raises(ValidationError, match="Local metrics don't exist on the server"): + metric.update(name="New Name") + + def test_update_without_id_raises_value_error(self, reset_configuration: None) -> None: + # Given: an LlmMetric that has never been synced (no ID) metric = LlmMetric(name="Test Metric", prompt="Test prompt") - with pytest.raises(NotImplementedError, match="not yet supported"): + # When/Then: update() raises ValueError because there is no server ID to update + with pytest.raises(ValueError, match="Metric ID is not set"): metric.update(name="New Name") + def test_update_with_invalid_fields_raises_value_error(self, reset_configuration: None) -> None: + # Given: a synced metric with an unknown kwarg + metric = LlmMetric(name="Test Metric", prompt="Test prompt") + metric.id = str(uuid4()) + metric._set_state(SyncState.SYNCED) + + # When/Then: update() raises ValueError listing the invalid field + with pytest.raises(ValueError, match="Invalid update fields"): + metric.update(unknown_field="value") + + @patch("galileo.__future__.metric.GalileoPythonConfig.get") + @patch("galileo.__future__.metric.update_scorers_scorer_id_patch") + def test_update_calls_api_and_syncs_attributes( + self, mock_patch_api: MagicMock, mock_config_get: MagicMock, reset_configuration: None + ) -> None: + # Given: a synced metric and a mocked API response + metric_id = str(uuid4()) + mock_config_get.return_value.api_client = MagicMock() + + mock_response = MagicMock() + mock_response.id = 
metric_id + mock_response.name = "Renamed Metric" + mock_response.scorer_type = MagicMock() + mock_response.tags = ["eval"] + mock_response.description = "updated" + mock_response.created_at = MagicMock() + mock_response.updated_at = MagicMock() + mock_response.defaults = None + mock_patch_api.sync.return_value = mock_response + + metric = LlmMetric(name="Test Metric", prompt="Test prompt") + metric.id = metric_id + metric._set_state(SyncState.SYNCED) + + # When: update() is called with a new name and tags + result = metric.update(name="Renamed Metric", tags=["eval"]) + + # Then: the API is called with the correct body and attributes are updated + mock_patch_api.sync.assert_called_once() + call_kwargs = mock_patch_api.sync.call_args.kwargs + assert call_kwargs["scorer_id"] == metric_id + assert call_kwargs["body"].name == "Renamed Metric" + assert call_kwargs["body"].tags == ["eval"] + assert result.name == "Renamed Metric" + assert result.is_synced() + + @patch("galileo.__future__.metric.GalileoPythonConfig.get") + @patch("galileo.__future__.metric.update_scorers_scorer_id_patch") + def test_update_handles_api_failure( + self, mock_patch_api: MagicMock, mock_config_get: MagicMock, reset_configuration: None + ) -> None: + # Given: a synced metric and an API that raises an error + mock_config_get.return_value.api_client = MagicMock() + mock_patch_api.sync.side_effect = RuntimeError("API error") + + metric = LlmMetric(name="Test Metric", prompt="Test prompt") + metric.id = str(uuid4()) + metric._set_state(SyncState.SYNCED) + + # When/Then: the exception propagates and state is FAILED_SYNC + with pytest.raises(RuntimeError, match="API error"): + metric.update(name="New Name") + + assert metric.sync_state == SyncState.FAILED_SYNC + class TestMetricMethods: """Test suite for other Metric methods.""" From f2166b5b95ee26234f562be2a5ba584b3471ef7d Mon Sep 17 00:00:00 2001 From: thiagobomfin-galileo Date: Wed, 18 Mar 2026 19:10:18 -0300 Subject: [PATCH 3/3] fix(future): reject Metric.update() on deleted metrics --- 
src/galileo/__future__/metric.py | 6 ++++++ tests/future/test_metric.py | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/galileo/__future__/metric.py b/src/galileo/__future__/metric.py index cf052d33..a76adbe5 100644 --- a/src/galileo/__future__/metric.py +++ b/src/galileo/__future__/metric.py @@ -447,6 +447,12 @@ def update(self, **kwargs: Any) -> Metric: if self.id is None: raise ValueError("Metric ID is not set. Cannot update a local-only metric.") + if self.sync_state == SyncState.DELETED: + raise ValueError("Cannot update a deleted metric.") + + # Only expose fields that are safe for user-facing updates. + # The API supports additional fields (model_type, ground_truth, output_type, etc.) + # but those are managed through dedicated methods or at creation time. valid_fields = {"name", "description", "tags"} invalid_fields = set(kwargs) - valid_fields if invalid_fields: diff --git a/tests/future/test_metric.py b/tests/future/test_metric.py index 072d4d5d..395b8c48 100644 --- a/tests/future/test_metric.py +++ b/tests/future/test_metric.py @@ -573,6 +573,16 @@ def test_update_without_id_raises_value_error(self, reset_configuration: None) - with pytest.raises(ValueError, match="Metric ID is not set"): metric.update(name="New Name") + def test_update_deleted_metric_raises_value_error(self, reset_configuration: None) -> None: + # Given: a metric that has been deleted + metric = LlmMetric(name="Test Metric", prompt="Test prompt") + metric.id = str(uuid4()) + metric._set_state(SyncState.DELETED) + + # When/Then: update() raises ValueError because the metric has been deleted + with pytest.raises(ValueError, match="Cannot update a deleted metric"): + metric.update(name="New Name") + def test_update_with_invalid_fields_raises_value_error(self, reset_configuration: None) -> None: # Given: a synced metric with an unknown kwarg metric = LlmMetric(name="Test Metric", prompt="Test prompt")