Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 41 additions & 7 deletions src/galileo/__future__/dataset.py
Original file line number Diff line number Diff line change
def save(self) -> Dataset:
    """
    Persist local changes to this dataset.

    Behavior depends on the current sync state:

    * ``LOCAL_ONLY`` -- the dataset does not exist remotely yet, so this
      delegates to :meth:`create`.
    * ``SYNCED`` -- nothing to push; returns immediately.
    * ``DELETED`` -- raises, since a deleted dataset cannot be saved.
    * ``DIRTY`` / ``FAILED_SYNC`` -- pushes the current name to the API and
      refreshes local attributes from the server's response.

    NOTE(review): only ``name`` is sent to the API before the state is marked
    SYNCED; other locally mutated fields are not persisted by this call.

    Returns
    -------
    Dataset: This dataset instance.

    Raises
    ------
    ValueError: If the dataset has been deleted or has no ID set.
    Exception: If the API call fails.

    Examples
    --------
    dataset = Dataset.get(name="my-dataset")
    dataset.name = "renamed-dataset"
    dataset._set_state(SyncState.DIRTY)
    dataset.save()
    assert dataset.is_synced()
    """
    if self.sync_state == SyncState.LOCAL_ONLY:
        # Never created remotely -- initial creation is the "save".
        return self.create()
    if self.sync_state == SyncState.SYNCED:
        logger.debug(f"Dataset.save: id='{self.id}' - already synced, no action needed")
        return self
    if self.sync_state == SyncState.DELETED:
        raise ValueError("Cannot save a deleted dataset.")

    # Remaining states (DIRTY or FAILED_SYNC) require a remote update,
    # which is impossible without a server-side identifier.
    if self.id is None:
        raise ValueError("Dataset ID is not set. Cannot update a dataset without an ID.")

    try:
        logger.info(f"Dataset.save: name='{self.name}' id='{self.id}' - started")
        refreshed = Datasets().update(self.id, name=self.name)

        # Mirror the server's view of the record locally.
        self.name = refreshed.name
        self.updated_at = refreshed.updated_at

        self._set_state(SyncState.SYNCED)
        logger.info(f"Dataset.save: id='{self.id}' - completed")
        return self
    except Exception as e:
        # Record the failure on the instance for later inspection, then propagate.
        self._set_state(SyncState.FAILED_SYNC, error=e)
        logger.error(f"Dataset.save: id='{self.id}' - failed: {e}")
        raise
49 changes: 43 additions & 6 deletions src/galileo/__future__/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from galileo.__future__.shared.base import StateManagementMixin, SyncState
from galileo.__future__.shared.exceptions import APIError, ValidationError
from galileo.projects import Projects
from galileo.resources.types import Unset

if TYPE_CHECKING:
from galileo.__future__.dataset import Dataset
Expand Down Expand Up @@ -760,20 +761,56 @@ def save(self) -> Project:
"""
Save changes to this project.

This method is a placeholder for future functionality to update
project properties.
Persists any local modifications to the API. If the project has not been
created yet (LOCAL_ONLY state), this calls create() instead. If already
synced with no pending changes, this is a no-op.

Returns
-------
Project: This project instance.

Raises
------
NotImplementedError: This functionality is not yet implemented.
ValueError: If the project has been deleted or has no ID set.
Exception: If the API call fails.

Examples
--------
project = Project.get(name="My Project")
project.name = "Renamed Project"
project._set_state(SyncState.DIRTY)
project.save()
assert project.is_synced()
"""
raise NotImplementedError(
"Project updates are not yet implemented. Use specific methods to modify project state."
)
if self.sync_state == SyncState.LOCAL_ONLY:
return self.create()
if self.sync_state == SyncState.SYNCED:
logger.debug(f"Project.save: id='{self.id}' - already synced, no action needed")
return self
if self.sync_state == SyncState.DELETED:
raise ValueError("Cannot save a deleted project.")

# DIRTY or FAILED_SYNC
if self.id is None:
raise ValueError("Project ID is not set. Cannot update a project without an ID.")

try:
logger.info(f"Project.save: name='{self.name}' id='{self.id}' - started")
projects_service = Projects()
updated_project = projects_service.update(self.id, name=self.name)

# Update attributes from response
if not isinstance(updated_project.name, Unset):
self.name = updated_project.name
self.updated_at = updated_project.updated_at
Comment on lines +797 to +805
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Project.save() only persists name but marks the project SYNCED, should we include other editable fields (at least type) in the update or prevent saving when only non-name fields changed?

Finding type: Logical Bugs | Severity: 🔴 High


Want Baz to fix this for you? Activate Fixer

Other fix methods

Fix in Cursor

Prompt for AI Agents:

In src/galileo/__future__/project.py around lines 797 to 805, the save() method
currently only sends name to Projects.update and only applies updated_project.name back
to self, so changes to other editable fields (e.g., type, labels, description) are never
persisted. Refactor save() to build an update payload that includes all editable fields
(name, type, labels, description, etc.) by checking each attribute against Unset and
including it if set; pass that payload into projects_service.update(self.id, ...). After
the API call, update each corresponding local attribute from the returned
updated_project (only when the returned value is not Unset) before setting the SyncState
to SYNCED so the local state accurately reflects the persisted remote state.


self._set_state(SyncState.SYNCED)
logger.info(f"Project.save: id='{self.id}' - completed")
return self
except Exception as e:
self._set_state(SyncState.FAILED_SYNC, error=e)
logger.error(f"Project.save: id='{self.id}' - failed: {e}")
raise


# Import at end to avoid circular import (log_stream.py imports Project)
Expand Down
50 changes: 50 additions & 0 deletions src/galileo/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
query_dataset_versions_datasets_dataset_id_versions_query_post,
query_datasets_datasets_query_post,
update_dataset_content_datasets_dataset_id_content_patch,
update_dataset_datasets_dataset_id_patch,
)
from galileo.resources.models import DatasetRow, ListDatasetVersionParams, ListDatasetVersionResponse
from galileo.resources.models.body_create_dataset_datasets_post import BodyCreateDatasetDatasetsPost
Expand All @@ -38,6 +39,7 @@
from galileo.resources.models.synthetic_dataset_extension_request import SyntheticDatasetExtensionRequest
from galileo.resources.models.synthetic_dataset_extension_response import SyntheticDatasetExtensionResponse
from galileo.resources.models.update_dataset_content_request import UpdateDatasetContentRequest
from galileo.resources.models.update_dataset_request import UpdateDatasetRequest
from galileo.resources.types import UNSET, File, Unset
from galileo.schema.datasets import DatasetRecord
from galileo.utils.datasets import validate_dataset_in_project
Expand Down Expand Up @@ -420,6 +422,54 @@ def delete(

return delete_dataset_datasets_dataset_id_delete.sync(client=self.config.api_client, dataset_id=dataset.id)

def update(self, dataset_id: str, *, name: Optional[str] = None) -> Dataset:
    """
    Updates a dataset's properties.

    Only fields explicitly provided are included in the PATCH payload;
    omitted fields are left untouched on the server.

    Parameters
    ----------
    dataset_id : str
        The ID of the dataset to update.
    name : str, optional
        The new name for the dataset. When None, the name field is omitted
        from the request rather than serialized as null.

    Returns
    -------
    Dataset
        The updated dataset.

    Raises
    ------
    DatasetAPIException
        If the API request fails.
    ValueError
        If the server returns no response.
    """
    logger.info("action='dataset.update' phase='start' dataset_id=%s name=%s", dataset_id, name)

    # Omit 'name' when the caller did not provide it: serializing name=None
    # would send "name": null and could clear a non-nullable field server-side.
    body = UpdateDatasetRequest(name=name if name is not None else UNSET)

    try:
        response = update_dataset_datasets_dataset_id_patch.sync(
            dataset_id=dataset_id, client=self.config.api_client, body=body
        )
    except Exception as e:
        # Log the failure (non-sensitive context only) before propagating.
        logger.error("action='dataset.update' phase='error' dataset_id=%s error=%s", dataset_id, e)
        raise

    if isinstance(response, HTTPValidationError):
        logger.error(
            "action='dataset.update' phase='error' dataset_id=%s validation_error=%s", dataset_id, response.detail
        )
        raise DatasetAPIException(response.detail)

    if not response:
        logger.error("action='dataset.update' phase='error' dataset_id=%s reason='empty response'", dataset_id)
        raise ValueError(f"Unable to update dataset: {dataset_id}")

    logger.info("action='dataset.update' phase='complete' dataset_id=%s name=%s", response.id, name)
    return Dataset(dataset_db=response)

def create(
self, name: str, content: DatasetType, *, project_id: Optional[str] = None, project_name: Optional[str] = None
) -> Dataset:
Expand Down
58 changes: 58 additions & 0 deletions src/galileo/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
get_project_projects_project_id_get,
get_projects_projects_get,
list_user_project_collaborators_projects_project_id_users_get,
update_project_projects_project_id_put,
update_user_project_collaborator_projects_project_id_users_user_id_patch,
)
from galileo.resources.models.collaborator_role import CollaboratorRole
Expand All @@ -25,6 +26,8 @@
from galileo.resources.models.project_db import ProjectDB
from galileo.resources.models.project_db_thin import ProjectDBThin
from galileo.resources.models.project_type import ProjectType
from galileo.resources.models.project_update import ProjectUpdate
from galileo.resources.models.project_update_response import ProjectUpdateResponse
from galileo.resources.models.user_collaborator import UserCollaborator
from galileo.resources.models.user_collaborator_create import UserCollaboratorCreate
from galileo.resources.types import UNSET, Unset
Expand Down Expand Up @@ -297,6 +300,61 @@ def create(self, name: str) -> Project:

return Project(project=response)

def update(self, project_id: str, *, name: Optional[str] = None) -> ProjectUpdateResponse:
    """
    Updates a project's properties.

    Only fields explicitly provided are included in the PUT payload;
    omitted fields are left untouched on the server.

    Parameters
    ----------
    project_id : str
        The ID of the project to update.
    name : str, optional
        The new name for the project. When None, the name field is omitted
        from the request rather than serialized as null.

    Returns
    -------
    ProjectUpdateResponse
        The updated project data returned by the API.

    Raises
    ------
    ProjectsAPIException
        If the server returns an error response.
    ValueError
        If the server returns no response.
    """
    _logger.info("action='project.update' phase='start' project_id=%s name=%s", project_id, name)

    # Omit 'name' when the caller did not provide it: serializing name=None
    # would send "name": null and could clear a non-nullable field server-side.
    body = ProjectUpdate(name=name if name is not None else UNSET)

    detailed_response = update_project_projects_project_id_put.sync_detailed(
        project_id=project_id, client=self.config.api_client, body=body
    )

    if detailed_response.status_code != httpx.codes.OK:
        _logger.error(
            "action='project.update' phase='error' project_id=%s status=%s content=%s",
            project_id,
            detailed_response.status_code,
            detailed_response.content,
        )
        raise ProjectsAPIException(detailed_response.content)

    response = detailed_response.parsed

    if isinstance(response, HTTPValidationError):
        _logger.error(
            "action='project.update' phase='error' project_id=%s validation_error=%s", project_id, response.detail
        )
        raise ProjectsAPIException(f"Failed to update project: {response.detail}")

    if not response:
        _logger.error("action='project.update' phase='error' project_id=%s reason='empty response'", project_id)
        raise ValueError(f"Unable to update project: {project_id}")

    _logger.info("action='project.update' phase='complete' project_id=%s name=%s", project_id, name)
    return response

def share_project_with_user(
self, project_id: str, user_id: str, role: CollaboratorRole = CollaboratorRole.VIEWER
) -> UserCollaborator:
Expand Down
Loading
Loading