-
Notifications
You must be signed in to change notification settings - Fork 11
feat: implement Project.save() and Dataset.save() #503
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
53e7e4a
483a64b
714bbb9
ed15a4a
609bcb7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -526,18 +526,52 @@ def save(self) -> Dataset: | |
| """ | ||
| Save changes to this dataset. | ||
|
|
||
| This method is a placeholder for future functionality to update | ||
| dataset properties. | ||
| Persists any local modifications to the API. If the dataset has not been | ||
| created yet (LOCAL_ONLY state), this calls create() instead. If already | ||
| synced with no pending changes, this is a no-op. | ||
|
|
||
| Returns | ||
| ------- | ||
| Dataset: This dataset instance. | ||
|
|
||
| Raises | ||
| ------ | ||
| NotImplementedError: This functionality is not yet implemented. | ||
| ValueError: If the dataset has been deleted or has no ID set. | ||
| Exception: If the API call fails. | ||
|
|
||
| Examples | ||
| -------- | ||
| dataset = Dataset.get(name="my-dataset") | ||
| dataset.name = "renamed-dataset" | ||
| dataset._set_state(SyncState.DIRTY) | ||
| dataset.save() | ||
| assert dataset.is_synced() | ||
| """ | ||
| raise NotImplementedError( | ||
| "Dataset updates are not yet implemented. " | ||
| "Use add_rows() to add content or other specific methods to modify dataset state." | ||
| ) | ||
| if self.sync_state == SyncState.LOCAL_ONLY: | ||
| return self.create() | ||
| if self.sync_state == SyncState.SYNCED: | ||
| logger.debug(f"Dataset.save: id='{self.id}' - already synced, no action needed") | ||
| return self | ||
| if self.sync_state == SyncState.DELETED: | ||
| raise ValueError("Cannot save a deleted dataset.") | ||
|
|
||
| # DIRTY or FAILED_SYNC | ||
| if self.id is None: | ||
| raise ValueError("Dataset ID is not set. Cannot update a dataset without an ID.") | ||
|
|
||
| try: | ||
| logger.info(f"Dataset.save: name='{self.name}' id='{self.id}' - started") | ||
| datasets_service = Datasets() | ||
| updated_dataset = datasets_service.update(self.id, name=self.name) | ||
|
|
||
| # Update attributes from response | ||
| self.name = updated_dataset.name | ||
| self.updated_at = updated_dataset.updated_at | ||
|
|
||
| self._set_state(SyncState.SYNCED) | ||
|
Comment on lines
+563
to
+571
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dataset.save() claims to persist local modifications but only passes name to Datasets.update — should we include mutated fields such as draft in the update before marking SYNCED or restrict save() to renames and update its docstring? Finding type: Want Baz to fix this for you? Activate Fixer Other fix methodsPrompt for AI Agents: |
||
| logger.info(f"Dataset.save: id='{self.id}' - completed") | ||
| return self | ||
| except Exception as e: | ||
| self._set_state(SyncState.FAILED_SYNC, error=e) | ||
| logger.error(f"Dataset.save: id='{self.id}' - failed: {e}") | ||
| raise | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ | |
| from galileo.__future__.shared.base import StateManagementMixin, SyncState | ||
| from galileo.__future__.shared.exceptions import APIError, ValidationError | ||
| from galileo.projects import Projects | ||
| from galileo.resources.types import Unset | ||
|
|
||
| if TYPE_CHECKING: | ||
| from galileo.__future__.dataset import Dataset | ||
|
|
@@ -760,20 +761,56 @@ def save(self) -> Project: | |
| """ | ||
| Save changes to this project. | ||
|
|
||
| This method is a placeholder for future functionality to update | ||
| project properties. | ||
| Persists any local modifications to the API. If the project has not been | ||
| created yet (LOCAL_ONLY state), this calls create() instead. If already | ||
| synced with no pending changes, this is a no-op. | ||
|
|
||
| Returns | ||
| ------- | ||
| Project: This project instance. | ||
|
|
||
| Raises | ||
| ------ | ||
| NotImplementedError: This functionality is not yet implemented. | ||
| ValueError: If the project has been deleted or has no ID set. | ||
| Exception: If the API call fails. | ||
|
|
||
| Examples | ||
| -------- | ||
| project = Project.get(name="My Project") | ||
| project.name = "Renamed Project" | ||
| project._set_state(SyncState.DIRTY) | ||
| project.save() | ||
| assert project.is_synced() | ||
| """ | ||
| raise NotImplementedError( | ||
| "Project updates are not yet implemented. Use specific methods to modify project state." | ||
| ) | ||
| if self.sync_state == SyncState.LOCAL_ONLY: | ||
| return self.create() | ||
| if self.sync_state == SyncState.SYNCED: | ||
| logger.debug(f"Project.save: id='{self.id}' - already synced, no action needed") | ||
| return self | ||
| if self.sync_state == SyncState.DELETED: | ||
| raise ValueError("Cannot save a deleted project.") | ||
|
|
||
| # DIRTY or FAILED_SYNC | ||
| if self.id is None: | ||
| raise ValueError("Project ID is not set. Cannot update a project without an ID.") | ||
|
|
||
| try: | ||
| logger.info(f"Project.save: name='{self.name}' id='{self.id}' - started") | ||
| projects_service = Projects() | ||
| updated_project = projects_service.update(self.id, name=self.name) | ||
|
|
||
| # Update attributes from response | ||
| if not isinstance(updated_project.name, Unset): | ||
| self.name = updated_project.name | ||
| self.updated_at = updated_project.updated_at | ||
|
Comment on lines
+797
to
+805
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Project.save() only persists name but marks the project SYNCED, should we include other editable fields (at least type) in the update or prevent saving when only non-name fields changed? Finding type: Want Baz to fix this for you? Activate Fixer Other fix methodsPrompt for AI Agents: |
||
|
|
||
| self._set_state(SyncState.SYNCED) | ||
| logger.info(f"Project.save: id='{self.id}' - completed") | ||
| return self | ||
| except Exception as e: | ||
| self._set_state(SyncState.FAILED_SYNC, error=e) | ||
| logger.error(f"Project.save: id='{self.id}' - failed: {e}") | ||
| raise | ||
|
|
||
|
|
||
| # Import at end to avoid circular import (log_stream.py imports Project) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
| query_dataset_versions_datasets_dataset_id_versions_query_post, | ||
| query_datasets_datasets_query_post, | ||
| update_dataset_content_datasets_dataset_id_content_patch, | ||
| update_dataset_datasets_dataset_id_patch, | ||
| ) | ||
| from galileo.resources.models import DatasetRow, ListDatasetVersionParams, ListDatasetVersionResponse | ||
| from galileo.resources.models.body_create_dataset_datasets_post import BodyCreateDatasetDatasetsPost | ||
|
|
@@ -38,6 +39,7 @@ | |
| from galileo.resources.models.synthetic_dataset_extension_request import SyntheticDatasetExtensionRequest | ||
| from galileo.resources.models.synthetic_dataset_extension_response import SyntheticDatasetExtensionResponse | ||
| from galileo.resources.models.update_dataset_content_request import UpdateDatasetContentRequest | ||
| from galileo.resources.models.update_dataset_request import UpdateDatasetRequest | ||
| from galileo.resources.types import UNSET, File, Unset | ||
| from galileo.schema.datasets import DatasetRecord | ||
| from galileo.utils.datasets import validate_dataset_in_project | ||
|
|
@@ -420,6 +422,54 @@ def delete( | |
|
|
||
| return delete_dataset_datasets_dataset_id_delete.sync(client=self.config.api_client, dataset_id=dataset.id) | ||
|
|
||
| def update(self, dataset_id: str, *, name: Optional[str] = None) -> Dataset: | ||
| """ | ||
| Updates a dataset's properties. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| dataset_id : str | ||
| The ID of the dataset to update. | ||
| name : str, optional | ||
| The new name for the dataset. | ||
|
|
||
| Returns | ||
|
Comment on lines
+425
to
+436
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Finding type: Want Baz to fix this for you? Activate Fixer Other fix methodsPrompt for AI Agents:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commit 609bcb7 addressed this comment by adding structured lifecycle logging to |
||
| ------- | ||
| Dataset | ||
| The updated dataset. | ||
|
|
||
| Raises | ||
| ------ | ||
| DatasetAPIException | ||
| If the API request fails. | ||
| ValueError | ||
| If the server returns no response. | ||
| """ | ||
| logger.info("action='dataset.update' phase='start' dataset_id=%s name=%s", dataset_id, name) | ||
|
|
||
| body = UpdateDatasetRequest(name=name) | ||
|
|
||
| try: | ||
|
Comment on lines
+448
to
+452
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Datasets.update sends name: null when name is None, violating the API's non-null name contract — should we only set name when provided (e.g. UpdateDatasetRequest(name=name if name is not None else UNSET))? Finding type: Want Baz to fix this for you? Activate Fixer Other fix methodsPrompt for AI Agents: |
||
| response = update_dataset_datasets_dataset_id_patch.sync( | ||
| dataset_id=dataset_id, client=self.config.api_client, body=body | ||
| ) | ||
| except Exception as e: | ||
| logger.error("action='dataset.update' phase='error' dataset_id=%s error=%s", dataset_id, e) | ||
| raise | ||
|
|
||
| if isinstance(response, HTTPValidationError): | ||
| logger.error( | ||
| "action='dataset.update' phase='error' dataset_id=%s validation_error=%s", dataset_id, response.detail | ||
| ) | ||
| raise DatasetAPIException(response.detail) | ||
|
|
||
| if not response: | ||
| logger.error("action='dataset.update' phase='error' dataset_id=%s reason='empty response'", dataset_id) | ||
| raise ValueError(f"Unable to update dataset: {dataset_id}") | ||
|
|
||
| logger.info("action='dataset.update' phase='complete' dataset_id=%s name=%s", response.id, name) | ||
| return Dataset(dataset_db=response) | ||
|
|
||
| def create( | ||
| self, name: str, content: DatasetType, *, project_id: Optional[str] = None, project_name: Optional[str] = None | ||
| ) -> Dataset: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
| get_project_projects_project_id_get, | ||
| get_projects_projects_get, | ||
| list_user_project_collaborators_projects_project_id_users_get, | ||
| update_project_projects_project_id_put, | ||
| update_user_project_collaborator_projects_project_id_users_user_id_patch, | ||
| ) | ||
| from galileo.resources.models.collaborator_role import CollaboratorRole | ||
|
|
@@ -25,6 +26,8 @@ | |
| from galileo.resources.models.project_db import ProjectDB | ||
| from galileo.resources.models.project_db_thin import ProjectDBThin | ||
| from galileo.resources.models.project_type import ProjectType | ||
| from galileo.resources.models.project_update import ProjectUpdate | ||
| from galileo.resources.models.project_update_response import ProjectUpdateResponse | ||
| from galileo.resources.models.user_collaborator import UserCollaborator | ||
| from galileo.resources.models.user_collaborator_create import UserCollaboratorCreate | ||
| from galileo.resources.types import UNSET, Unset | ||
|
|
@@ -297,6 +300,61 @@ def create(self, name: str) -> Project: | |
|
|
||
| return Project(project=response) | ||
|
|
||
| def update(self, project_id: str, *, name: Optional[str] = None) -> ProjectUpdateResponse: | ||
| """ | ||
| Updates a project's properties. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| project_id : str | ||
| The ID of the project to update. | ||
| name : str, optional | ||
| The new name for the project. | ||
|
|
||
| Returns | ||
|
Comment on lines
+303
to
+314
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Finding type: Want Baz to fix this for you? Activate Fixer Other fix methodsPrompt for AI Agents:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commit 609bcb7 addressed this comment by adding lifecycle logs around |
||
| ------- | ||
| ProjectUpdateResponse | ||
| The updated project data returned by the API. | ||
|
|
||
| Raises | ||
| ------ | ||
| ProjectsAPIException | ||
| If the server returns an error response. | ||
| ValueError | ||
| If the server returns no response. | ||
| """ | ||
| _logger.info("action='project.update' phase='start' project_id=%s name=%s", project_id, name) | ||
|
|
||
| body = ProjectUpdate(name=name) | ||
|
|
||
| detailed_response = update_project_projects_project_id_put.sync_detailed( | ||
| project_id=project_id, client=self.config.api_client, body=body | ||
| ) | ||
|
|
||
| if detailed_response.status_code != httpx.codes.OK: | ||
| _logger.error( | ||
| "action='project.update' phase='error' project_id=%s status=%s content=%s", | ||
| project_id, | ||
| detailed_response.status_code, | ||
| detailed_response.content, | ||
| ) | ||
| raise ProjectsAPIException(detailed_response.content) | ||
|
|
||
| response = detailed_response.parsed | ||
|
|
||
| if isinstance(response, HTTPValidationError): | ||
| _logger.error( | ||
| "action='project.update' phase='error' project_id=%s validation_error=%s", project_id, response.detail | ||
| ) | ||
| raise ProjectsAPIException(f"Failed to update project: {response.detail}") | ||
|
|
||
| if not response: | ||
| _logger.error("action='project.update' phase='error' project_id=%s reason='empty response'", project_id) | ||
| raise ValueError(f"Unable to update project: {project_id}") | ||
|
|
||
| _logger.info("action='project.update' phase='complete' project_id=%s name=%s", project_id, name) | ||
| return response | ||
|
|
||
| def share_project_with_user( | ||
| self, project_id: str, user_id: str, role: CollaboratorRole = CollaboratorRole.VIEWER | ||
| ) -> UserCollaborator: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Dataset.savenow re-implements the entire state-transition + persistence workflow (LOCAL_ONLY -> create,SYNCEDno-op,DELETEDguard) before callingDatasets.update, which is essentially the same block thatProject.savecopies verbatim (seeproject.pylines 785‑809). Any change to the state checks, logging, or_set_stateerror handling now needs to be made in two places. Can we consolidate this intoStateManagementMixin(e.g. a shared_persist_changes(create_fn, update_fn, attr_sync)helper) so dataset/project save reuse one canonical workflow and only supply the resource-specific update call?Finding type:
Code Dedup and Conventions| Severity: 🟢 LowWant Baz to fix this for you? Activate Fixer