From f2471e4db90cc38b1239ec9e352652f257b871c2 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 16:50:44 +0000 Subject: [PATCH 01/11] Integrate deployment metadata service for server-side locking and state Add client integration with the deployment metadata service API for server-side deployment locking and resource state tracking. Gated behind DATABRICKS_BUNDLE_DEPLOYMENT_SERVICE=true environment variable. Co-authored-by: Isaac --- bundle/deploy/metadata/service/client.go | 183 +++++++++++++ bundle/deploy/metadata/service/heartbeat.go | 37 +++ bundle/deploy/metadata/service/types.go | 189 ++++++++++++++ bundle/deploy/state_update.go | 6 + bundle/env/deployment_metadata.go | 15 ++ bundle/phases/deploy.go | 6 + bundle/phases/deploy_metadata.go | 269 ++++++++++++++++++++ bundle/phases/destroy.go | 6 + bundle/phases/destroy_metadata.go | 169 ++++++++++++ 9 files changed, 880 insertions(+) create mode 100644 bundle/deploy/metadata/service/client.go create mode 100644 bundle/deploy/metadata/service/heartbeat.go create mode 100644 bundle/deploy/metadata/service/types.go create mode 100644 bundle/env/deployment_metadata.go create mode 100644 bundle/phases/deploy_metadata.go create mode 100644 bundle/phases/destroy_metadata.go diff --git a/bundle/deploy/metadata/service/client.go b/bundle/deploy/metadata/service/client.go new file mode 100644 index 0000000000..ffe2fb36fc --- /dev/null +++ b/bundle/deploy/metadata/service/client.go @@ -0,0 +1,183 @@ +package service + +import ( + "context" + "fmt" + "net/http" + + "errors" + + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/client" +) + +const basePath = "/api/2.0/bundle" + +// Client wraps the Databricks API client for the deployment metadata service. +type Client struct { + api *client.DatabricksClient +} + +// NewClient creates a new deployment metadata service client from a workspace client. +func NewClient(w *databricks.WorkspaceClient) (*Client, error) { + apiClient, err := client.New(w.Config) + if err != nil { + return nil, fmt.Errorf("failed to create deployment metadata API client: %w", err) + } + return &Client{api: apiClient}, nil +} + +// CreateDeployment creates a new deployment. +func (c *Client) CreateDeployment(ctx context.Context, deploymentID string, deployment *Deployment) (*Deployment, error) { + resp := &Deployment{} + path := fmt.Sprintf("%s/deployments", basePath) + err := c.api.Do(ctx, http.MethodPost, path, nil, nil, CreateDeploymentRequest{ + DeploymentID: deploymentID, + Deployment: deployment, + }, resp) + if err != nil { + return nil, mapError("create deployment", err) + } + return resp, nil +} + +// GetDeployment retrieves a deployment by ID. +func (c *Client) GetDeployment(ctx context.Context, deploymentID string) (*Deployment, error) { + resp := &Deployment{} + path := fmt.Sprintf("%s/deployments/%s", basePath, deploymentID) + err := c.api.Do(ctx, http.MethodGet, path, nil, nil, nil, resp) + if err != nil { + return nil, mapError("get deployment", err) + } + return resp, nil +} + +// DeleteDeployment soft-deletes a deployment. +func (c *Client) DeleteDeployment(ctx context.Context, deploymentID string) error { + path := fmt.Sprintf("%s/deployments/%s", basePath, deploymentID) + err := c.api.Do(ctx, http.MethodDelete, path, nil, nil, nil, nil) + if err != nil { + return mapError("delete deployment", err) + } + return nil +} + +// CreateVersion creates a new version (acquires the deployment lock). +func (c *Client) CreateVersion(ctx context.Context, deploymentID string, versionID string, version *Version) (*Version, error) { + resp := &Version{} + path := fmt.Sprintf("%s/deployments/%s/versions", basePath, deploymentID) + err := c.api.Do(ctx, http.MethodPost, path, nil, nil, CreateVersionRequest{ + Parent: fmt.Sprintf("deployments/%s", deploymentID), + Version: version, + VersionID: versionID, + }, resp) + if err != nil { + return nil, mapError("create version", err) + } + return resp, nil +} + +// GetVersion retrieves a version. +func (c *Client) GetVersion(ctx context.Context, deploymentID, versionID string) (*Version, error) { + resp := &Version{} + path := fmt.Sprintf("%s/deployments/%s/versions/%s", basePath, deploymentID, versionID) + err := c.api.Do(ctx, http.MethodGet, path, nil, nil, nil, resp) + if err != nil { + return nil, mapError("get version", err) + } + return resp, nil +} + +// Heartbeat renews the lock lease for an in-progress version. +func (c *Client) Heartbeat(ctx context.Context, deploymentID, versionID string) (*HeartbeatResponse, error) { + resp := &HeartbeatResponse{} + path := fmt.Sprintf("%s/deployments/%s/versions/%s/heartbeat", basePath, deploymentID, versionID) + err := c.api.Do(ctx, http.MethodPost, path, nil, nil, struct{}{}, resp) + if err != nil { + return nil, mapError("heartbeat", err) + } + return resp, nil +} + +// CompleteVersion marks a version as completed (releases the deployment lock). +func (c *Client) CompleteVersion(ctx context.Context, deploymentID, versionID string, reason VersionComplete, force bool) (*Version, error) { + resp := &Version{} + path := fmt.Sprintf("%s/deployments/%s/versions/%s/complete", basePath, deploymentID, versionID) + err := c.api.Do(ctx, http.MethodPost, path, nil, nil, CompleteVersionRequest{ + Name: fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID), + CompletionReason: reason, + Force: force, + }, resp) + if err != nil { + return nil, mapError("complete version", err) + } + return resp, nil +} + +// CreateOperation records a resource operation for a version. +func (c *Client) CreateOperation(ctx context.Context, deploymentID, versionID, resourceKey string, operation *Operation) (*Operation, error) { + resp := &Operation{} + path := fmt.Sprintf("%s/deployments/%s/versions/%s/operations", basePath, deploymentID, versionID) + err := c.api.Do(ctx, http.MethodPost, path, nil, nil, CreateOperationRequest{ + Parent: fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID), + ResourceKey: resourceKey, + Operation: operation, + }, resp) + if err != nil { + return nil, mapError("create operation", err) + } + return resp, nil +} + +// ListResources lists all resources for a deployment. +func (c *Client) ListResources(ctx context.Context, deploymentID string) ([]Resource, error) { + var allResources []Resource + pageToken := "" + + for { + resp := &ListResourcesResponse{} + path := fmt.Sprintf("%s/deployments/%s/resources", basePath, deploymentID) + + q := map[string]any{ + "parent": fmt.Sprintf("deployments/%s", deploymentID), + "page_size": 1000, + } + if pageToken != "" { + q["page_token"] = pageToken + } + + err := c.api.Do(ctx, http.MethodGet, path, nil, q, nil, resp) + if err != nil { + return nil, mapError("list resources", err) + } + + allResources = append(allResources, resp.Resources...) + if resp.NextPageToken == "" { + break + } + pageToken = resp.NextPageToken + } + + return allResources, nil +} + +// mapError translates API errors into user-friendly messages. +func mapError(operation string, err error) error { + var apiErr *apierr.APIError + if !errors.As(err, &apiErr) { + return fmt.Errorf("%s: %w", operation, err) + } + + switch apiErr.StatusCode { + case http.StatusConflict: + return fmt.Errorf("%s: deployment is locked by another active deployment. "+ + "Use --force-lock to override", operation) + case http.StatusNotFound: + return fmt.Errorf("%s: resource not found: %w", operation, err) + case http.StatusBadRequest: + return fmt.Errorf("%s: bad request: %s", operation, apiErr.Message) + default: + return fmt.Errorf("%s: %w", operation, err) + } +} diff --git a/bundle/deploy/metadata/service/heartbeat.go b/bundle/deploy/metadata/service/heartbeat.go new file mode 100644 index 0000000000..d32e0a24f0 --- /dev/null +++ b/bundle/deploy/metadata/service/heartbeat.go @@ -0,0 +1,37 @@ +package service + +import ( + "context" + "time" + + "github.com/databricks/cli/libs/log" +) + +const DefaultHeartbeatInterval = 2 * time.Minute + +// StartHeartbeat starts a background goroutine that sends heartbeats to keep +// the deployment lock alive. Returns a cancel function to stop the heartbeat. +func StartHeartbeat(ctx context.Context, client *Client, deploymentID, versionID string, interval time.Duration) context.CancelFunc { + ctx, cancel := context.WithCancel(ctx) + + go func() { + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + _, err := client.Heartbeat(ctx, deploymentID, versionID) + if err != nil { + log.Warnf(ctx, "Failed to send deployment heartbeat: %v", err) + } else { + log.Debugf(ctx, "Deployment heartbeat sent for deployment=%s version=%s", deploymentID, versionID) + } + } + } + }() + + return cancel +} diff --git a/bundle/deploy/metadata/service/types.go b/bundle/deploy/metadata/service/types.go new file mode 100644 index 0000000000..05e0cf03b1 --- /dev/null +++ b/bundle/deploy/metadata/service/types.go @@ -0,0 +1,189 @@ +package service + +import "time" + +// Enum types matching the proto definitions. + +type DeploymentStatus int +type VersionStatus int +type VersionComplete int +type VersionType int +type OperationStatus int +type OperationActionType int +type DeploymentResourceType int + +const ( + DeploymentStatusUnspecified DeploymentStatus = 0 + DeploymentStatusActive DeploymentStatus = 1 + DeploymentStatusFailed DeploymentStatus = 2 + DeploymentStatusInProgress DeploymentStatus = 3 + DeploymentStatusDeleted DeploymentStatus = 4 +) + +const ( + VersionStatusUnspecified VersionStatus = 0 + VersionStatusInProgress VersionStatus = 1 + VersionStatusCompleted VersionStatus = 2 +) + +const ( + VersionCompleteUnspecified VersionComplete = 0 + VersionCompleteSuccess VersionComplete = 1 + VersionCompleteFailure VersionComplete = 2 + VersionCompleteForceAbort VersionComplete = 3 + VersionCompleteLeaseExpire VersionComplete = 4 +) + +const ( + VersionTypeUnspecified VersionType = 0 + VersionTypeDeploy VersionType = 1 + VersionTypeDestroy VersionType = 2 +) + +const ( + OperationStatusUnspecified OperationStatus = 0 + OperationStatusSucceeded OperationStatus = 1 + OperationStatusFailed OperationStatus = 2 +) + +const ( + OperationActionTypeUnspecified OperationActionType = 0 + OperationActionTypeResize OperationActionType = 1 + OperationActionTypeUpdate OperationActionType = 2 + OperationActionTypeUpdateWithID OperationActionType = 3 + OperationActionTypeCreate OperationActionType = 4 + OperationActionTypeRecreate OperationActionType = 5 + OperationActionTypeDelete OperationActionType = 6 + OperationActionTypeBind OperationActionType = 7 + OperationActionTypeBindAndUpdate OperationActionType = 8 + OperationActionTypeInitRegister OperationActionType = 9 +) + +const ( + ResourceTypeUnspecified DeploymentResourceType = 0 + ResourceTypeJob DeploymentResourceType = 1 + ResourceTypePipeline DeploymentResourceType = 2 + ResourceTypeModel DeploymentResourceType = 4 + ResourceTypeRegisteredModel DeploymentResourceType = 5 + ResourceTypeExperiment DeploymentResourceType = 6 + ResourceTypeServingEndpoint DeploymentResourceType = 7 + ResourceTypeQualityMonitor DeploymentResourceType = 8 + ResourceTypeSchema DeploymentResourceType = 9 + ResourceTypeVolume DeploymentResourceType = 10 + ResourceTypeCluster DeploymentResourceType = 11 + ResourceTypeDashboard DeploymentResourceType = 12 + ResourceTypeApp DeploymentResourceType = 13 + ResourceTypeCatalog DeploymentResourceType = 14 + ResourceTypeExternalLocation DeploymentResourceType = 15 + ResourceTypeSecretScope DeploymentResourceType = 16 + ResourceTypeAlert DeploymentResourceType = 17 + ResourceTypeSQLWarehouse DeploymentResourceType = 18 + ResourceTypeDatabaseInstance DeploymentResourceType = 19 + ResourceTypeDatabaseCatalog DeploymentResourceType = 20 + ResourceTypeSyncedDBTable DeploymentResourceType = 21 + ResourceTypePostgresProject DeploymentResourceType = 22 + ResourceTypePostgresBranch DeploymentResourceType = 23 + ResourceTypePostgresEndpoint DeploymentResourceType = 24 +) + +// Deployment represents a bundle deployment registered with the control plane. +type Deployment struct { + Name string `json:"name,omitempty"` + DisplayName string `json:"display_name,omitempty"` + TargetName string `json:"target_name,omitempty"` + Status DeploymentStatus `json:"status,omitempty"` + LastVersionID string `json:"last_version_id,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + CreateTime *time.Time `json:"create_time,omitempty"` + UpdateTime *time.Time `json:"update_time,omitempty"` + DestroyTime *time.Time `json:"destroy_time,omitempty"` + DestroyedBy string `json:"destroyed_by,omitempty"` +} + +// Version represents a single invocation of deploy/destroy against a deployment. +type Version struct { + Name string `json:"name,omitempty"` + VersionID string `json:"version_id,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + CreateTime *time.Time `json:"create_time,omitempty"` + CompleteTime *time.Time `json:"complete_time,omitempty"` + CliVersion string `json:"cli_version,omitempty"` + Status VersionStatus `json:"status,omitempty"` + VersionType VersionType `json:"version_type,omitempty"` + CompletionReason VersionComplete `json:"completion_reason,omitempty"` + CompletedBy string `json:"completed_by,omitempty"` + DisplayName string `json:"display_name,omitempty"` + TargetName string `json:"target_name,omitempty"` +} + +// Operation records the result of applying a resource change. +type Operation struct { + Name string `json:"name,omitempty"` + ResourceKey string `json:"resource_key,omitempty"` + ActionType OperationActionType `json:"action_type,omitempty"` + State any `json:"state,omitempty"` + ResourceID string `json:"resource_id,omitempty"` + CreateTime *time.Time `json:"create_time,omitempty"` + Status OperationStatus `json:"status,omitempty"` + ErrorMessage string `json:"error_message,omitempty"` +} + +// Resource represents a resource managed by a deployment. +type Resource struct { + Name string `json:"name,omitempty"` + ResourceKey string `json:"resource_key,omitempty"` + State any `json:"state,omitempty"` + ResourceID string `json:"resource_id,omitempty"` + LastActionType OperationActionType `json:"last_action_type,omitempty"` + LastVersionID string `json:"last_version_id,omitempty"` + ResourceType DeploymentResourceType `json:"resource_type,omitempty"` +} + +// Request/Response types. + +type CreateDeploymentRequest struct { + DeploymentID string `json:"deployment_id"` + Deployment *Deployment `json:"deployment"` +} + +type ListDeploymentsResponse struct { + Deployments []Deployment `json:"deployments"` + NextPageToken string `json:"next_page_token,omitempty"` +} + +type CreateVersionRequest struct { + Parent string `json:"parent"` + Version *Version `json:"version"` + VersionID string `json:"version_id"` +} + +type ListVersionsResponse struct { + Versions []Version `json:"versions"` + NextPageToken string `json:"next_page_token,omitempty"` +} + +type HeartbeatResponse struct { + ExpireTime *time.Time `json:"expire_time,omitempty"` +} + +type CompleteVersionRequest struct { + Name string `json:"name"` + CompletionReason VersionComplete `json:"completion_reason"` + Force bool `json:"force,omitempty"` +} + +type CreateOperationRequest struct { + Parent string `json:"parent"` + ResourceKey string `json:"resource_key"` + Operation *Operation `json:"operation"` +} + +type ListOperationsResponse struct { + Operations []Operation `json:"operations"` + NextPageToken string `json:"next_page_token,omitempty"` +} + +type ListResourcesResponse struct { + Resources []Resource `json:"resources"` + NextPageToken string `json:"next_page_token,omitempty"` +} diff --git a/bundle/deploy/state_update.go b/bundle/deploy/state_update.go index 55cf2393bf..06326c8a93 100644 --- a/bundle/deploy/state_update.go +++ b/bundle/deploy/state_update.go @@ -81,6 +81,12 @@ func StateUpdate() bundle.Mutator { return &stateUpdate{} } +// LoadState loads the deployment state from the local cache directory. +// If no state file exists, a new default DeploymentState is returned. +func LoadState(ctx context.Context, b *bundle.Bundle) (*DeploymentState, error) { + return load(ctx, b) +} + func load(ctx context.Context, b *bundle.Bundle) (*DeploymentState, error) { // If the file does not exist, return a new DeploymentState. statePath, err := getPathToStateFile(ctx, b) diff --git a/bundle/env/deployment_metadata.go b/bundle/env/deployment_metadata.go new file mode 100644 index 0000000000..60e896c045 --- /dev/null +++ b/bundle/env/deployment_metadata.go @@ -0,0 +1,15 @@ +package env + +import "context" + +// deploymentServiceVariable names the environment variable that controls whether the +// deployment metadata service is used for locking and resource state management. +const deploymentServiceVariable = "DATABRICKS_BUNDLE_DEPLOYMENT_SERVICE" + +// DeploymentService returns the environment variable that controls whether the +// deployment metadata service is used for locking and resource state management. +func DeploymentService(ctx context.Context) (string, bool) { + return get(ctx, []string{ + deploymentServiceVariable, + }) +} diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 4613a7a211..7a1fa6e778 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/engine" "github.com/databricks/cli/bundle/deploy" + "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/metadata" @@ -139,6 +140,11 @@ func uploadLibraries(ctx context.Context, b *bundle.Bundle, libs map[string][]li // The deploy phase deploys artifacts and resources. // If readPlanPath is provided, the plan is loaded from that file instead of being calculated. func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler, engine engine.EngineType, libs map[string][]libraries.LocationToUpdate, plan *deployplan.Plan) { + if v, _ := env.DeploymentService(ctx); v == "true" { + deployWithMetadataService(ctx, b, outputHandler, engine, libs, plan) + return + } + log.Info(ctx, "Phase: deploy") // Core mutators that CRUD resources and modify deployment state. These diff --git a/bundle/phases/deploy_metadata.go b/bundle/phases/deploy_metadata.go new file mode 100644 index 0000000000..bbe1197b5b --- /dev/null +++ b/bundle/phases/deploy_metadata.go @@ -0,0 +1,269 @@ +package phases + +import ( + "context" + "errors" + "fmt" + "net/http" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/artifacts" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/engine" + "github.com/databricks/cli/bundle/deploy" + "github.com/databricks/cli/bundle/deploy/files" + "github.com/databricks/cli/bundle/deploy/metadata" + metadataservice "github.com/databricks/cli/bundle/deploy/metadata/service" + "github.com/databricks/cli/bundle/deploy/terraform" + "github.com/databricks/cli/bundle/deployplan" + "github.com/databricks/cli/bundle/direct" + "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/bundle/metrics" + "github.com/databricks/cli/bundle/permissions" + "github.com/databricks/cli/bundle/scripts" + "github.com/databricks/cli/bundle/statemgmt" + "github.com/databricks/cli/internal/build" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/logdiag" + "github.com/databricks/cli/libs/sync" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/google/uuid" +) + +func deployWithMetadataService(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler, targetEngine engine.EngineType, libs map[string][]libraries.LocationToUpdate, plan *deployplan.Plan) { + log.Info(ctx, "Phase: deploy (with metadata service)") + + bundle.ApplyContext(ctx, b, scripts.Execute(config.ScriptPreDeploy)) + if logdiag.HasError(ctx) { + return + } + + // Create the metadata service client. + svc, err := metadataservice.NewClient(b.WorkspaceClient()) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("failed to create metadata service client: %w", err)) + return + } + + // Load local deployment state to get the deployment ID and sequence number. + state, err := deploy.LoadState(ctx, b) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("failed to load deployment state: %w", err)) + return + } + + // Generate a deployment ID if one doesn't exist yet. + if state.ID == uuid.Nil { + state.ID = uuid.New() + } + deploymentID := state.ID.String() + + // Ensure the deployment exists in the metadata service. + _, err = svc.CreateDeployment(ctx, deploymentID, &metadataservice.Deployment{ + TargetName: b.Config.Bundle.Target, + }) + if err != nil && !isAlreadyExists(err) { + logdiag.LogError(ctx, fmt.Errorf("failed to create deployment: %w", err)) + return + } + + // Create a version to acquire the deployment lock. + versionID := fmt.Sprintf("%d", state.Seq+1) + version, err := svc.CreateVersion(ctx, deploymentID, versionID, &metadataservice.Version{ + CliVersion: build.GetInfo().Version, + VersionType: metadataservice.VersionTypeDeploy, + TargetName: b.Config.Bundle.Target, + }) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("failed to acquire deployment lock: %w", err)) + return + } + + log.Infof(ctx, "Acquired deployment lock: deployment=%s version=%s", deploymentID, version.VersionID) + + // Start heartbeat to keep the lock alive. + stopHeartbeat := metadataservice.StartHeartbeat(ctx, svc, deploymentID, versionID, metadataservice.DefaultHeartbeatInterval) + + // Ensure we always complete the version (release the lock) and stop heartbeat. + var deployFailed bool + defer func() { + stopHeartbeat() + + reason := metadataservice.VersionCompleteSuccess + if deployFailed || logdiag.HasError(ctx) { + reason = metadataservice.VersionCompleteFailure + } + + _, completeErr := svc.CompleteVersion(ctx, deploymentID, versionID, reason, false) + if completeErr != nil { + log.Warnf(ctx, "Failed to release deployment lock: %v", completeErr) + } else { + log.Infof(ctx, "Released deployment lock: deployment=%s version=%s reason=%d", deploymentID, versionID, reason) + } + }() + + // Upload libraries. + bundle.ApplySeqContext(ctx, b, + artifacts.CleanUp(), + libraries.Upload(libs), + ) + if logdiag.HasError(ctx) { + deployFailed = true + return + } + + // Upload files, update state, apply permissions. + bundle.ApplySeqContext(ctx, b, + files.Upload(outputHandler), + deploy.StateUpdate(), + deploy.StatePush(), + permissions.ApplyWorkspaceRootPermissions(), + metrics.TrackUsedCompute(), + deploy.ResourcePathMkdir(), + ) + if logdiag.HasError(ctx) { + deployFailed = true + return + } + + // Calculate or load the deploy plan. + if plan != nil { + _, localPath := b.StateFilenameDirect(ctx) + err := b.DeploymentBundle.InitForApply(ctx, b.WorkspaceClient(), localPath, plan) + if err != nil { + logdiag.LogError(ctx, err) + deployFailed = true + return + } + } else { + plan = RunPlan(ctx, b, targetEngine) + } + if logdiag.HasError(ctx) { + deployFailed = true + return + } + + // Seek approval for potentially destructive changes. + haveApproval, err := approvalForDeploy(ctx, b, plan) + if err != nil { + logdiag.LogError(ctx, err) + deployFailed = true + return + } + if !haveApproval { + cmdio.LogString(ctx, "Deployment cancelled!") + return + } + + // Apply the deployment. + deployCoreWithMetadata(ctx, b, plan, targetEngine, svc, deploymentID, versionID) + if logdiag.HasError(ctx) { + deployFailed = true + return + } + + logDeployTelemetry(ctx, b) + bundle.ApplyContext(ctx, b, scripts.Execute(config.ScriptPostDeploy)) +} + +// deployCoreWithMetadata applies the deployment plan and reports operations to +// the metadata service. +func deployCoreWithMetadata(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, targetEngine engine.EngineType, svc *metadataservice.Client, deploymentID, versionID string) { + cmdio.LogString(ctx, "Deploying resources...") + + if targetEngine.IsDirect() { + b.DeploymentBundle.Apply(ctx, b.WorkspaceClient(), plan, direct.MigrateMode(false)) + } else { + bundle.ApplyContext(ctx, b, terraform.Apply()) + } + + // Push resource state even on failure. + statemgmt.PushResourcesState(ctx, b, targetEngine) + + // Report operations to the metadata service (best-effort). + reportOperations(ctx, b, svc, deploymentID, versionID, plan) + + if logdiag.HasError(ctx) { + return + } + + bundle.ApplySeqContext(ctx, b, + statemgmt.Load(targetEngine), + metadata.Compute(), + metadata.Upload(), + statemgmt.UploadStateForYamlSync(targetEngine), + ) + + if !logdiag.HasError(ctx) { + cmdio.LogString(ctx, "Deployment complete!") + } +} + +// reportOperations reports each resource operation to the metadata service. +// This is best-effort: failures are logged as warnings, not fatal errors. +func reportOperations(ctx context.Context, b *bundle.Bundle, svc *metadataservice.Client, deploymentID, versionID string, plan *deployplan.Plan) { + if plan == nil { + return + } + + // Fetch existing resources to determine if this is the first time we're + // tracking each resource in the metadata service. + knownResources := map[string]bool{} + existing, err := svc.ListResources(ctx, deploymentID) + if err != nil { + log.Warnf(ctx, "Failed to list existing resources from metadata service, will use INITIAL_REGISTER for all: %v", err) + } else { + for _, r := range existing { + knownResources[r.ResourceKey] = true + } + } + + for resourceKey, entry := range plan.Plan { + var actionType metadataservice.OperationActionType + if knownResources[resourceKey] { + // Resource is already tracked; use the plan's action type. + actionType = planActionToOperationAction(entry.Action) + } else { + // First time tracking this resource in the service. + actionType = metadataservice.OperationActionTypeInitRegister + } + + if actionType == metadataservice.OperationActionTypeUnspecified { + continue + } + + _, err := svc.CreateOperation(ctx, deploymentID, versionID, resourceKey, &metadataservice.Operation{ + ResourceKey: resourceKey, + Status: metadataservice.OperationStatusSucceeded, + ActionType: actionType, + }) + if err != nil { + log.Warnf(ctx, "Failed to report operation for resource %s: %v", resourceKey, err) + } + } +} + +func planActionToOperationAction(action deployplan.ActionType) metadataservice.OperationActionType { + switch action { + case deployplan.Create: + return metadataservice.OperationActionTypeCreate + case deployplan.Update: + return metadataservice.OperationActionTypeUpdate + case deployplan.Delete: + return metadataservice.OperationActionTypeDelete + case deployplan.Recreate: + return metadataservice.OperationActionTypeRecreate + default: + return metadataservice.OperationActionTypeUnspecified + } +} + +// isAlreadyExists checks if an error indicates the resource already exists (HTTP 409). +func isAlreadyExists(err error) bool { + var apiErr *apierr.APIError + if errors.As(err, &apiErr) && apiErr.StatusCode == http.StatusConflict { + return true + } + return false +} diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index e6be00b579..374b533ca7 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/engine" "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/terraform" @@ -115,6 +116,11 @@ func destroyCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, e // The destroy phase deletes artifacts and resources. func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { + if v, _ := env.DeploymentService(ctx); v == "true" { + destroyWithMetadataService(ctx, b, engine) + return + } + log.Info(ctx, "Phase: destroy") ok, err := assertRootPathExists(ctx, b) diff --git a/bundle/phases/destroy_metadata.go b/bundle/phases/destroy_metadata.go new file mode 100644 index 0000000000..acb776d540 --- /dev/null +++ b/bundle/phases/destroy_metadata.go @@ -0,0 +1,169 @@ +package phases + +import ( + "context" + "errors" + "fmt" + "net/http" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/engine" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/deploy" + "github.com/databricks/cli/bundle/deploy/files" + metadataservice "github.com/databricks/cli/bundle/deploy/metadata/service" + "github.com/databricks/cli/bundle/deploy/terraform" + "github.com/databricks/cli/bundle/deployplan" + "github.com/databricks/cli/bundle/direct" + "github.com/databricks/cli/internal/build" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/logdiag" + "github.com/databricks/databricks-sdk-go/apierr" +) + +func destroyWithMetadataService(ctx context.Context, b *bundle.Bundle, targetEngine engine.EngineType) { + log.Info(ctx, "Phase: destroy (with metadata service)") + + ok, err := assertRootPathExists(ctx, b) + if err != nil { + logdiag.LogError(ctx, err) + return + } + if !ok { + cmdio.LogString(ctx, "No active deployment found to destroy!") + return + } + + // Create the metadata service client. + svc, err := metadataservice.NewClient(b.WorkspaceClient()) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("failed to create metadata service client: %w", err)) + return + } + + // Load local deployment state to get the deployment ID and sequence number. + state, err := deploy.LoadState(ctx, b) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("failed to load deployment state: %w", err)) + return + } + + deploymentID := state.ID.String() + + // Check that the deployment exists. + _, err = svc.GetDeployment(ctx, deploymentID) + if err != nil { + var apiErr *apierr.APIError + if errors.As(err, &apiErr) && apiErr.StatusCode == http.StatusNotFound { + log.Infof(ctx, "No deployment found in metadata service for %s, nothing to destroy", deploymentID) + cmdio.LogString(ctx, "No active deployment found to destroy!") + return + } + logdiag.LogError(ctx, fmt.Errorf("failed to get deployment: %w", err)) + return + } + + // Create a version to acquire the deployment lock. + versionID := fmt.Sprintf("%d", state.Seq+1) + _, err = svc.CreateVersion(ctx, deploymentID, versionID, &metadataservice.Version{ + CliVersion: build.GetInfo().Version, + VersionType: metadataservice.VersionTypeDestroy, + TargetName: b.Config.Bundle.Target, + }) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("failed to acquire deployment lock: %w", err)) + return + } + + log.Infof(ctx, "Acquired deployment lock for destroy: deployment=%s version=%s", deploymentID, versionID) + + // Start heartbeat to keep the lock alive. + stopHeartbeat := metadataservice.StartHeartbeat(ctx, svc, deploymentID, versionID, metadataservice.DefaultHeartbeatInterval) + + var destroyFailed bool + defer func() { + stopHeartbeat() + + reason := metadataservice.VersionCompleteSuccess + if destroyFailed || logdiag.HasError(ctx) { + reason = metadataservice.VersionCompleteFailure + } + + _, completeErr := svc.CompleteVersion(ctx, deploymentID, versionID, reason, false) + if completeErr != nil { + log.Warnf(ctx, "Failed to release deployment lock: %v", completeErr) + } else { + log.Infof(ctx, "Released deployment lock: deployment=%s version=%s reason=%d", deploymentID, versionID, reason) + } + }() + + // Calculate the destroy plan. + if !targetEngine.IsDirect() { + bundle.ApplySeqContext(ctx, b, + mutator.ResolveVariableReferencesWithoutResources("artifacts"), + mutator.ResolveVariableReferencesOnlyResources("artifacts"), + terraform.Interpolate(), + terraform.Write(), + terraform.Plan(terraform.PlanGoal("destroy")), + ) + } + + if logdiag.HasError(ctx) { + destroyFailed = true + return + } + + var plan *deployplan.Plan + if targetEngine.IsDirect() { + _, localPath := b.StateFilenameDirect(ctx) + plan, err = b.DeploymentBundle.CalculatePlan(ctx, b.WorkspaceClient(), nil, localPath) + if err != nil { + logdiag.LogError(ctx, err) + destroyFailed = true + return + } + } else { + tf := b.Terraform + if tf == nil { + logdiag.LogError(ctx, fmt.Errorf("terraform not initialized")) + destroyFailed = true + return + } + + plan, err = terraform.ShowPlanFile(ctx, tf, b.TerraformPlanPath) + if err != nil { + logdiag.LogError(ctx, err) + destroyFailed = true + return + } + } + + hasApproval, err := approvalForDestroy(ctx, b, plan) + if err != nil { + logdiag.LogError(ctx, err) + destroyFailed = true + return + } + + if hasApproval { + if targetEngine.IsDirect() { + b.DeploymentBundle.Apply(ctx, b.WorkspaceClient(), plan, direct.MigrateMode(false)) + } else { + bundle.ApplyContext(ctx, b, terraform.Apply()) + } + + if logdiag.HasError(ctx) { + destroyFailed = true + return + } + + bundle.ApplyContext(ctx, b, files.Delete()) + + if !logdiag.HasError(ctx) { + cmdio.LogString(ctx, "Destroy complete!") + } + } else { + cmdio.LogString(ctx, "Destroy cancelled!") + } +} From 9d055f4f1804260918ed553e6e5262933c6c13b2 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 30 Mar 2026 17:41:35 +0000 Subject: [PATCH 02/11] Fix correctness bugs and improve code quality from self-review - Use background context with timeout for CompleteVersion in defer blocks, so the lock is released even if the parent context is cancelled (e.g. Ctrl+C) - Add nil state.ID guard in destroy to avoid querying with zero UUID - Fix misleading --force-lock error message to explain lock expiry behavior - Fix import ordering Co-authored-by: Isaac --- bundle/deploy/metadata/service/client.go | 6 +++--- bundle/phases/deploy_metadata.go | 8 +++++++- bundle/phases/destroy_metadata.go | 13 ++++++++++++- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/bundle/deploy/metadata/service/client.go b/bundle/deploy/metadata/service/client.go index ffe2fb36fc..df25bb39ce 100644 --- a/bundle/deploy/metadata/service/client.go +++ b/bundle/deploy/metadata/service/client.go @@ -2,11 +2,10 @@ package service import ( "context" + "errors" "fmt" "net/http" - "errors" - "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/client" @@ -172,7 +171,8 @@ func mapError(operation string, err error) error { switch apiErr.StatusCode { case http.StatusConflict: return fmt.Errorf("%s: deployment is locked by another active deployment. "+ - "Use --force-lock to override", operation) + "If the prior deployment failed, the lock will expire automatically after 5 minutes. "+ + "You can also force-acquire the lock by running deploy with the --force-lock flag", operation) case http.StatusNotFound: return fmt.Errorf("%s: resource not found: %w", operation, err) case http.StatusBadRequest: diff --git a/bundle/phases/deploy_metadata.go b/bundle/phases/deploy_metadata.go index bbe1197b5b..e9f41a56cf 100644 --- a/bundle/phases/deploy_metadata.go +++ b/bundle/phases/deploy_metadata.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "net/http" + "time" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/artifacts" @@ -95,7 +96,12 @@ func deployWithMetadataService(ctx context.Context, b *bundle.Bundle, outputHand reason = metadataservice.VersionCompleteFailure } - _, completeErr := svc.CompleteVersion(ctx, deploymentID, versionID, reason, false) + // Use a separate context for cleanup so the lock is released even if the + // parent context was cancelled (e.g. user hit Ctrl+C). + cleanupCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + _, completeErr := svc.CompleteVersion(cleanupCtx, deploymentID, versionID, reason, false) if completeErr != nil { log.Warnf(ctx, "Failed to release deployment lock: %v", completeErr) } else { diff --git a/bundle/phases/destroy_metadata.go b/bundle/phases/destroy_metadata.go index acb776d540..d7992e72cc 100644 --- a/bundle/phases/destroy_metadata.go +++ b/bundle/phases/destroy_metadata.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "net/http" + "time" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/engine" @@ -20,6 +21,7 @@ import ( "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" "github.com/databricks/databricks-sdk-go/apierr" + "github.com/google/uuid" ) func destroyWithMetadataService(ctx context.Context, b *bundle.Bundle, targetEngine engine.EngineType) { @@ -49,6 +51,10 @@ func destroyWithMetadataService(ctx context.Context, b *bundle.Bundle, targetEng return } + if state.ID == uuid.Nil { + cmdio.LogString(ctx, "No active deployment found to destroy!") + return + } deploymentID := state.ID.String() // Check that the deployment exists. @@ -90,7 +96,12 @@ func destroyWithMetadataService(ctx context.Context, b *bundle.Bundle, targetEng reason = metadataservice.VersionCompleteFailure } - _, completeErr := svc.CompleteVersion(ctx, deploymentID, versionID, reason, false) + // Use a separate context for cleanup so the lock is released even if the + // parent context was cancelled (e.g. user hit Ctrl+C). + cleanupCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + _, completeErr := svc.CompleteVersion(cleanupCtx, deploymentID, versionID, reason, false) if completeErr != nil { log.Warnf(ctx, "Failed to release deployment lock: %v", completeErr) } else { From 342fef82db47a5521bfd570a8bc5731190cdb805 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 30 Mar 2026 19:57:23 +0000 Subject: [PATCH 03/11] Refactor to SDK-style tempdms package and unify deploy/destroy flows Move the deployment metadata service client from bundle/deploy/metadata/service to libs/tempdms with SDK-style method signatures (single request struct param). When the protos land in the Go SDK, migration is just an import path change. Unify deploy and destroy flows: instead of separate *WithMetadataService functions that duplicated all mutator calls, the core logic stays in Deploy() and Destroy() with conditional lock management based on the env var. Co-authored-by: Isaac --- bundle/deploy/metadata/service/client.go | 183 -------- bundle/phases/deploy.go | 102 +++-- bundle/phases/deploy_metadata.go | 234 +++------- bundle/phases/destroy.go | 70 ++- bundle/phases/destroy_metadata.go | 181 +------- .../metadata/service => phases}/heartbeat.go | 14 +- libs/tempdms/api.go | 164 +++++++ .../service => libs/tempdms}/types.go | 146 ++++--- libs/testserver/deployment_metadata.go | 400 ++++++++++++++++++ libs/testserver/fake_workspace.go | 3 + libs/testserver/handlers.go | 38 ++ 11 files changed, 893 insertions(+), 642 deletions(-) delete mode 100644 bundle/deploy/metadata/service/client.go rename bundle/{deploy/metadata/service => phases}/heartbeat.go (56%) create mode 100644 libs/tempdms/api.go rename {bundle/deploy/metadata/service => libs/tempdms}/types.go (58%) create mode 100644 libs/testserver/deployment_metadata.go diff --git a/bundle/deploy/metadata/service/client.go b/bundle/deploy/metadata/service/client.go deleted file mode 100644 index df25bb39ce..0000000000 --- a/bundle/deploy/metadata/service/client.go +++ /dev/null @@ -1,183 +0,0 @@ -package service - -import ( - "context" - "errors" - "fmt" - "net/http" - - "github.com/databricks/databricks-sdk-go" - "github.com/databricks/databricks-sdk-go/apierr" - "github.com/databricks/databricks-sdk-go/client" -) - -const basePath = "/api/2.0/bundle" - -// Client wraps the Databricks API client for the deployment metadata service. -type Client struct { - api *client.DatabricksClient -} - -// NewClient creates a new deployment metadata service client from a workspace client. -func NewClient(w *databricks.WorkspaceClient) (*Client, error) { - apiClient, err := client.New(w.Config) - if err != nil { - return nil, fmt.Errorf("failed to create deployment metadata API client: %w", err) - } - return &Client{api: apiClient}, nil -} - -// CreateDeployment creates a new deployment. -func (c *Client) CreateDeployment(ctx context.Context, deploymentID string, deployment *Deployment) (*Deployment, error) { - resp := &Deployment{} - path := fmt.Sprintf("%s/deployments", basePath) - err := c.api.Do(ctx, http.MethodPost, path, nil, nil, CreateDeploymentRequest{ - DeploymentID: deploymentID, - Deployment: deployment, - }, resp) - if err != nil { - return nil, mapError("create deployment", err) - } - return resp, nil -} - -// GetDeployment retrieves a deployment by ID. -func (c *Client) GetDeployment(ctx context.Context, deploymentID string) (*Deployment, error) { - resp := &Deployment{} - path := fmt.Sprintf("%s/deployments/%s", basePath, deploymentID) - err := c.api.Do(ctx, http.MethodGet, path, nil, nil, nil, resp) - if err != nil { - return nil, mapError("get deployment", err) - } - return resp, nil -} - -// DeleteDeployment soft-deletes a deployment. -func (c *Client) DeleteDeployment(ctx context.Context, deploymentID string) error { - path := fmt.Sprintf("%s/deployments/%s", basePath, deploymentID) - err := c.api.Do(ctx, http.MethodDelete, path, nil, nil, nil, nil) - if err != nil { - return mapError("delete deployment", err) - } - return nil -} - -// CreateVersion creates a new version (acquires the deployment lock). -func (c *Client) CreateVersion(ctx context.Context, deploymentID string, versionID string, version *Version) (*Version, error) { - resp := &Version{} - path := fmt.Sprintf("%s/deployments/%s/versions", basePath, deploymentID) - err := c.api.Do(ctx, http.MethodPost, path, nil, nil, CreateVersionRequest{ - Parent: fmt.Sprintf("deployments/%s", deploymentID), - Version: version, - VersionID: versionID, - }, resp) - if err != nil { - return nil, mapError("create version", err) - } - return resp, nil -} - -// GetVersion retrieves a version. -func (c *Client) GetVersion(ctx context.Context, deploymentID, versionID string) (*Version, error) { - resp := &Version{} - path := fmt.Sprintf("%s/deployments/%s/versions/%s", basePath, deploymentID, versionID) - err := c.api.Do(ctx, http.MethodGet, path, nil, nil, nil, resp) - if err != nil { - return nil, mapError("get version", err) - } - return resp, nil -} - -// Heartbeat renews the lock lease for an in-progress version. -func (c *Client) Heartbeat(ctx context.Context, deploymentID, versionID string) (*HeartbeatResponse, error) { - resp := &HeartbeatResponse{} - path := fmt.Sprintf("%s/deployments/%s/versions/%s/heartbeat", basePath, deploymentID, versionID) - err := c.api.Do(ctx, http.MethodPost, path, nil, nil, struct{}{}, resp) - if err != nil { - return nil, mapError("heartbeat", err) - } - return resp, nil -} - -// CompleteVersion marks a version as completed (releases the deployment lock). -func (c *Client) CompleteVersion(ctx context.Context, deploymentID, versionID string, reason VersionComplete, force bool) (*Version, error) { - resp := &Version{} - path := fmt.Sprintf("%s/deployments/%s/versions/%s/complete", basePath, deploymentID, versionID) - err := c.api.Do(ctx, http.MethodPost, path, nil, nil, CompleteVersionRequest{ - Name: fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID), - CompletionReason: reason, - Force: force, - }, resp) - if err != nil { - return nil, mapError("complete version", err) - } - return resp, nil -} - -// CreateOperation records a resource operation for a version. -func (c *Client) CreateOperation(ctx context.Context, deploymentID, versionID, resourceKey string, operation *Operation) (*Operation, error) { - resp := &Operation{} - path := fmt.Sprintf("%s/deployments/%s/versions/%s/operations", basePath, deploymentID, versionID) - err := c.api.Do(ctx, http.MethodPost, path, nil, nil, CreateOperationRequest{ - Parent: fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID), - ResourceKey: resourceKey, - Operation: operation, - }, resp) - if err != nil { - return nil, mapError("create operation", err) - } - return resp, nil -} - -// ListResources lists all resources for a deployment. -func (c *Client) ListResources(ctx context.Context, deploymentID string) ([]Resource, error) { - var allResources []Resource - pageToken := "" - - for { - resp := &ListResourcesResponse{} - path := fmt.Sprintf("%s/deployments/%s/resources", basePath, deploymentID) - - q := map[string]any{ - "parent": fmt.Sprintf("deployments/%s", deploymentID), - "page_size": 1000, - } - if pageToken != "" { - q["page_token"] = pageToken - } - - err := c.api.Do(ctx, http.MethodGet, path, nil, q, nil, resp) - if err != nil { - return nil, mapError("list resources", err) - } - - allResources = append(allResources, resp.Resources...) - if resp.NextPageToken == "" { - break - } - pageToken = resp.NextPageToken - } - - return allResources, nil -} - -// mapError translates API errors into user-friendly messages. -func mapError(operation string, err error) error { - var apiErr *apierr.APIError - if !errors.As(err, &apiErr) { - return fmt.Errorf("%s: %w", operation, err) - } - - switch apiErr.StatusCode { - case http.StatusConflict: - return fmt.Errorf("%s: deployment is locked by another active deployment. "+ - "If the prior deployment failed, the lock will expire automatically after 5 minutes. "+ - "You can also force-acquire the lock by running deploy with the --force-lock flag", operation) - case http.StatusNotFound: - return fmt.Errorf("%s: resource not found: %w", operation, err) - case http.StatusBadRequest: - return fmt.Errorf("%s: bad request: %s", operation, apiErr.Message) - default: - return fmt.Errorf("%s: %w", operation, err) - } -} diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 7a1fa6e778..5ca8745f06 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -3,19 +3,20 @@ package phases import ( "context" "errors" + "fmt" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/engine" "github.com/databricks/cli/bundle/deploy" - "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/metadata" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" + "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/bundle/metrics" "github.com/databricks/cli/bundle/permissions" @@ -25,6 +26,7 @@ import ( "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" "github.com/databricks/cli/libs/sync" + "github.com/databricks/cli/libs/tempdms" ) func approvalForDeploy(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan) (bool, error) { @@ -98,9 +100,11 @@ func approvalForDeploy(ctx context.Context, b *bundle.Bundle, plan *deployplan.P return approved, nil } -func deployCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, targetEngine engine.EngineType) { - // Core mutators that CRUD resources and modify deployment state. These - // mutators need informed consent if they are potentially destructive. +// postApplyHook is called after the deployment plan is applied (terraform/direct Apply). +// It can be used for additional state reporting (e.g. to the metadata service). +type postApplyHook func(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan) + +func deployCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, targetEngine engine.EngineType, hook postApplyHook) { cmdio.LogString(ctx, "Deploying resources...") if targetEngine.IsDirect() { @@ -109,8 +113,14 @@ func deployCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, ta bundle.ApplyContext(ctx, b, terraform.Apply()) } - // Even if deployment failed, there might be updates in states that we need to upload + // Even if deployment failed, there might be updates in states that we need to upload. statemgmt.PushResourcesState(ctx, b, targetEngine) + + // Run any additional post-apply logic (e.g. metadata service operation reporting). + if hook != nil { + hook(ctx, b, plan) + } + if logdiag.HasError(ctx) { return } @@ -139,33 +149,55 @@ func uploadLibraries(ctx context.Context, b *bundle.Bundle, libs map[string][]li // The deploy phase deploys artifacts and resources. // If readPlanPath is provided, the plan is loaded from that file instead of being calculated. -func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler, engine engine.EngineType, libs map[string][]libraries.LocationToUpdate, plan *deployplan.Plan) { - if v, _ := env.DeploymentService(ctx); v == "true" { - deployWithMetadataService(ctx, b, outputHandler, engine, libs, plan) - return - } +func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler, targetEngine engine.EngineType, libs map[string][]libraries.LocationToUpdate, plan *deployplan.Plan) { + useMetadataService, _ := env.DeploymentService(ctx) - log.Info(ctx, "Phase: deploy") - - // Core mutators that CRUD resources and modify deployment state. These - // mutators need informed consent if they are potentially destructive. - bundle.ApplySeqContext(ctx, b, - scripts.Execute(config.ScriptPreDeploy), - lock.Acquire(), - ) + if useMetadataService == "true" { + log.Info(ctx, "Phase: deploy (with metadata service)") + } else { + log.Info(ctx, "Phase: deploy") + } + bundle.ApplyContext(ctx, b, scripts.Execute(config.ScriptPreDeploy)) if logdiag.HasError(ctx) { - // lock is not acquired here return } - // lock is acquired here - defer func() { - bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDeploy)) - }() + // Acquire the deployment lock. + var svc *tempdms.DeploymentMetadataAPI + var deploymentID, versionID string + var failed bool + + if useMetadataService == "true" { + var err error + svc, err = tempdms.NewDeploymentMetadataAPI(b.WorkspaceClient()) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("failed to create metadata service client: %w", err)) + return + } + + var cleanup func(failed bool) + deploymentID, versionID, cleanup, err = deployMetadataLock(ctx, b, svc, tempdms.VersionTypeDeploy) + if err != nil { + logdiag.LogError(ctx, err) + return + } + defer func() { + cleanup(failed || logdiag.HasError(ctx)) + }() + } else { + bundle.ApplyContext(ctx, b, lock.Acquire()) + if logdiag.HasError(ctx) { + return + } + defer func() { + bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDeploy)) + }() + } uploadLibraries(ctx, b, libs) if logdiag.HasError(ctx) { + failed = true return } @@ -177,40 +209,50 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand metrics.TrackUsedCompute(), deploy.ResourcePathMkdir(), ) - if logdiag.HasError(ctx) { + failed = true return } if plan != nil { - // Initialize DeploymentBundle for applying the loaded plan + // Initialize DeploymentBundle for applying the loaded plan. _, localPath := b.StateFilenameDirect(ctx) err := b.DeploymentBundle.InitForApply(ctx, b.WorkspaceClient(), localPath, plan) if err != nil { logdiag.LogError(ctx, err) + failed = true return } } else { - plan = RunPlan(ctx, b, engine) + plan = RunPlan(ctx, b, targetEngine) } - if logdiag.HasError(ctx) { + failed = true return } haveApproval, err := approvalForDeploy(ctx, b, plan) if err != nil { logdiag.LogError(ctx, err) + failed = true return } - if haveApproval { - deployCore(ctx, b, plan, engine) - } else { + if !haveApproval { cmdio.LogString(ctx, "Deployment cancelled!") return } + // Build the post-apply hook for metadata service reporting (nil for file-based). + var hook postApplyHook + if useMetadataService == "true" { + hook = func(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan) { + reportOperations(ctx, svc, deploymentID, versionID, plan) + } + } + + deployCore(ctx, b, plan, targetEngine, hook) if logdiag.HasError(ctx) { + failed = true return } diff --git a/bundle/phases/deploy_metadata.go b/bundle/phases/deploy_metadata.go index e9f41a56cf..40d1d7d620 100644 --- a/bundle/phases/deploy_metadata.go +++ b/bundle/phases/deploy_metadata.go @@ -8,92 +8,72 @@ import ( "time" "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/artifacts" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/engine" "github.com/databricks/cli/bundle/deploy" - "github.com/databricks/cli/bundle/deploy/files" - "github.com/databricks/cli/bundle/deploy/metadata" - metadataservice "github.com/databricks/cli/bundle/deploy/metadata/service" - "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" - "github.com/databricks/cli/bundle/direct" - "github.com/databricks/cli/bundle/libraries" - "github.com/databricks/cli/bundle/metrics" - "github.com/databricks/cli/bundle/permissions" - "github.com/databricks/cli/bundle/scripts" - "github.com/databricks/cli/bundle/statemgmt" "github.com/databricks/cli/internal/build" - "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" - "github.com/databricks/cli/libs/sync" + "github.com/databricks/cli/libs/tempdms" "github.com/databricks/databricks-sdk-go/apierr" "github.com/google/uuid" ) -func deployWithMetadataService(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler, targetEngine engine.EngineType, libs map[string][]libraries.LocationToUpdate, plan *deployplan.Plan) { - log.Info(ctx, "Phase: deploy (with metadata service)") - - bundle.ApplyContext(ctx, b, scripts.Execute(config.ScriptPreDeploy)) - if logdiag.HasError(ctx) { - return - } - - // Create the metadata service client. - svc, err := metadataservice.NewClient(b.WorkspaceClient()) - if err != nil { - logdiag.LogError(ctx, fmt.Errorf("failed to create metadata service client: %w", err)) - return - } - +// deployMetadataLock implements the lock acquire/release lifecycle using the +// deployment metadata service (CreateVersion / CompleteVersion). +// +// It returns a cleanup function that must be deferred by the caller to release +// the lock and stop the heartbeat, as well as any error from acquiring the lock. +func deployMetadataLock(ctx context.Context, b *bundle.Bundle, svc *tempdms.DeploymentMetadataAPI, versionType tempdms.VersionType) (deploymentID, versionID string, cleanup func(failed bool), err error) { // Load local deployment state to get the deployment ID and sequence number. - state, err := deploy.LoadState(ctx, b) - if err != nil { - logdiag.LogError(ctx, fmt.Errorf("failed to load deployment state: %w", err)) - return + state, loadErr := deploy.LoadState(ctx, b) + if loadErr != nil { + return "", "", nil, fmt.Errorf("failed to load deployment state: %w", loadErr) } // Generate a deployment ID if one doesn't exist yet. if state.ID == uuid.Nil { state.ID = uuid.New() } - deploymentID := state.ID.String() + deploymentID = state.ID.String() // Ensure the deployment exists in the metadata service. - _, err = svc.CreateDeployment(ctx, deploymentID, &metadataservice.Deployment{ - TargetName: b.Config.Bundle.Target, + _, createErr := svc.CreateDeployment(ctx, tempdms.CreateDeploymentRequest{ + DeploymentID: deploymentID, + Deployment: &tempdms.Deployment{ + TargetName: b.Config.Bundle.Target, + }, }) - if err != nil && !isAlreadyExists(err) { - logdiag.LogError(ctx, fmt.Errorf("failed to create deployment: %w", err)) - return + if createErr != nil && !isAlreadyExists(createErr) { + return "", "", nil, fmt.Errorf("failed to create deployment: %w", createErr) } // Create a version to acquire the deployment lock. - versionID := fmt.Sprintf("%d", state.Seq+1) - version, err := svc.CreateVersion(ctx, deploymentID, versionID, &metadataservice.Version{ - CliVersion: build.GetInfo().Version, - VersionType: metadataservice.VersionTypeDeploy, - TargetName: b.Config.Bundle.Target, + versionID = fmt.Sprintf("%d", state.Seq+1) + version, versionErr := svc.CreateVersion(ctx, tempdms.CreateVersionRequest{ + DeploymentID: deploymentID, + Parent: fmt.Sprintf("deployments/%s", deploymentID), + VersionID: versionID, + Version: &tempdms.Version{ + CliVersion: build.GetInfo().Version, + VersionType: versionType, + TargetName: b.Config.Bundle.Target, + }, }) - if err != nil { - logdiag.LogError(ctx, fmt.Errorf("failed to acquire deployment lock: %w", err)) - return + if versionErr != nil { + return "", "", nil, fmt.Errorf("failed to acquire deployment lock: %w", versionErr) } log.Infof(ctx, "Acquired deployment lock: deployment=%s version=%s", deploymentID, version.VersionID) // Start heartbeat to keep the lock alive. - stopHeartbeat := metadataservice.StartHeartbeat(ctx, svc, deploymentID, versionID, metadataservice.DefaultHeartbeatInterval) + stopHeartbeat := startHeartbeat(ctx, svc, deploymentID, versionID, defaultHeartbeatInterval) - // Ensure we always complete the version (release the lock) and stop heartbeat. - var deployFailed bool - defer func() { + cleanup = func(failed bool) { stopHeartbeat() - reason := metadataservice.VersionCompleteSuccess - if deployFailed || logdiag.HasError(ctx) { - reason = metadataservice.VersionCompleteFailure + reason := tempdms.VersionCompleteSuccess + if failed { + reason = tempdms.VersionCompleteFailure } // Use a separate context for cleanup so the lock is released even if the @@ -101,114 +81,25 @@ func deployWithMetadataService(ctx context.Context, b *bundle.Bundle, outputHand cleanupCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - _, completeErr := svc.CompleteVersion(cleanupCtx, deploymentID, versionID, reason, false) + _, completeErr := svc.CompleteVersion(cleanupCtx, tempdms.CompleteVersionRequest{ + DeploymentID: deploymentID, + VersionID: versionID, + Name: fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID), + CompletionReason: reason, + }) if completeErr != nil { log.Warnf(ctx, "Failed to release deployment lock: %v", completeErr) } else { log.Infof(ctx, "Released deployment lock: deployment=%s version=%s reason=%d", deploymentID, versionID, reason) } - }() - - // Upload libraries. - bundle.ApplySeqContext(ctx, b, - artifacts.CleanUp(), - libraries.Upload(libs), - ) - if logdiag.HasError(ctx) { - deployFailed = true - return - } - - // Upload files, update state, apply permissions. - bundle.ApplySeqContext(ctx, b, - files.Upload(outputHandler), - deploy.StateUpdate(), - deploy.StatePush(), - permissions.ApplyWorkspaceRootPermissions(), - metrics.TrackUsedCompute(), - deploy.ResourcePathMkdir(), - ) - if logdiag.HasError(ctx) { - deployFailed = true - return } - // Calculate or load the deploy plan. - if plan != nil { - _, localPath := b.StateFilenameDirect(ctx) - err := b.DeploymentBundle.InitForApply(ctx, b.WorkspaceClient(), localPath, plan) - if err != nil { - logdiag.LogError(ctx, err) - deployFailed = true - return - } - } else { - plan = RunPlan(ctx, b, targetEngine) - } - if logdiag.HasError(ctx) { - deployFailed = true - return - } - - // Seek approval for potentially destructive changes. - haveApproval, err := approvalForDeploy(ctx, b, plan) - if err != nil { - logdiag.LogError(ctx, err) - deployFailed = true - return - } - if !haveApproval { - cmdio.LogString(ctx, "Deployment cancelled!") - return - } - - // Apply the deployment. - deployCoreWithMetadata(ctx, b, plan, targetEngine, svc, deploymentID, versionID) - if logdiag.HasError(ctx) { - deployFailed = true - return - } - - logDeployTelemetry(ctx, b) - bundle.ApplyContext(ctx, b, scripts.Execute(config.ScriptPostDeploy)) -} - -// deployCoreWithMetadata applies the deployment plan and reports operations to -// the metadata service. -func deployCoreWithMetadata(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, targetEngine engine.EngineType, svc *metadataservice.Client, deploymentID, versionID string) { - cmdio.LogString(ctx, "Deploying resources...") - - if targetEngine.IsDirect() { - b.DeploymentBundle.Apply(ctx, b.WorkspaceClient(), plan, direct.MigrateMode(false)) - } else { - bundle.ApplyContext(ctx, b, terraform.Apply()) - } - - // Push resource state even on failure. - statemgmt.PushResourcesState(ctx, b, targetEngine) - - // Report operations to the metadata service (best-effort). - reportOperations(ctx, b, svc, deploymentID, versionID, plan) - - if logdiag.HasError(ctx) { - return - } - - bundle.ApplySeqContext(ctx, b, - statemgmt.Load(targetEngine), - metadata.Compute(), - metadata.Upload(), - statemgmt.UploadStateForYamlSync(targetEngine), - ) - - if !logdiag.HasError(ctx) { - cmdio.LogString(ctx, "Deployment complete!") - } + return deploymentID, versionID, cleanup, nil } // reportOperations reports each resource operation to the metadata service. // This is best-effort: failures are logged as warnings, not fatal errors. -func reportOperations(ctx context.Context, b *bundle.Bundle, svc *metadataservice.Client, deploymentID, versionID string, plan *deployplan.Plan) { +func reportOperations(ctx context.Context, svc *tempdms.DeploymentMetadataAPI, deploymentID, versionID string, plan *deployplan.Plan) { if plan == nil { return } @@ -216,7 +107,10 @@ func reportOperations(ctx context.Context, b *bundle.Bundle, svc *metadataservic // Fetch existing resources to determine if this is the first time we're // tracking each resource in the metadata service. knownResources := map[string]bool{} - existing, err := svc.ListResources(ctx, deploymentID) + existing, err := svc.ListResources(ctx, tempdms.ListResourcesRequest{ + DeploymentID: deploymentID, + Parent: fmt.Sprintf("deployments/%s", deploymentID), + }) if err != nil { log.Warnf(ctx, "Failed to list existing resources from metadata service, will use INITIAL_REGISTER for all: %v", err) } else { @@ -226,23 +120,27 @@ func reportOperations(ctx context.Context, b *bundle.Bundle, svc *metadataservic } for resourceKey, entry := range plan.Plan { - var actionType metadataservice.OperationActionType + var actionType tempdms.OperationActionType if knownResources[resourceKey] { - // Resource is already tracked; use the plan's action type. actionType = planActionToOperationAction(entry.Action) } else { - // First time tracking this resource in the service. - actionType = metadataservice.OperationActionTypeInitRegister + actionType = tempdms.OperationActionTypeInitRegister } - if actionType == metadataservice.OperationActionTypeUnspecified { + if actionType == tempdms.OperationActionTypeUnspecified { continue } - _, err := svc.CreateOperation(ctx, deploymentID, versionID, resourceKey, &metadataservice.Operation{ - ResourceKey: resourceKey, - Status: metadataservice.OperationStatusSucceeded, - ActionType: actionType, + _, err := svc.CreateOperation(ctx, tempdms.CreateOperationRequest{ + DeploymentID: deploymentID, + VersionID: versionID, + Parent: fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID), + ResourceKey: resourceKey, + Operation: &tempdms.Operation{ + ResourceKey: resourceKey, + Status: tempdms.OperationStatusSucceeded, + ActionType: actionType, + }, }) if err != nil { log.Warnf(ctx, "Failed to report operation for resource %s: %v", resourceKey, err) @@ -250,18 +148,18 @@ func reportOperations(ctx context.Context, b *bundle.Bundle, svc *metadataservic } } -func planActionToOperationAction(action deployplan.ActionType) metadataservice.OperationActionType { +func planActionToOperationAction(action deployplan.ActionType) tempdms.OperationActionType { switch action { case deployplan.Create: - return metadataservice.OperationActionTypeCreate + return tempdms.OperationActionTypeCreate case deployplan.Update: - return metadataservice.OperationActionTypeUpdate + return tempdms.OperationActionTypeUpdate case deployplan.Delete: - return metadataservice.OperationActionTypeDelete + return tempdms.OperationActionTypeDelete case deployplan.Recreate: - return metadataservice.OperationActionTypeRecreate + return tempdms.OperationActionTypeRecreate default: - return metadataservice.OperationActionTypeUnspecified + return tempdms.OperationActionTypeUnspecified } } diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 374b533ca7..81e52a3445 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -3,20 +3,22 @@ package phases import ( "context" "errors" + "fmt" "net/http" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/engine" "github.com/databricks/cli/bundle/config/mutator" - "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" + "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" + "github.com/databricks/cli/libs/tempdms" "github.com/databricks/databricks-sdk-go/apierr" ) @@ -95,11 +97,10 @@ func approvalForDestroy(ctx context.Context, b *bundle.Bundle, plan *deployplan. return approved, nil } -func destroyCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, engine engine.EngineType) { - if engine.IsDirect() { +func destroyCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, targetEngine engine.EngineType) { + if targetEngine.IsDirect() { b.DeploymentBundle.Apply(ctx, b.WorkspaceClient(), plan, direct.MigrateMode(false)) } else { - // Core destructive mutators for destroy. These require informed user consent. bundle.ApplyContext(ctx, b, terraform.Apply()) } @@ -115,35 +116,54 @@ func destroyCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, e } // The destroy phase deletes artifacts and resources. -func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { - if v, _ := env.DeploymentService(ctx); v == "true" { - destroyWithMetadataService(ctx, b, engine) - return - } +func Destroy(ctx context.Context, b *bundle.Bundle, targetEngine engine.EngineType) { + useMetadataService, _ := env.DeploymentService(ctx) - log.Info(ctx, "Phase: destroy") + if useMetadataService == "true" { + log.Info(ctx, "Phase: destroy (with metadata service)") + } else { + log.Info(ctx, "Phase: destroy") + } ok, err := assertRootPathExists(ctx, b) if err != nil { logdiag.LogError(ctx, err) return } - if !ok { cmdio.LogString(ctx, "No active deployment found to destroy!") return } - bundle.ApplyContext(ctx, b, lock.Acquire()) - if logdiag.HasError(ctx) { - return - } + // Acquire the deployment lock. + var failed bool - defer func() { - bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDestroy)) - }() + if useMetadataService == "true" { + svc, svcErr := tempdms.NewDeploymentMetadataAPI(b.WorkspaceClient()) + if svcErr != nil { + logdiag.LogError(ctx, fmt.Errorf("failed to create metadata service client: %w", svcErr)) + return + } + + _, _, cleanup, lockErr := deployMetadataLock(ctx, b, svc, tempdms.VersionTypeDestroy) + if lockErr != nil { + logdiag.LogError(ctx, lockErr) + return + } + defer func() { + cleanup(failed || logdiag.HasError(ctx)) + }() + } else { + bundle.ApplyContext(ctx, b, lock.Acquire()) + if logdiag.HasError(ctx) { + return + } + defer func() { + bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDestroy)) + }() + } - if !engine.IsDirect() { + if !targetEngine.IsDirect() { bundle.ApplySeqContext(ctx, b, // We need to resolve artifact variable (how we do it in build phase) // because some of the to-be-destroyed resource might use this variable. @@ -158,27 +178,31 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { } if logdiag.HasError(ctx) { + failed = true return } var plan *deployplan.Plan - if engine.IsDirect() { + if targetEngine.IsDirect() { _, localPath := b.StateFilenameDirect(ctx) plan, err = b.DeploymentBundle.CalculatePlan(ctx, b.WorkspaceClient(), nil, localPath) if err != nil { logdiag.LogError(ctx, err) + failed = true return } } else { tf := b.Terraform if tf == nil { logdiag.LogError(ctx, errors.New("terraform not initialized")) + failed = true return } plan, err = terraform.ShowPlanFile(ctx, tf, b.TerraformPlanPath) if err != nil { logdiag.LogError(ctx, err) + failed = true return } } @@ -186,11 +210,15 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { hasApproval, err := approvalForDestroy(ctx, b, plan) if err != nil { logdiag.LogError(ctx, err) + failed = true return } if hasApproval { - destroyCore(ctx, b, plan, engine) + destroyCore(ctx, b, plan, targetEngine) + if logdiag.HasError(ctx) { + failed = true + } } else { cmdio.LogString(ctx, "Destroy cancelled!") } diff --git a/bundle/phases/destroy_metadata.go b/bundle/phases/destroy_metadata.go index d7992e72cc..6cfa47ecc0 100644 --- a/bundle/phases/destroy_metadata.go +++ b/bundle/phases/destroy_metadata.go @@ -1,180 +1,3 @@ +// This file is intentionally left minimal. The destroy flow with metadata service +// support has been unified into destroy.go using the deployMetadataLock helper. package phases - -import ( - "context" - "errors" - "fmt" - "net/http" - "time" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config/engine" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/databricks/cli/bundle/deploy" - "github.com/databricks/cli/bundle/deploy/files" - metadataservice "github.com/databricks/cli/bundle/deploy/metadata/service" - "github.com/databricks/cli/bundle/deploy/terraform" - "github.com/databricks/cli/bundle/deployplan" - "github.com/databricks/cli/bundle/direct" - "github.com/databricks/cli/internal/build" - "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/log" - "github.com/databricks/cli/libs/logdiag" - "github.com/databricks/databricks-sdk-go/apierr" - "github.com/google/uuid" -) - -func destroyWithMetadataService(ctx context.Context, b *bundle.Bundle, targetEngine engine.EngineType) { - log.Info(ctx, "Phase: destroy (with metadata service)") - - ok, err := assertRootPathExists(ctx, b) - if err != nil { - logdiag.LogError(ctx, err) - return - } - if !ok { - cmdio.LogString(ctx, "No active deployment found to destroy!") - return - } - - // Create the metadata service client. - svc, err := metadataservice.NewClient(b.WorkspaceClient()) - if err != nil { - logdiag.LogError(ctx, fmt.Errorf("failed to create metadata service client: %w", err)) - return - } - - // Load local deployment state to get the deployment ID and sequence number. - state, err := deploy.LoadState(ctx, b) - if err != nil { - logdiag.LogError(ctx, fmt.Errorf("failed to load deployment state: %w", err)) - return - } - - if state.ID == uuid.Nil { - cmdio.LogString(ctx, "No active deployment found to destroy!") - return - } - deploymentID := state.ID.String() - - // Check that the deployment exists. - _, err = svc.GetDeployment(ctx, deploymentID) - if err != nil { - var apiErr *apierr.APIError - if errors.As(err, &apiErr) && apiErr.StatusCode == http.StatusNotFound { - log.Infof(ctx, "No deployment found in metadata service for %s, nothing to destroy", deploymentID) - cmdio.LogString(ctx, "No active deployment found to destroy!") - return - } - logdiag.LogError(ctx, fmt.Errorf("failed to get deployment: %w", err)) - return - } - - // Create a version to acquire the deployment lock. - versionID := fmt.Sprintf("%d", state.Seq+1) - _, err = svc.CreateVersion(ctx, deploymentID, versionID, &metadataservice.Version{ - CliVersion: build.GetInfo().Version, - VersionType: metadataservice.VersionTypeDestroy, - TargetName: b.Config.Bundle.Target, - }) - if err != nil { - logdiag.LogError(ctx, fmt.Errorf("failed to acquire deployment lock: %w", err)) - return - } - - log.Infof(ctx, "Acquired deployment lock for destroy: deployment=%s version=%s", deploymentID, versionID) - - // Start heartbeat to keep the lock alive. - stopHeartbeat := metadataservice.StartHeartbeat(ctx, svc, deploymentID, versionID, metadataservice.DefaultHeartbeatInterval) - - var destroyFailed bool - defer func() { - stopHeartbeat() - - reason := metadataservice.VersionCompleteSuccess - if destroyFailed || logdiag.HasError(ctx) { - reason = metadataservice.VersionCompleteFailure - } - - // Use a separate context for cleanup so the lock is released even if the - // parent context was cancelled (e.g. user hit Ctrl+C). - cleanupCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - _, completeErr := svc.CompleteVersion(cleanupCtx, deploymentID, versionID, reason, false) - if completeErr != nil { - log.Warnf(ctx, "Failed to release deployment lock: %v", completeErr) - } else { - log.Infof(ctx, "Released deployment lock: deployment=%s version=%s reason=%d", deploymentID, versionID, reason) - } - }() - - // Calculate the destroy plan. - if !targetEngine.IsDirect() { - bundle.ApplySeqContext(ctx, b, - mutator.ResolveVariableReferencesWithoutResources("artifacts"), - mutator.ResolveVariableReferencesOnlyResources("artifacts"), - terraform.Interpolate(), - terraform.Write(), - terraform.Plan(terraform.PlanGoal("destroy")), - ) - } - - if logdiag.HasError(ctx) { - destroyFailed = true - return - } - - var plan *deployplan.Plan - if targetEngine.IsDirect() { - _, localPath := b.StateFilenameDirect(ctx) - plan, err = b.DeploymentBundle.CalculatePlan(ctx, b.WorkspaceClient(), nil, localPath) - if err != nil { - logdiag.LogError(ctx, err) - destroyFailed = true - return - } - } else { - tf := b.Terraform - if tf == nil { - logdiag.LogError(ctx, fmt.Errorf("terraform not initialized")) - destroyFailed = true - return - } - - plan, err = terraform.ShowPlanFile(ctx, tf, b.TerraformPlanPath) - if err != nil { - logdiag.LogError(ctx, err) - destroyFailed = true - return - } - } - - hasApproval, err := approvalForDestroy(ctx, b, plan) - if err != nil { - logdiag.LogError(ctx, err) - destroyFailed = true - return - } - - if hasApproval { - if targetEngine.IsDirect() { - b.DeploymentBundle.Apply(ctx, b.WorkspaceClient(), plan, direct.MigrateMode(false)) - } else { - bundle.ApplyContext(ctx, b, terraform.Apply()) - } - - if logdiag.HasError(ctx) { - destroyFailed = true - return - } - - bundle.ApplyContext(ctx, b, files.Delete()) - - if !logdiag.HasError(ctx) { - cmdio.LogString(ctx, "Destroy complete!") - } - } else { - cmdio.LogString(ctx, "Destroy cancelled!") - } -} diff --git a/bundle/deploy/metadata/service/heartbeat.go b/bundle/phases/heartbeat.go similarity index 56% rename from bundle/deploy/metadata/service/heartbeat.go rename to bundle/phases/heartbeat.go index d32e0a24f0..1f9b3d41d1 100644 --- a/bundle/deploy/metadata/service/heartbeat.go +++ b/bundle/phases/heartbeat.go @@ -1,17 +1,18 @@ -package service +package phases import ( "context" "time" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/tempdms" ) -const DefaultHeartbeatInterval = 2 * time.Minute +const defaultHeartbeatInterval = 2 * time.Minute -// StartHeartbeat starts a background goroutine that sends heartbeats to keep +// startHeartbeat starts a background goroutine that sends heartbeats to keep // the deployment lock alive. Returns a cancel function to stop the heartbeat. -func StartHeartbeat(ctx context.Context, client *Client, deploymentID, versionID string, interval time.Duration) context.CancelFunc { +func startHeartbeat(ctx context.Context, svc *tempdms.DeploymentMetadataAPI, deploymentID, versionID string, interval time.Duration) context.CancelFunc { ctx, cancel := context.WithCancel(ctx) go func() { @@ -23,7 +24,10 @@ func StartHeartbeat(ctx context.Context, client *Client, deploymentID, versionID case <-ctx.Done(): return case <-ticker.C: - _, err := client.Heartbeat(ctx, deploymentID, versionID) + _, err := svc.Heartbeat(ctx, tempdms.HeartbeatRequest{ + DeploymentID: deploymentID, + VersionID: versionID, + }) if err != nil { log.Warnf(ctx, "Failed to send deployment heartbeat: %v", err) } else { diff --git a/libs/tempdms/api.go b/libs/tempdms/api.go new file mode 100644 index 0000000000..305633819e --- /dev/null +++ b/libs/tempdms/api.go @@ -0,0 +1,164 @@ +package tempdms + +import ( + "context" + "errors" + "fmt" + "net/http" + + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/client" +) + +const basePath = "/api/2.0/bundle" + +// DeploymentMetadataAPI is a client for the Deployment Metadata Service. +// +// This is a temporary implementation that will be replaced by the SDK-generated +// client once the proto definitions land in the Go SDK. The method signatures +// and types are designed to match what the SDK will generate, so migration +// should be a straightforward import path change. +type DeploymentMetadataAPI struct { + api *client.DatabricksClient +} + +func NewDeploymentMetadataAPI(w *databricks.WorkspaceClient) (*DeploymentMetadataAPI, error) { + apiClient, err := client.New(w.Config) + if err != nil { + return nil, fmt.Errorf("failed to create deployment metadata API client: %w", err) + } + return &DeploymentMetadataAPI{api: apiClient}, nil +} + +func (a *DeploymentMetadataAPI) CreateDeployment(ctx context.Context, request CreateDeploymentRequest) (*Deployment, error) { + var resp Deployment + path := fmt.Sprintf("%s/deployments", basePath) + err := a.api.Do(ctx, http.MethodPost, path, nil, nil, request, &resp) + if err != nil { + return nil, mapError("create deployment", err) + } + return &resp, nil +} + +func (a *DeploymentMetadataAPI) GetDeployment(ctx context.Context, request GetDeploymentRequest) (*Deployment, error) { + var resp Deployment + path := fmt.Sprintf("%s/deployments/%s", basePath, request.DeploymentID) + err := a.api.Do(ctx, http.MethodGet, path, nil, nil, nil, &resp) + if err != nil { + return nil, mapError("get deployment", err) + } + return &resp, nil +} + +func (a *DeploymentMetadataAPI) DeleteDeployment(ctx context.Context, request DeleteDeploymentRequest) (*Deployment, error) { + var resp Deployment + path := fmt.Sprintf("%s/deployments/%s", basePath, request.DeploymentID) + err := a.api.Do(ctx, http.MethodDelete, path, nil, nil, nil, &resp) + if err != nil { + return nil, mapError("delete deployment", err) + } + return &resp, nil +} + +func (a *DeploymentMetadataAPI) CreateVersion(ctx context.Context, request CreateVersionRequest) (*Version, error) { + var resp Version + path := fmt.Sprintf("%s/deployments/%s/versions", basePath, request.DeploymentID) + err := a.api.Do(ctx, http.MethodPost, path, nil, nil, request, &resp) + if err != nil { + return nil, mapError("create version", err) + } + return &resp, nil +} + +func (a *DeploymentMetadataAPI) GetVersion(ctx context.Context, request GetVersionRequest) (*Version, error) { + var resp Version + path := fmt.Sprintf("%s/deployments/%s/versions/%s", basePath, request.DeploymentID, request.VersionID) + err := a.api.Do(ctx, http.MethodGet, path, nil, nil, nil, &resp) + if err != nil { + return nil, mapError("get version", err) + } + return &resp, nil +} + +func (a *DeploymentMetadataAPI) Heartbeat(ctx context.Context, request HeartbeatRequest) (*HeartbeatResponse, error) { + var resp HeartbeatResponse + path := fmt.Sprintf("%s/deployments/%s/versions/%s/heartbeat", basePath, request.DeploymentID, request.VersionID) + err := a.api.Do(ctx, http.MethodPost, path, nil, nil, struct{}{}, &resp) + if err != nil { + return nil, mapError("heartbeat", err) + } + return &resp, nil +} + +func (a *DeploymentMetadataAPI) CompleteVersion(ctx context.Context, request CompleteVersionRequest) (*Version, error) { + var resp Version + path := fmt.Sprintf("%s/deployments/%s/versions/%s/complete", basePath, request.DeploymentID, request.VersionID) + err := a.api.Do(ctx, http.MethodPost, path, nil, nil, request, &resp) + if err != nil { + return nil, mapError("complete version", err) + } + return &resp, nil +} + +func (a *DeploymentMetadataAPI) CreateOperation(ctx context.Context, request CreateOperationRequest) (*Operation, error) { + var resp Operation + path := fmt.Sprintf("%s/deployments/%s/versions/%s/operations", basePath, request.DeploymentID, request.VersionID) + err := a.api.Do(ctx, http.MethodPost, path, nil, nil, request, &resp) + if err != nil { + return nil, mapError("create operation", err) + } + return &resp, nil +} + +func (a *DeploymentMetadataAPI) ListResources(ctx context.Context, request ListResourcesRequest) ([]Resource, error) { + var allResources []Resource + pageToken := "" + + for { + var resp ListResourcesResponse + path := fmt.Sprintf("%s/deployments/%s/resources", basePath, request.DeploymentID) + + q := map[string]any{ + "parent": fmt.Sprintf("deployments/%s", request.DeploymentID), + "page_size": 1000, + } + if pageToken != "" { + q["page_token"] = pageToken + } + + err := a.api.Do(ctx, http.MethodGet, path, nil, q, nil, &resp) + if err != nil { + return nil, mapError("list resources", err) + } + + allResources = append(allResources, resp.Resources...) + if resp.NextPageToken == "" { + break + } + pageToken = resp.NextPageToken + } + + return allResources, nil +} + +// mapError translates API errors into user-friendly messages. +func mapError(operation string, err error) error { + var apiErr *apierr.APIError + if !errors.As(err, &apiErr) { + return fmt.Errorf("%s: %w", operation, err) + } + + switch apiErr.StatusCode { + case http.StatusConflict: + return fmt.Errorf("%s: deployment is locked by another active deployment. "+ + "If the prior deployment failed, the lock will expire automatically after 5 minutes. "+ + "You can also force-acquire the lock by running deploy with the --force-lock flag: %w", operation, err) + case http.StatusNotFound: + return fmt.Errorf("%s: resource not found: %w", operation, err) + case http.StatusBadRequest: + return fmt.Errorf("%s: bad request: %w", operation, err) + default: + return fmt.Errorf("%s: %w", operation, err) + } +} diff --git a/bundle/deploy/metadata/service/types.go b/libs/tempdms/types.go similarity index 58% rename from bundle/deploy/metadata/service/types.go rename to libs/tempdms/types.go index 05e0cf03b1..a5d8c0df92 100644 --- a/bundle/deploy/metadata/service/types.go +++ b/libs/tempdms/types.go @@ -1,4 +1,8 @@ -package service +// Package tempdms is a temporary client library for the Deployment Metadata Service. +// It mirrors the structure that the Databricks Go SDK will eventually generate from +// the service's proto definitions. When the protos land in the SDK, migration should +// be a straightforward import path change. +package tempdms import "time" @@ -60,47 +64,47 @@ const ( ) const ( - ResourceTypeUnspecified DeploymentResourceType = 0 - ResourceTypeJob DeploymentResourceType = 1 - ResourceTypePipeline DeploymentResourceType = 2 - ResourceTypeModel DeploymentResourceType = 4 - ResourceTypeRegisteredModel DeploymentResourceType = 5 - ResourceTypeExperiment DeploymentResourceType = 6 - ResourceTypeServingEndpoint DeploymentResourceType = 7 - ResourceTypeQualityMonitor DeploymentResourceType = 8 - ResourceTypeSchema DeploymentResourceType = 9 - ResourceTypeVolume DeploymentResourceType = 10 - ResourceTypeCluster DeploymentResourceType = 11 - ResourceTypeDashboard DeploymentResourceType = 12 - ResourceTypeApp DeploymentResourceType = 13 - ResourceTypeCatalog DeploymentResourceType = 14 - ResourceTypeExternalLocation DeploymentResourceType = 15 - ResourceTypeSecretScope DeploymentResourceType = 16 - ResourceTypeAlert DeploymentResourceType = 17 - ResourceTypeSQLWarehouse DeploymentResourceType = 18 - ResourceTypeDatabaseInstance DeploymentResourceType = 19 - ResourceTypeDatabaseCatalog DeploymentResourceType = 20 - ResourceTypeSyncedDBTable DeploymentResourceType = 21 - ResourceTypePostgresProject DeploymentResourceType = 22 - ResourceTypePostgresBranch DeploymentResourceType = 23 - ResourceTypePostgresEndpoint DeploymentResourceType = 24 + ResourceTypeUnspecified DeploymentResourceType = 0 + ResourceTypeJob DeploymentResourceType = 1 + ResourceTypePipeline DeploymentResourceType = 2 + ResourceTypeModel DeploymentResourceType = 4 + ResourceTypeRegisteredModel DeploymentResourceType = 5 + ResourceTypeExperiment DeploymentResourceType = 6 + ResourceTypeServingEndpoint DeploymentResourceType = 7 + ResourceTypeQualityMonitor DeploymentResourceType = 8 + ResourceTypeSchema DeploymentResourceType = 9 + ResourceTypeVolume DeploymentResourceType = 10 + ResourceTypeCluster DeploymentResourceType = 11 + ResourceTypeDashboard DeploymentResourceType = 12 + ResourceTypeApp DeploymentResourceType = 13 + ResourceTypeCatalog DeploymentResourceType = 14 + ResourceTypeExternalLocation DeploymentResourceType = 15 + ResourceTypeSecretScope DeploymentResourceType = 16 + ResourceTypeAlert DeploymentResourceType = 17 + ResourceTypeSQLWarehouse DeploymentResourceType = 18 + ResourceTypeDatabaseInstance DeploymentResourceType = 19 + ResourceTypeDatabaseCatalog DeploymentResourceType = 20 + ResourceTypeSyncedDBTable DeploymentResourceType = 21 + ResourceTypePostgresProject DeploymentResourceType = 22 + ResourceTypePostgresBranch DeploymentResourceType = 23 + ResourceTypePostgresEndpoint DeploymentResourceType = 24 ) -// Deployment represents a bundle deployment registered with the control plane. +// Resource types (proto message equivalents). + type Deployment struct { - Name string `json:"name,omitempty"` - DisplayName string `json:"display_name,omitempty"` - TargetName string `json:"target_name,omitempty"` - Status DeploymentStatus `json:"status,omitempty"` - LastVersionID string `json:"last_version_id,omitempty"` - CreatedBy string `json:"created_by,omitempty"` - CreateTime *time.Time `json:"create_time,omitempty"` - UpdateTime *time.Time `json:"update_time,omitempty"` - DestroyTime *time.Time `json:"destroy_time,omitempty"` - DestroyedBy string `json:"destroyed_by,omitempty"` -} - -// Version represents a single invocation of deploy/destroy against a deployment. + Name string `json:"name,omitempty"` + DisplayName string `json:"display_name,omitempty"` + TargetName string `json:"target_name,omitempty"` + Status DeploymentStatus `json:"status,omitempty"` + LastVersionID string `json:"last_version_id,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + CreateTime *time.Time `json:"create_time,omitempty"` + UpdateTime *time.Time `json:"update_time,omitempty"` + DestroyTime *time.Time `json:"destroy_time,omitempty"` + DestroyedBy string `json:"destroyed_by,omitempty"` +} + type Version struct { Name string `json:"name,omitempty"` VersionID string `json:"version_id,omitempty"` @@ -116,7 +120,6 @@ type Version struct { TargetName string `json:"target_name,omitempty"` } -// Operation records the result of applying a resource change. type Operation struct { Name string `json:"name,omitempty"` ResourceKey string `json:"resource_key,omitempty"` @@ -128,7 +131,6 @@ type Operation struct { ErrorMessage string `json:"error_message,omitempty"` } -// Resource represents a resource managed by a deployment. type Resource struct { Name string `json:"name,omitempty"` ResourceKey string `json:"resource_key,omitempty"` @@ -139,43 +141,75 @@ type Resource struct { ResourceType DeploymentResourceType `json:"resource_type,omitempty"` } -// Request/Response types. +// Request types. type CreateDeploymentRequest struct { DeploymentID string `json:"deployment_id"` Deployment *Deployment `json:"deployment"` } -type ListDeploymentsResponse struct { - Deployments []Deployment `json:"deployments"` - NextPageToken string `json:"next_page_token,omitempty"` +type GetDeploymentRequest struct { + DeploymentID string `json:"-"` +} + +type DeleteDeploymentRequest struct { + DeploymentID string `json:"-"` } type CreateVersionRequest struct { - Parent string `json:"parent"` - Version *Version `json:"version"` - VersionID string `json:"version_id"` + DeploymentID string `json:"-"` + Parent string `json:"parent"` + Version *Version `json:"version"` + VersionID string `json:"version_id"` } -type ListVersionsResponse struct { - Versions []Version `json:"versions"` - NextPageToken string `json:"next_page_token,omitempty"` +type GetVersionRequest struct { + DeploymentID string `json:"-"` + VersionID string `json:"-"` } -type HeartbeatResponse struct { - ExpireTime *time.Time `json:"expire_time,omitempty"` +type HeartbeatRequest struct { + DeploymentID string `json:"-"` + VersionID string `json:"-"` } type CompleteVersionRequest struct { + DeploymentID string `json:"-"` + VersionID string `json:"-"` Name string `json:"name"` CompletionReason VersionComplete `json:"completion_reason"` Force bool `json:"force,omitempty"` } type CreateOperationRequest struct { - Parent string `json:"parent"` - ResourceKey string `json:"resource_key"` - Operation *Operation `json:"operation"` + DeploymentID string `json:"-"` + VersionID string `json:"-"` + Parent string `json:"parent"` + ResourceKey string `json:"resource_key"` + Operation *Operation `json:"operation"` +} + +type ListResourcesRequest struct { + DeploymentID string `json:"-"` + Parent string `json:"parent"` + PageSize int `json:"page_size,omitempty"` + PageToken string `json:"page_token,omitempty"` +} + +// Response types. + +type HeartbeatResponse struct { + ExpireTime *time.Time `json:"expire_time,omitempty"` +} + +type ListDeploymentsResponse struct { + Deployments []Deployment `json:"deployments"` + NextPageToken string `json:"next_page_token,omitempty"` +} + +type ListVersionsResponse struct { + Versions []Version `json:"versions"` + NextPageToken string `json:"next_page_token,omitempty"` } type ListOperationsResponse struct { diff --git a/libs/testserver/deployment_metadata.go b/libs/testserver/deployment_metadata.go new file mode 100644 index 0000000000..1b0b3e9f90 --- /dev/null +++ b/libs/testserver/deployment_metadata.go @@ -0,0 +1,400 @@ +package testserver + +import ( + "encoding/json" + "fmt" + "net/http" + "strconv" + "strings" + "time" + + "github.com/databricks/cli/libs/tempdms" +) + +// deploymentMetadataState holds in-memory state for the deployment metadata service. +// Stored per-workspace inside FakeWorkspace. +type deploymentMetadataState struct { + // deployments keyed by deployment_id + deployments map[string]tempdms.Deployment + + // versions keyed by "deploymentId/versionId" + versions map[string]tempdms.Version + + // operations keyed by "deploymentId/versionId/resourceKey" + operations map[string]tempdms.Operation + + // resources keyed by "deploymentId/resourceKey" + resources map[string]tempdms.Resource + + // lock state per deployment: which version holds the lock and when it expires + lockHolder map[string]string // deploymentId -> "deployments/{id}/versions/{vid}" + lockExpiry map[string]time.Time // deploymentId -> expiry time +} + +func newDeploymentMetadataState() *deploymentMetadataState { + return &deploymentMetadataState{ + deployments: map[string]tempdms.Deployment{}, + versions: map[string]tempdms.Version{}, + operations: map[string]tempdms.Operation{}, + resources: map[string]tempdms.Resource{}, + lockHolder: map[string]string{}, + lockExpiry: map[string]time.Time{}, + } +} + +const lockDuration = 5 * time.Minute + +func (s *FakeWorkspace) DeploymentMetadataCreateDeployment(req Request) Response { + defer s.LockUnlock()() + + var createReq tempdms.CreateDeploymentRequest + if err := json.Unmarshal(req.Body, &createReq); err != nil { + return Response{ + StatusCode: http.StatusBadRequest, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": fmt.Sprintf("invalid request: %s", err)}, + } + } + + deploymentID := createReq.DeploymentID + if deploymentID == "" { + return Response{ + StatusCode: http.StatusBadRequest, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": "deployment_id is required"}, + } + } + + state := s.deploymentMetadata + if _, exists := state.deployments[deploymentID]; exists { + return Response{ + StatusCode: http.StatusConflict, + Body: map[string]string{"error_code": "ALREADY_EXISTS", "message": fmt.Sprintf("deployment %s already exists", deploymentID)}, + } + } + + now := time.Now().UTC() + deployment := tempdms.Deployment{ + Name: fmt.Sprintf("deployments/%s", deploymentID), + DisplayName: deploymentID, + Status: tempdms.DeploymentStatusActive, + CreatedBy: s.CurrentUser().UserName, + CreateTime: &now, + UpdateTime: &now, + } + if createReq.Deployment != nil { + if createReq.Deployment.TargetName != "" { + deployment.TargetName = createReq.Deployment.TargetName + } + } + + state.deployments[deploymentID] = deployment + return Response{Body: deployment} +} + +func (s *FakeWorkspace) DeploymentMetadataGetDeployment(deploymentID string) Response { + defer s.LockUnlock()() + + state := s.deploymentMetadata + deployment, ok := state.deployments[deploymentID] + if !ok { + return Response{ + StatusCode: http.StatusNotFound, + Body: map[string]string{"error_code": "NOT_FOUND", "message": fmt.Sprintf("deployment %s not found", deploymentID)}, + } + } + return Response{Body: deployment} +} + +func (s *FakeWorkspace) DeploymentMetadataDeleteDeployment(deploymentID string) Response { + defer s.LockUnlock()() + + state := s.deploymentMetadata + deployment, ok := state.deployments[deploymentID] + if !ok { + return Response{ + StatusCode: http.StatusNotFound, + Body: map[string]string{"error_code": "NOT_FOUND", "message": fmt.Sprintf("deployment %s not found", deploymentID)}, + } + } + + now := time.Now().UTC() + deployment.Status = tempdms.DeploymentStatusDeleted + deployment.DestroyTime = &now + deployment.DestroyedBy = s.CurrentUser().UserName + deployment.UpdateTime = &now + state.deployments[deploymentID] = deployment + + return Response{Body: deployment} +} + +func (s *FakeWorkspace) DeploymentMetadataCreateVersion(req Request, deploymentID string) Response { + defer s.LockUnlock()() + + state := s.deploymentMetadata + deployment, ok := state.deployments[deploymentID] + if !ok { + return Response{ + StatusCode: http.StatusNotFound, + Body: map[string]string{"error_code": "NOT_FOUND", "message": fmt.Sprintf("deployment %s not found", deploymentID)}, + } + } + + var createReq tempdms.CreateVersionRequest + if err := json.Unmarshal(req.Body, &createReq); err != nil { + return Response{ + StatusCode: http.StatusBadRequest, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": fmt.Sprintf("invalid request: %s", err)}, + } + } + + versionID := createReq.VersionID + if versionID == "" { + return Response{ + StatusCode: http.StatusBadRequest, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": "version_id is required"}, + } + } + + // Validate version_id == last_version_id + 1 (matching server behavior). + var expectedVersionID string + if deployment.LastVersionID == "" { + expectedVersionID = "1" + } else { + lastVersion, err := strconv.ParseInt(deployment.LastVersionID, 10, 64) + if err != nil { + return Response{ + StatusCode: http.StatusInternalServerError, + Body: map[string]string{"error_code": "INTERNAL_ERROR", "message": fmt.Sprintf("stored last_version_id is not a valid number: %s", deployment.LastVersionID)}, + } + } + expectedVersionID = strconv.FormatInt(lastVersion+1, 10) + } + if versionID != expectedVersionID { + return Response{ + StatusCode: http.StatusConflict, + Body: map[string]string{ + "error_code": "ABORTED", + "message": fmt.Sprintf("version_id must be %s (last_version_id + 1), got: %s", expectedVersionID, versionID), + }, + } + } + + // Check lock: if a lock is held and not expired, reject with 409. + now := time.Now().UTC() + if holder, hasLock := state.lockHolder[deploymentID]; hasLock { + if expiry, ok := state.lockExpiry[deploymentID]; ok && expiry.After(now) { + return Response{ + StatusCode: http.StatusConflict, + Body: map[string]string{ + "error_code": "ABORTED", + "message": fmt.Sprintf("deployment is locked by %s until %s", holder, expiry.Format(time.RFC3339)), + }, + } + } + } + + versionKey := deploymentID + "/" + versionID + version := tempdms.Version{ + Name: fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID), + VersionID: versionID, + CreatedBy: s.CurrentUser().UserName, + CreateTime: &now, + Status: tempdms.VersionStatusInProgress, + } + if createReq.Version != nil { + version.CliVersion = createReq.Version.CliVersion + version.VersionType = createReq.Version.VersionType + } + + state.versions[versionKey] = version + + // Acquire the lock. + lockExpiry := now.Add(lockDuration) + state.lockHolder[deploymentID] = version.Name + state.lockExpiry[deploymentID] = lockExpiry + + // Update the deployment's last_version_id and status. + deployment.LastVersionID = versionID + deployment.Status = tempdms.DeploymentStatusInProgress + deployment.UpdateTime = &now + state.deployments[deploymentID] = deployment + + return Response{Body: version} +} + +func (s *FakeWorkspace) DeploymentMetadataGetVersion(deploymentID, versionID string) Response { + defer s.LockUnlock()() + + state := s.deploymentMetadata + versionKey := deploymentID + "/" + versionID + version, ok := state.versions[versionKey] + if !ok { + return Response{ + StatusCode: http.StatusNotFound, + Body: map[string]string{"error_code": "NOT_FOUND", "message": fmt.Sprintf("version %s not found", versionKey)}, + } + } + return Response{Body: version} +} + +func (s *FakeWorkspace) DeploymentMetadataHeartbeat(req Request, deploymentID, versionID string) Response { + defer s.LockUnlock()() + + state := s.deploymentMetadata + versionKey := deploymentID + "/" + versionID + version, ok := state.versions[versionKey] + if !ok { + return Response{ + StatusCode: http.StatusNotFound, + Body: map[string]string{"error_code": "NOT_FOUND", "message": fmt.Sprintf("version %s not found", versionKey)}, + } + } + + if version.Status != tempdms.VersionStatusInProgress { + return Response{ + StatusCode: http.StatusConflict, + Body: map[string]string{"error_code": "ABORTED", "message": "version is no longer in progress"}, + } + } + + // Verify this version holds the lock. + expectedHolder := fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID) + if state.lockHolder[deploymentID] != expectedHolder { + return Response{ + StatusCode: http.StatusConflict, + Body: map[string]string{"error_code": "ABORTED", "message": "lock is not held by this version"}, + } + } + + // Renew the lock. + now := time.Now().UTC() + newExpiry := now.Add(lockDuration) + state.lockExpiry[deploymentID] = newExpiry + + return Response{Body: tempdms.HeartbeatResponse{ExpireTime: &newExpiry}} +} + +func (s *FakeWorkspace) DeploymentMetadataCompleteVersion(req Request, deploymentID, versionID string) Response { + defer s.LockUnlock()() + + state := s.deploymentMetadata + versionKey := deploymentID + "/" + versionID + version, ok := state.versions[versionKey] + if !ok { + return Response{ + StatusCode: http.StatusNotFound, + Body: map[string]string{"error_code": "NOT_FOUND", "message": fmt.Sprintf("version %s not found", versionKey)}, + } + } + + if version.Status != tempdms.VersionStatusInProgress { + return Response{ + StatusCode: http.StatusConflict, + Body: map[string]string{"error_code": "ABORTED", "message": "version is already completed"}, + } + } + + var completeReq tempdms.CompleteVersionRequest + if err := json.Unmarshal(req.Body, &completeReq); err != nil { + return Response{ + StatusCode: http.StatusBadRequest, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": fmt.Sprintf("invalid request: %s", err)}, + } + } + + now := time.Now().UTC() + version.Status = tempdms.VersionStatusCompleted + version.CompleteTime = &now + version.CompletionReason = completeReq.CompletionReason + version.CompletedBy = s.CurrentUser().UserName + state.versions[versionKey] = version + + // Release the lock. + delete(state.lockHolder, deploymentID) + delete(state.lockExpiry, deploymentID) + + // Update deployment status based on completion reason. + if deployment, ok := state.deployments[deploymentID]; ok { + switch completeReq.CompletionReason { + case tempdms.VersionCompleteSuccess: + deployment.Status = tempdms.DeploymentStatusActive + case tempdms.VersionCompleteFailure, tempdms.VersionCompleteForceAbort, tempdms.VersionCompleteLeaseExpire: + deployment.Status = tempdms.DeploymentStatusFailed + } + deployment.UpdateTime = &now + state.deployments[deploymentID] = deployment + } + + return Response{Body: version} +} + +func (s *FakeWorkspace) DeploymentMetadataCreateOperation(req Request, deploymentID, versionID string) Response { + defer s.LockUnlock()() + + state := s.deploymentMetadata + + var createReq tempdms.CreateOperationRequest + if err := json.Unmarshal(req.Body, &createReq); err != nil { + return Response{ + StatusCode: http.StatusBadRequest, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": fmt.Sprintf("invalid request: %s", err)}, + } + } + + resourceKey := createReq.ResourceKey + if resourceKey == "" { + return Response{ + StatusCode: http.StatusBadRequest, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": "resource_key is required"}, + } + } + + now := time.Now().UTC() + opKey := deploymentID + "/" + versionID + "/" + resourceKey + operation := tempdms.Operation{ + Name: fmt.Sprintf("deployments/%s/versions/%s/operations/%s", deploymentID, versionID, resourceKey), + ResourceKey: resourceKey, + CreateTime: &now, + } + if createReq.Operation != nil { + operation.ActionType = createReq.Operation.ActionType + operation.State = createReq.Operation.State + operation.ResourceID = createReq.Operation.ResourceID + operation.Status = createReq.Operation.Status + operation.ErrorMessage = createReq.Operation.ErrorMessage + } + + state.operations[opKey] = operation + + // Upsert the deployment-level resource. + resKey := deploymentID + "/" + resourceKey + resource := tempdms.Resource{ + Name: fmt.Sprintf("deployments/%s/resources/%s", deploymentID, resourceKey), + ResourceKey: resourceKey, + } + if createReq.Operation != nil { + resource.State = createReq.Operation.State + resource.ResourceID = createReq.Operation.ResourceID + resource.LastActionType = createReq.Operation.ActionType + resource.LastVersionID = versionID + } + state.resources[resKey] = resource + + return Response{Body: operation} +} + +func (s *FakeWorkspace) DeploymentMetadataListResources(deploymentID string) Response { + defer s.LockUnlock()() + + state := s.deploymentMetadata + prefix := deploymentID + "/" + var resources []tempdms.Resource + for key, resource := range state.resources { + if strings.HasPrefix(key, prefix) { + resources = append(resources, resource) + } + } + if resources == nil { + resources = []tempdms.Resource{} + } + return Response{Body: tempdms.ListResourcesResponse{Resources: resources}} +} diff --git a/libs/testserver/fake_workspace.go b/libs/testserver/fake_workspace.go index b13aae069a..a2462c4c6d 100644 --- a/libs/testserver/fake_workspace.go +++ b/libs/testserver/fake_workspace.go @@ -173,6 +173,8 @@ type FakeWorkspace struct { // clusterVenvs caches Python venvs per existing cluster ID, // matching cloud behavior where libraries are cached on running clusters. clusterVenvs map[string]*clusterEnv + + deploymentMetadata *deploymentMetadataState } func (s *FakeWorkspace) LockUnlock() func() { @@ -297,6 +299,7 @@ func NewFakeWorkspace(url, token string) *FakeWorkspace { PostgresEndpoints: map[string]postgres.Endpoint{}, PostgresOperations: map[string]postgres.Operation{}, clusterVenvs: map[string]*clusterEnv{}, + deploymentMetadata: newDeploymentMetadataState(), Alerts: map[string]sql.AlertV2{}, Experiments: map[string]ml.GetExperimentResponse{}, ModelRegistryModels: map[string]ml.Model{}, diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 9e30cb5f0c..904284ed51 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -905,4 +905,42 @@ func AddDefaultHandlers(server *Server) { }, } }) + + // Deployment Metadata Service: + + server.Handle("POST", "/api/2.0/bundle/deployments", func(req Request) any { + return req.Workspace.DeploymentMetadataCreateDeployment(req) + }) + + server.Handle("GET", "/api/2.0/bundle/deployments/{deployment_id}", func(req Request) any { + return req.Workspace.DeploymentMetadataGetDeployment(req.Vars["deployment_id"]) + }) + + server.Handle("DELETE", "/api/2.0/bundle/deployments/{deployment_id}", func(req Request) any { + return req.Workspace.DeploymentMetadataDeleteDeployment(req.Vars["deployment_id"]) + }) + + server.Handle("POST", "/api/2.0/bundle/deployments/{deployment_id}/versions", func(req Request) any { + return req.Workspace.DeploymentMetadataCreateVersion(req, req.Vars["deployment_id"]) + }) + + server.Handle("GET", "/api/2.0/bundle/deployments/{deployment_id}/versions/{version_id}", func(req Request) any { + return req.Workspace.DeploymentMetadataGetVersion(req.Vars["deployment_id"], req.Vars["version_id"]) + }) + + server.Handle("POST", "/api/2.0/bundle/deployments/{deployment_id}/versions/{version_id}/heartbeat", func(req Request) any { + return req.Workspace.DeploymentMetadataHeartbeat(req, req.Vars["deployment_id"], req.Vars["version_id"]) + }) + + server.Handle("POST", "/api/2.0/bundle/deployments/{deployment_id}/versions/{version_id}/complete", func(req Request) any { + return req.Workspace.DeploymentMetadataCompleteVersion(req, req.Vars["deployment_id"], req.Vars["version_id"]) + }) + + server.Handle("POST", "/api/2.0/bundle/deployments/{deployment_id}/versions/{version_id}/operations", func(req Request) any { + return req.Workspace.DeploymentMetadataCreateOperation(req, req.Vars["deployment_id"], req.Vars["version_id"]) + }) + + server.Handle("GET", "/api/2.0/bundle/deployments/{deployment_id}/resources", func(req Request) any { + return req.Workspace.DeploymentMetadataListResources(req.Vars["deployment_id"]) + }) } From 29f567007c9efaca805e8482545f32e5fee48b67 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 31 Mar 2026 21:03:26 +0000 Subject: [PATCH 04/11] Fix query parameter handling for deployment metadata service API The proto HTTP bindings use `body: "deployment"`, `body: "version"`, and `body: "operation"` for Create endpoints, which means only the sub-message goes in the request body. The identifier fields (deployment_id, version_id, resource_key) must be passed as query parameters. Previously these fields were incorrectly included in the request body, which would cause "required field missing" errors against the real service. Also updates the test server to read these fields from query parameters instead of the body, so acceptance tests validate the real API contract. Co-authored-by: Isaac --- libs/tempdms/api.go | 9 ++- libs/testserver/deployment_metadata.go | 104 +++++++++++++------------ 2 files changed, 59 insertions(+), 54 deletions(-) diff --git a/libs/tempdms/api.go b/libs/tempdms/api.go index 305633819e..005e704923 100644 --- a/libs/tempdms/api.go +++ b/libs/tempdms/api.go @@ -34,7 +34,8 @@ func NewDeploymentMetadataAPI(w *databricks.WorkspaceClient) (*DeploymentMetadat func (a *DeploymentMetadataAPI) CreateDeployment(ctx context.Context, request CreateDeploymentRequest) (*Deployment, error) { var resp Deployment path := fmt.Sprintf("%s/deployments", basePath) - err := a.api.Do(ctx, http.MethodPost, path, nil, nil, request, &resp) + query := map[string]string{"deployment_id": request.DeploymentID} + err := a.api.Do(ctx, http.MethodPost, path, nil, query, request.Deployment, &resp) if err != nil { return nil, mapError("create deployment", err) } @@ -64,7 +65,8 @@ func (a *DeploymentMetadataAPI) DeleteDeployment(ctx context.Context, request De func (a *DeploymentMetadataAPI) CreateVersion(ctx context.Context, request CreateVersionRequest) (*Version, error) { var resp Version path := fmt.Sprintf("%s/deployments/%s/versions", basePath, request.DeploymentID) - err := a.api.Do(ctx, http.MethodPost, path, nil, nil, request, &resp) + query := map[string]string{"version_id": request.VersionID} + err := a.api.Do(ctx, http.MethodPost, path, nil, query, request.Version, &resp) if err != nil { return nil, mapError("create version", err) } @@ -104,7 +106,8 @@ func (a *DeploymentMetadataAPI) CompleteVersion(ctx context.Context, request Com func (a *DeploymentMetadataAPI) CreateOperation(ctx context.Context, request CreateOperationRequest) (*Operation, error) { var resp Operation path := fmt.Sprintf("%s/deployments/%s/versions/%s/operations", basePath, request.DeploymentID, request.VersionID) - err := a.api.Do(ctx, http.MethodPost, path, nil, nil, request, &resp) + query := map[string]string{"resource_key": request.ResourceKey} + err := a.api.Do(ctx, http.MethodPost, path, nil, query, request.Operation, &resp) if err != nil { return nil, mapError("create operation", err) } diff --git a/libs/testserver/deployment_metadata.go b/libs/testserver/deployment_metadata.go index 1b0b3e9f90..48cf7f5caa 100644 --- a/libs/testserver/deployment_metadata.go +++ b/libs/testserver/deployment_metadata.go @@ -47,19 +47,23 @@ const lockDuration = 5 * time.Minute func (s *FakeWorkspace) DeploymentMetadataCreateDeployment(req Request) Response { defer s.LockUnlock()() - var createReq tempdms.CreateDeploymentRequest - if err := json.Unmarshal(req.Body, &createReq); err != nil { + // deployment_id is a query parameter, not in the body. + deploymentID := req.URL.Query().Get("deployment_id") + if deploymentID == "" { return Response{ StatusCode: http.StatusBadRequest, - Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": fmt.Sprintf("invalid request: %s", err)}, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": "deployment_id is required"}, } } - deploymentID := createReq.DeploymentID - if deploymentID == "" { - return Response{ - StatusCode: http.StatusBadRequest, - Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": "deployment_id is required"}, + // The body maps to the Deployment sub-message. + var bodyDeployment tempdms.Deployment + if len(req.Body) > 0 { + if err := json.Unmarshal(req.Body, &bodyDeployment); err != nil { + return Response{ + StatusCode: http.StatusBadRequest, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": fmt.Sprintf("invalid request: %s", err)}, + } } } @@ -75,16 +79,12 @@ func (s *FakeWorkspace) DeploymentMetadataCreateDeployment(req Request) Response deployment := tempdms.Deployment{ Name: fmt.Sprintf("deployments/%s", deploymentID), DisplayName: deploymentID, + TargetName: bodyDeployment.TargetName, Status: tempdms.DeploymentStatusActive, CreatedBy: s.CurrentUser().UserName, CreateTime: &now, UpdateTime: &now, } - if createReq.Deployment != nil { - if createReq.Deployment.TargetName != "" { - deployment.TargetName = createReq.Deployment.TargetName - } - } state.deployments[deploymentID] = deployment return Response{Body: deployment} @@ -138,19 +138,23 @@ func (s *FakeWorkspace) DeploymentMetadataCreateVersion(req Request, deploymentI } } - var createReq tempdms.CreateVersionRequest - if err := json.Unmarshal(req.Body, &createReq); err != nil { + // version_id is a query parameter, not in the body. + versionID := req.URL.Query().Get("version_id") + if versionID == "" { return Response{ StatusCode: http.StatusBadRequest, - Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": fmt.Sprintf("invalid request: %s", err)}, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": "version_id is required"}, } } - versionID := createReq.VersionID - if versionID == "" { - return Response{ - StatusCode: http.StatusBadRequest, - Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": "version_id is required"}, + // The body maps to the Version sub-message. + var bodyVersion tempdms.Version + if len(req.Body) > 0 { + if err := json.Unmarshal(req.Body, &bodyVersion); err != nil { + return Response{ + StatusCode: http.StatusBadRequest, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": fmt.Sprintf("invalid request: %s", err)}, + } } } @@ -200,10 +204,8 @@ func (s *FakeWorkspace) DeploymentMetadataCreateVersion(req Request, deploymentI CreateTime: &now, Status: tempdms.VersionStatusInProgress, } - if createReq.Version != nil { - version.CliVersion = createReq.Version.CliVersion - version.VersionType = createReq.Version.VersionType - } + version.CliVersion = bodyVersion.CliVersion + version.VersionType = bodyVersion.VersionType state.versions[versionKey] = version @@ -332,35 +334,37 @@ func (s *FakeWorkspace) DeploymentMetadataCreateOperation(req Request, deploymen state := s.deploymentMetadata - var createReq tempdms.CreateOperationRequest - if err := json.Unmarshal(req.Body, &createReq); err != nil { + // resource_key is a query parameter, not in the body. + resourceKey := req.URL.Query().Get("resource_key") + if resourceKey == "" { return Response{ StatusCode: http.StatusBadRequest, - Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": fmt.Sprintf("invalid request: %s", err)}, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": "resource_key is required"}, } } - resourceKey := createReq.ResourceKey - if resourceKey == "" { - return Response{ - StatusCode: http.StatusBadRequest, - Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": "resource_key is required"}, + // The body maps to the Operation sub-message. + var bodyOperation tempdms.Operation + if len(req.Body) > 0 { + if err := json.Unmarshal(req.Body, &bodyOperation); err != nil { + return Response{ + StatusCode: http.StatusBadRequest, + Body: map[string]string{"error_code": "INVALID_PARAMETER_VALUE", "message": fmt.Sprintf("invalid request: %s", err)}, + } } } now := time.Now().UTC() opKey := deploymentID + "/" + versionID + "/" + resourceKey operation := tempdms.Operation{ - Name: fmt.Sprintf("deployments/%s/versions/%s/operations/%s", deploymentID, versionID, resourceKey), - ResourceKey: resourceKey, - CreateTime: &now, - } - if createReq.Operation != nil { - operation.ActionType = createReq.Operation.ActionType - operation.State = createReq.Operation.State - operation.ResourceID = createReq.Operation.ResourceID - operation.Status = createReq.Operation.Status - operation.ErrorMessage = createReq.Operation.ErrorMessage + Name: fmt.Sprintf("deployments/%s/versions/%s/operations/%s", deploymentID, versionID, resourceKey), + ResourceKey: resourceKey, + CreateTime: &now, + ActionType: bodyOperation.ActionType, + State: bodyOperation.State, + ResourceID: bodyOperation.ResourceID, + Status: bodyOperation.Status, + ErrorMessage: bodyOperation.ErrorMessage, } state.operations[opKey] = operation @@ -368,14 +372,12 @@ func (s *FakeWorkspace) DeploymentMetadataCreateOperation(req Request, deploymen // Upsert the deployment-level resource. resKey := deploymentID + "/" + resourceKey resource := tempdms.Resource{ - Name: fmt.Sprintf("deployments/%s/resources/%s", deploymentID, resourceKey), - ResourceKey: resourceKey, - } - if createReq.Operation != nil { - resource.State = createReq.Operation.State - resource.ResourceID = createReq.Operation.ResourceID - resource.LastActionType = createReq.Operation.ActionType - resource.LastVersionID = versionID + Name: fmt.Sprintf("deployments/%s/resources/%s", deploymentID, resourceKey), + ResourceKey: resourceKey, + State: bodyOperation.State, + ResourceID: bodyOperation.ResourceID, + LastActionType: bodyOperation.ActionType, + LastVersionID: versionID, } state.resources[resKey] = resource From aa11d7c8fbde2d3a06ce3fc3ca2b009949f10fc7 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 31 Mar 2026 21:15:48 +0000 Subject: [PATCH 05/11] Fix remaining issues: enum naming, redundant param, add acceptance test - Rename VersionCompleteLeaseExpire to VersionCompleteLeaseExpired to match proto enum VERSION_COMPLETE_LEASE_EXPIRED. - Remove redundant "parent" query parameter from ListResources (the deployment ID is already in the URL path). - Add acceptance test for the deployment metadata service integration that validates the correct API call sequence during deploy and destroy. Co-authored-by: Isaac --- .../bundle/deploy/metadata-service/databricks.yml | 7 +++++++ acceptance/bundle/deploy/metadata-service/script | 15 +++++++++++++++ .../bundle/deploy/metadata-service/test.toml | 3 +++ libs/tempdms/api.go | 1 - libs/tempdms/types.go | 2 +- libs/testserver/deployment_metadata.go | 2 +- 6 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 acceptance/bundle/deploy/metadata-service/databricks.yml create mode 100644 acceptance/bundle/deploy/metadata-service/script create mode 100644 acceptance/bundle/deploy/metadata-service/test.toml diff --git a/acceptance/bundle/deploy/metadata-service/databricks.yml b/acceptance/bundle/deploy/metadata-service/databricks.yml new file mode 100644 index 0000000000..c21c8a9392 --- /dev/null +++ b/acceptance/bundle/deploy/metadata-service/databricks.yml @@ -0,0 +1,7 @@ +bundle: + name: metadata-service-test + +resources: + jobs: + test_job: + name: test-job diff --git a/acceptance/bundle/deploy/metadata-service/script b/acceptance/bundle/deploy/metadata-service/script new file mode 100644 index 0000000000..3f2006ac8e --- /dev/null +++ b/acceptance/bundle/deploy/metadata-service/script @@ -0,0 +1,15 @@ +# Deploy with the metadata service enabled. Verify the correct API calls are +# made: CreateDeployment, CreateVersion, resource CRUD, CreateOperation, +# CompleteVersion. +trace $CLI bundle deploy +trace print_requests.py --keep --get //bundle | contains.py "POST" "deployments" "versions" "complete" + +# Verify deployment_id is sent as a query parameter (not in the body). +trace print_requests.py --keep --get //bundle/deployments "^//bundle/deployments/" | contains.py "POST" "deployment_id" + +# Verify version_id is sent as a query parameter. +trace print_requests.py --keep --get //versions "^//versions/" | contains.py "POST" "version_id" + +# Destroy with the metadata service enabled. +trace $CLI bundle destroy --auto-approve +trace print_requests.py --get //bundle | contains.py "POST" "deployments" "versions" "complete" diff --git a/acceptance/bundle/deploy/metadata-service/test.toml b/acceptance/bundle/deploy/metadata-service/test.toml new file mode 100644 index 0000000000..33c8f80dd4 --- /dev/null +++ b/acceptance/bundle/deploy/metadata-service/test.toml @@ -0,0 +1,3 @@ +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] +EnvMatrix.DATABRICKS_BUNDLE_DEPLOYMENT_SERVICE = ["true"] +RecordRequests = true diff --git a/libs/tempdms/api.go b/libs/tempdms/api.go index 005e704923..005425fa68 100644 --- a/libs/tempdms/api.go +++ b/libs/tempdms/api.go @@ -123,7 +123,6 @@ func (a *DeploymentMetadataAPI) ListResources(ctx context.Context, request ListR path := fmt.Sprintf("%s/deployments/%s/resources", basePath, request.DeploymentID) q := map[string]any{ - "parent": fmt.Sprintf("deployments/%s", request.DeploymentID), "page_size": 1000, } if pageToken != "" { diff --git a/libs/tempdms/types.go b/libs/tempdms/types.go index a5d8c0df92..b36386b438 100644 --- a/libs/tempdms/types.go +++ b/libs/tempdms/types.go @@ -35,7 +35,7 @@ const ( VersionCompleteSuccess VersionComplete = 1 VersionCompleteFailure VersionComplete = 2 VersionCompleteForceAbort VersionComplete = 3 - VersionCompleteLeaseExpire VersionComplete = 4 + VersionCompleteLeaseExpired VersionComplete = 4 ) const ( diff --git a/libs/testserver/deployment_metadata.go b/libs/testserver/deployment_metadata.go index 48cf7f5caa..a1968c8638 100644 --- a/libs/testserver/deployment_metadata.go +++ b/libs/testserver/deployment_metadata.go @@ -319,7 +319,7 @@ func (s *FakeWorkspace) DeploymentMetadataCompleteVersion(req Request, deploymen switch completeReq.CompletionReason { case tempdms.VersionCompleteSuccess: deployment.Status = tempdms.DeploymentStatusActive - case tempdms.VersionCompleteFailure, tempdms.VersionCompleteForceAbort, tempdms.VersionCompleteLeaseExpire: + case tempdms.VersionCompleteFailure, tempdms.VersionCompleteForceAbort, tempdms.VersionCompleteLeaseExpired: deployment.Status = tempdms.DeploymentStatusFailed } deployment.UpdateTime = &now From c21aee6b6394ed399560c72aea6ab15082155c68 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 31 Mar 2026 21:21:21 +0000 Subject: [PATCH 06/11] Update acceptance test to print all metadata service requests Use print_requests.py to print all requests to /bundle endpoints at each stage (deploy and destroy) for clear visibility into the API call sequence. Co-authored-by: Isaac --- acceptance/bundle/deploy/metadata-service/script | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/acceptance/bundle/deploy/metadata-service/script b/acceptance/bundle/deploy/metadata-service/script index 3f2006ac8e..fa4f54bd57 100644 --- a/acceptance/bundle/deploy/metadata-service/script +++ b/acceptance/bundle/deploy/metadata-service/script @@ -1,15 +1,11 @@ -# Deploy with the metadata service enabled. Verify the correct API calls are -# made: CreateDeployment, CreateVersion, resource CRUD, CreateOperation, -# CompleteVersion. +# Deploy with the metadata service enabled. trace $CLI bundle deploy -trace print_requests.py --keep --get //bundle | contains.py "POST" "deployments" "versions" "complete" -# Verify deployment_id is sent as a query parameter (not in the body). -trace print_requests.py --keep --get //bundle/deployments "^//bundle/deployments/" | contains.py "POST" "deployment_id" - -# Verify version_id is sent as a query parameter. -trace print_requests.py --keep --get //versions "^//versions/" | contains.py "POST" "version_id" +# Print all metadata service requests made during deploy. +trace print_requests.py --get //bundle # Destroy with the metadata service enabled. trace $CLI bundle destroy --auto-approve -trace print_requests.py --get //bundle | contains.py "POST" "deployments" "versions" "complete" + +# Print all metadata service requests made during destroy. +trace print_requests.py --get //bundle From 1621bc26ff492a51b267a56f4ac4d1b8cd2c949f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 31 Mar 2026 21:44:40 +0000 Subject: [PATCH 07/11] Fix error masking and input validation from self-review - Rename libs/tempdms package to libs/tmpdms - Rename env var to DATABRICKS_BUNDLE_MANAGED_STATE - Use lineage from resources.json as deployment ID - Write _deployment_id file to state directory - Remove postApplyHook, add inline OperationReporter - Set heartbeat interval to 30 seconds Co-authored-by: Isaac --- .../bundle/deploy/metadata-service/test.toml | 2 +- bundle/direct/bundle_apply.go | 19 +++ bundle/direct/pkg.go | 9 ++ bundle/env/deployment_metadata.go | 14 +- bundle/phases/deploy.go | 37 ++--- bundle/phases/deploy_metadata.go | 143 +++++++++--------- bundle/phases/destroy.go | 12 +- bundle/phases/heartbeat.go | 8 +- libs/testserver/deployment_metadata.go | 64 ++++---- libs/{tempdms => tmpdms}/api.go | 2 +- libs/{tempdms => tmpdms}/types.go | 4 +- 11 files changed, 168 insertions(+), 146 deletions(-) rename libs/{tempdms => tmpdms}/api.go (99%) rename libs/{tempdms => tmpdms}/types.go (98%) diff --git a/acceptance/bundle/deploy/metadata-service/test.toml b/acceptance/bundle/deploy/metadata-service/test.toml index 33c8f80dd4..4cebdfc83a 100644 --- a/acceptance/bundle/deploy/metadata-service/test.toml +++ b/acceptance/bundle/deploy/metadata-service/test.toml @@ -1,3 +1,3 @@ EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] -EnvMatrix.DATABRICKS_BUNDLE_DEPLOYMENT_SERVICE = ["true"] +EnvMatrix.DATABRICKS_BUNDLE_MANAGED_STATE = ["true"] RecordRequests = true diff --git a/bundle/direct/bundle_apply.go b/bundle/direct/bundle_apply.go index ea3f615f7f..68b5672257 100644 --- a/bundle/direct/bundle_apply.go +++ b/bundle/direct/bundle_apply.go @@ -84,11 +84,24 @@ func (b *DeploymentBundle) Apply(ctx context.Context, client *databricks.Workspa logdiag.LogError(ctx, fmt.Errorf("%s: Unexpected delete action during migration", errorPrefix)) return false } + + // Capture the resource ID before deletion for operation reporting. + var deleteResourceID string + if b.OperationReporter != nil { + if dbentry, ok := b.StateDB.GetResourceEntry(resourceKey); ok { + deleteResourceID = dbentry.ID + } + } + err = d.Destroy(ctx, &b.StateDB) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: %w", errorPrefix, err)) return false } + + if b.OperationReporter != nil { + b.OperationReporter(ctx, resourceKey, deleteResourceID, action) + } return true } @@ -128,6 +141,12 @@ func (b *DeploymentBundle) Apply(ctx context.Context, client *databricks.Workspa logdiag.LogError(ctx, fmt.Errorf("%s: %w", errorPrefix, err)) return false } + + // Report the operation inline to the metadata service. + if b.OperationReporter != nil && !migrateMode { + dbentry, _ := b.StateDB.GetResourceEntry(resourceKey) + b.OperationReporter(ctx, resourceKey, dbentry.ID, action) + } } // TODO: Note, we only really need remote state if there are remote references. diff --git a/bundle/direct/pkg.go b/bundle/direct/pkg.go index 74e72e79b0..7932c040eb 100644 --- a/bundle/direct/pkg.go +++ b/bundle/direct/pkg.go @@ -37,6 +37,11 @@ type DeploymentUnit struct { DependsOn []deployplan.DependsOnEntry } +// OperationReporter is called after each successful resource operation to report +// it to the deployment metadata service. It is best-effort: failures are logged +// as warnings by the caller. +type OperationReporter func(ctx context.Context, resourceKey string, resourceID string, action deployplan.ActionType) + // DeploymentBundle holds everything needed to deploy a bundle type DeploymentBundle struct { StateDB dstate.DeploymentState @@ -44,6 +49,10 @@ type DeploymentBundle struct { Plan *deployplan.Plan RemoteStateCache sync.Map StateCache structvar.Cache + + // OperationReporter, when set, is called inline after each successful + // resource Create/Update/Delete to report the operation to the metadata service. + OperationReporter OperationReporter } // SetRemoteState updates the remote state with type validation and marks as fresh. diff --git a/bundle/env/deployment_metadata.go b/bundle/env/deployment_metadata.go index 60e896c045..a4d08c7cd0 100644 --- a/bundle/env/deployment_metadata.go +++ b/bundle/env/deployment_metadata.go @@ -2,14 +2,14 @@ package env import "context" -// deploymentServiceVariable names the environment variable that controls whether the -// deployment metadata service is used for locking and resource state management. -const deploymentServiceVariable = "DATABRICKS_BUNDLE_DEPLOYMENT_SERVICE" +// managedStateVariable names the environment variable that controls whether +// server-managed state is used for locking and resource state management. +const managedStateVariable = "DATABRICKS_BUNDLE_MANAGED_STATE" -// DeploymentService returns the environment variable that controls whether the -// deployment metadata service is used for locking and resource state management. -func DeploymentService(ctx context.Context) (string, bool) { +// ManagedState returns the environment variable that controls whether +// server-managed state is used for locking and resource state management. +func ManagedState(ctx context.Context) (string, bool) { return get(ctx, []string{ - deploymentServiceVariable, + managedStateVariable, }) } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 5ca8745f06..9c067bcff4 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -26,7 +26,7 @@ import ( "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" "github.com/databricks/cli/libs/sync" - "github.com/databricks/cli/libs/tempdms" + "github.com/databricks/cli/libs/tmpdms" ) func approvalForDeploy(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan) (bool, error) { @@ -100,11 +100,7 @@ func approvalForDeploy(ctx context.Context, b *bundle.Bundle, plan *deployplan.P return approved, nil } -// postApplyHook is called after the deployment plan is applied (terraform/direct Apply). -// It can be used for additional state reporting (e.g. to the metadata service). -type postApplyHook func(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan) - -func deployCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, targetEngine engine.EngineType, hook postApplyHook) { +func deployCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, targetEngine engine.EngineType) { cmdio.LogString(ctx, "Deploying resources...") if targetEngine.IsDirect() { @@ -116,11 +112,6 @@ func deployCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, ta // Even if deployment failed, there might be updates in states that we need to upload. statemgmt.PushResourcesState(ctx, b, targetEngine) - // Run any additional post-apply logic (e.g. metadata service operation reporting). - if hook != nil { - hook(ctx, b, plan) - } - if logdiag.HasError(ctx) { return } @@ -150,7 +141,7 @@ func uploadLibraries(ctx context.Context, b *bundle.Bundle, libs map[string][]li // The deploy phase deploys artifacts and resources. // If readPlanPath is provided, the plan is loaded from that file instead of being calculated. func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler, targetEngine engine.EngineType, libs map[string][]libraries.LocationToUpdate, plan *deployplan.Plan) { - useMetadataService, _ := env.DeploymentService(ctx) + useMetadataService, _ := env.ManagedState(ctx) if useMetadataService == "true" { log.Info(ctx, "Phase: deploy (with metadata service)") @@ -164,20 +155,16 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand } // Acquire the deployment lock. - var svc *tempdms.DeploymentMetadataAPI - var deploymentID, versionID string var failed bool if useMetadataService == "true" { - var err error - svc, err = tempdms.NewDeploymentMetadataAPI(b.WorkspaceClient()) + svc, err := tmpdms.NewDeploymentMetadataAPI(b.WorkspaceClient()) if err != nil { logdiag.LogError(ctx, fmt.Errorf("failed to create metadata service client: %w", err)) return } - var cleanup func(failed bool) - deploymentID, versionID, cleanup, err = deployMetadataLock(ctx, b, svc, tempdms.VersionTypeDeploy) + deploymentID, versionID, cleanup, err := deployMetadataLock(ctx, b, svc, tmpdms.VersionTypeDeploy) if err != nil { logdiag.LogError(ctx, err) return @@ -185,6 +172,10 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand defer func() { cleanup(failed || logdiag.HasError(ctx)) }() + + if targetEngine.IsDirect() { + b.DeploymentBundle.OperationReporter = makeOperationReporter(svc, deploymentID, versionID) + } } else { bundle.ApplyContext(ctx, b, lock.Acquire()) if logdiag.HasError(ctx) { @@ -242,15 +233,7 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand return } - // Build the post-apply hook for metadata service reporting (nil for file-based). - var hook postApplyHook - if useMetadataService == "true" { - hook = func(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan) { - reportOperations(ctx, svc, deploymentID, versionID, plan) - } - } - - deployCore(ctx, b, plan, targetEngine, hook) + deployCore(ctx, b, plan, targetEngine) if logdiag.HasError(ctx) { failed = true return diff --git a/bundle/phases/deploy_metadata.go b/bundle/phases/deploy_metadata.go index 40d1d7d620..85d7290805 100644 --- a/bundle/phases/deploy_metadata.go +++ b/bundle/phases/deploy_metadata.go @@ -5,15 +5,18 @@ import ( "errors" "fmt" "net/http" + "os" + "path/filepath" + "strconv" "time" "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/deploy" "github.com/databricks/cli/bundle/deployplan" + "github.com/databricks/cli/bundle/direct" + "github.com/databricks/cli/bundle/direct/dstate" "github.com/databricks/cli/internal/build" "github.com/databricks/cli/libs/log" - "github.com/databricks/cli/libs/logdiag" - "github.com/databricks/cli/libs/tempdms" + "github.com/databricks/cli/libs/tmpdms" "github.com/databricks/databricks-sdk-go/apierr" "github.com/google/uuid" ) @@ -23,23 +26,33 @@ import ( // // It returns a cleanup function that must be deferred by the caller to release // the lock and stop the heartbeat, as well as any error from acquiring the lock. -func deployMetadataLock(ctx context.Context, b *bundle.Bundle, svc *tempdms.DeploymentMetadataAPI, versionType tempdms.VersionType) (deploymentID, versionID string, cleanup func(failed bool), err error) { - // Load local deployment state to get the deployment ID and sequence number. - state, loadErr := deploy.LoadState(ctx, b) - if loadErr != nil { - return "", "", nil, fmt.Errorf("failed to load deployment state: %w", loadErr) +func deployMetadataLock(ctx context.Context, b *bundle.Bundle, svc *tmpdms.DeploymentMetadataAPI, versionType tmpdms.VersionType) (deploymentID, versionID string, cleanup func(failed bool), err error) { + // Read the lineage from resources.json (direct engine state) for the deployment ID. + _, localPath := b.StateFilenameDirect(ctx) + var stateDB dstate.DeploymentState + if openErr := stateDB.Open(localPath); openErr != nil { + return "", "", nil, fmt.Errorf("failed to open resources state: %w", openErr) } - // Generate a deployment ID if one doesn't exist yet. - if state.ID == uuid.Nil { - state.ID = uuid.New() + deploymentID = stateDB.Data.Lineage + if deploymentID == "" { + deploymentID = uuid.New().String() + } + + // Write the deployment ID to _deployment_id for external tooling. + stateDir := filepath.Dir(localPath) + if mkdirErr := os.MkdirAll(stateDir, 0o755); mkdirErr != nil { + return "", "", nil, fmt.Errorf("failed to create state directory: %w", mkdirErr) + } + deploymentIDPath := filepath.Join(stateDir, "_deployment_id") + if writeErr := os.WriteFile(deploymentIDPath, []byte(deploymentID), 0o600); writeErr != nil { + return "", "", nil, fmt.Errorf("failed to write deployment ID: %w", writeErr) } - deploymentID = state.ID.String() // Ensure the deployment exists in the metadata service. - _, createErr := svc.CreateDeployment(ctx, tempdms.CreateDeploymentRequest{ + _, createErr := svc.CreateDeployment(ctx, tmpdms.CreateDeploymentRequest{ DeploymentID: deploymentID, - Deployment: &tempdms.Deployment{ + Deployment: &tmpdms.Deployment{ TargetName: b.Config.Bundle.Target, }, }) @@ -47,13 +60,30 @@ func deployMetadataLock(ctx context.Context, b *bundle.Bundle, svc *tempdms.Depl return "", "", nil, fmt.Errorf("failed to create deployment: %w", createErr) } + // Get the deployment to determine the next version ID. + dep, getErr := svc.GetDeployment(ctx, tmpdms.GetDeploymentRequest{ + DeploymentID: deploymentID, + }) + if getErr != nil { + return "", "", nil, fmt.Errorf("failed to get deployment: %w", getErr) + } + + if dep.LastVersionID == "" { + versionID = "1" + } else { + lastVersion, parseErr := strconv.ParseInt(dep.LastVersionID, 10, 64) + if parseErr != nil { + return "", "", nil, fmt.Errorf("failed to parse last_version_id %q: %w", dep.LastVersionID, parseErr) + } + versionID = strconv.FormatInt(lastVersion+1, 10) + } + // Create a version to acquire the deployment lock. - versionID = fmt.Sprintf("%d", state.Seq+1) - version, versionErr := svc.CreateVersion(ctx, tempdms.CreateVersionRequest{ + version, versionErr := svc.CreateVersion(ctx, tmpdms.CreateVersionRequest{ DeploymentID: deploymentID, Parent: fmt.Sprintf("deployments/%s", deploymentID), VersionID: versionID, - Version: &tempdms.Version{ + Version: &tmpdms.Version{ CliVersion: build.GetInfo().Version, VersionType: versionType, TargetName: b.Config.Bundle.Target, @@ -71,9 +101,9 @@ func deployMetadataLock(ctx context.Context, b *bundle.Bundle, svc *tempdms.Depl cleanup = func(failed bool) { stopHeartbeat() - reason := tempdms.VersionCompleteSuccess + reason := tmpdms.VersionCompleteSuccess if failed { - reason = tempdms.VersionCompleteFailure + reason = tmpdms.VersionCompleteFailure } // Use a separate context for cleanup so the lock is released even if the @@ -81,7 +111,7 @@ func deployMetadataLock(ctx context.Context, b *bundle.Bundle, svc *tempdms.Depl cleanupCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - _, completeErr := svc.CompleteVersion(cleanupCtx, tempdms.CompleteVersionRequest{ + _, completeErr := svc.CompleteVersion(cleanupCtx, tmpdms.CompleteVersionRequest{ DeploymentID: deploymentID, VersionID: versionID, Name: fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID), @@ -97,48 +127,40 @@ func deployMetadataLock(ctx context.Context, b *bundle.Bundle, svc *tempdms.Depl return deploymentID, versionID, cleanup, nil } -// reportOperations reports each resource operation to the metadata service. -// This is best-effort: failures are logged as warnings, not fatal errors. -func reportOperations(ctx context.Context, svc *tempdms.DeploymentMetadataAPI, deploymentID, versionID string, plan *deployplan.Plan) { - if plan == nil { - return - } - - // Fetch existing resources to determine if this is the first time we're - // tracking each resource in the metadata service. - knownResources := map[string]bool{} - existing, err := svc.ListResources(ctx, tempdms.ListResourcesRequest{ - DeploymentID: deploymentID, - Parent: fmt.Sprintf("deployments/%s", deploymentID), - }) - if err != nil { - log.Warnf(ctx, "Failed to list existing resources from metadata service, will use INITIAL_REGISTER for all: %v", err) - } else { - for _, r := range existing { - knownResources[r.ResourceKey] = true - } +// planActionToOperationAction maps a deploy plan action to a metadata service operation action type. +func planActionToOperationAction(action deployplan.ActionType) tmpdms.OperationActionType { + switch action { + case deployplan.Create: + return tmpdms.OperationActionTypeCreate + case deployplan.Update: + return tmpdms.OperationActionTypeUpdate + case deployplan.Delete: + return tmpdms.OperationActionTypeDelete + case deployplan.Recreate: + return tmpdms.OperationActionTypeRecreate + default: + return tmpdms.OperationActionTypeUnspecified } +} - for resourceKey, entry := range plan.Plan { - var actionType tempdms.OperationActionType - if knownResources[resourceKey] { - actionType = planActionToOperationAction(entry.Action) - } else { - actionType = tempdms.OperationActionTypeInitRegister - } - - if actionType == tempdms.OperationActionTypeUnspecified { - continue +// makeOperationReporter returns an OperationReporter that reports each resource +// operation to the metadata service. Failures are logged as warnings. +func makeOperationReporter(svc *tmpdms.DeploymentMetadataAPI, deploymentID, versionID string) direct.OperationReporter { + return func(ctx context.Context, resourceKey string, resourceID string, action deployplan.ActionType) { + actionType := planActionToOperationAction(action) + if actionType == tmpdms.OperationActionTypeUnspecified { + return } - _, err := svc.CreateOperation(ctx, tempdms.CreateOperationRequest{ + _, err := svc.CreateOperation(ctx, tmpdms.CreateOperationRequest{ DeploymentID: deploymentID, VersionID: versionID, Parent: fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID), ResourceKey: resourceKey, - Operation: &tempdms.Operation{ + Operation: &tmpdms.Operation{ ResourceKey: resourceKey, - Status: tempdms.OperationStatusSucceeded, + ResourceID: resourceID, + Status: tmpdms.OperationStatusSucceeded, ActionType: actionType, }, }) @@ -148,21 +170,6 @@ func reportOperations(ctx context.Context, svc *tempdms.DeploymentMetadataAPI, d } } -func planActionToOperationAction(action deployplan.ActionType) tempdms.OperationActionType { - switch action { - case deployplan.Create: - return tempdms.OperationActionTypeCreate - case deployplan.Update: - return tempdms.OperationActionTypeUpdate - case deployplan.Delete: - return tempdms.OperationActionTypeDelete - case deployplan.Recreate: - return tempdms.OperationActionTypeRecreate - default: - return tempdms.OperationActionTypeUnspecified - } -} - // isAlreadyExists checks if an error indicates the resource already exists (HTTP 409). func isAlreadyExists(err error) bool { var apiErr *apierr.APIError diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 81e52a3445..320b8e75dd 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -18,7 +18,7 @@ import ( "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" - "github.com/databricks/cli/libs/tempdms" + "github.com/databricks/cli/libs/tmpdms" "github.com/databricks/databricks-sdk-go/apierr" ) @@ -117,7 +117,7 @@ func destroyCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, t // The destroy phase deletes artifacts and resources. func Destroy(ctx context.Context, b *bundle.Bundle, targetEngine engine.EngineType) { - useMetadataService, _ := env.DeploymentService(ctx) + useMetadataService, _ := env.ManagedState(ctx) if useMetadataService == "true" { log.Info(ctx, "Phase: destroy (with metadata service)") @@ -139,13 +139,13 @@ func Destroy(ctx context.Context, b *bundle.Bundle, targetEngine engine.EngineTy var failed bool if useMetadataService == "true" { - svc, svcErr := tempdms.NewDeploymentMetadataAPI(b.WorkspaceClient()) + svc, svcErr := tmpdms.NewDeploymentMetadataAPI(b.WorkspaceClient()) if svcErr != nil { logdiag.LogError(ctx, fmt.Errorf("failed to create metadata service client: %w", svcErr)) return } - _, _, cleanup, lockErr := deployMetadataLock(ctx, b, svc, tempdms.VersionTypeDestroy) + deploymentID, versionID, cleanup, lockErr := deployMetadataLock(ctx, b, svc, tmpdms.VersionTypeDestroy) if lockErr != nil { logdiag.LogError(ctx, lockErr) return @@ -153,6 +153,10 @@ func Destroy(ctx context.Context, b *bundle.Bundle, targetEngine engine.EngineTy defer func() { cleanup(failed || logdiag.HasError(ctx)) }() + + if targetEngine.IsDirect() { + b.DeploymentBundle.OperationReporter = makeOperationReporter(svc, deploymentID, versionID) + } } else { bundle.ApplyContext(ctx, b, lock.Acquire()) if logdiag.HasError(ctx) { diff --git a/bundle/phases/heartbeat.go b/bundle/phases/heartbeat.go index 1f9b3d41d1..925c53193c 100644 --- a/bundle/phases/heartbeat.go +++ b/bundle/phases/heartbeat.go @@ -5,14 +5,14 @@ import ( "time" "github.com/databricks/cli/libs/log" - "github.com/databricks/cli/libs/tempdms" + "github.com/databricks/cli/libs/tmpdms" ) -const defaultHeartbeatInterval = 2 * time.Minute +const defaultHeartbeatInterval = 30 * time.Second // startHeartbeat starts a background goroutine that sends heartbeats to keep // the deployment lock alive. Returns a cancel function to stop the heartbeat. -func startHeartbeat(ctx context.Context, svc *tempdms.DeploymentMetadataAPI, deploymentID, versionID string, interval time.Duration) context.CancelFunc { +func startHeartbeat(ctx context.Context, svc *tmpdms.DeploymentMetadataAPI, deploymentID, versionID string, interval time.Duration) context.CancelFunc { ctx, cancel := context.WithCancel(ctx) go func() { @@ -24,7 +24,7 @@ func startHeartbeat(ctx context.Context, svc *tempdms.DeploymentMetadataAPI, dep case <-ctx.Done(): return case <-ticker.C: - _, err := svc.Heartbeat(ctx, tempdms.HeartbeatRequest{ + _, err := svc.Heartbeat(ctx, tmpdms.HeartbeatRequest{ DeploymentID: deploymentID, VersionID: versionID, }) diff --git a/libs/testserver/deployment_metadata.go b/libs/testserver/deployment_metadata.go index a1968c8638..eaed985156 100644 --- a/libs/testserver/deployment_metadata.go +++ b/libs/testserver/deployment_metadata.go @@ -8,23 +8,23 @@ import ( "strings" "time" - "github.com/databricks/cli/libs/tempdms" + "github.com/databricks/cli/libs/tmpdms" ) // deploymentMetadataState holds in-memory state for the deployment metadata service. // Stored per-workspace inside FakeWorkspace. type deploymentMetadataState struct { // deployments keyed by deployment_id - deployments map[string]tempdms.Deployment + deployments map[string]tmpdms.Deployment // versions keyed by "deploymentId/versionId" - versions map[string]tempdms.Version + versions map[string]tmpdms.Version // operations keyed by "deploymentId/versionId/resourceKey" - operations map[string]tempdms.Operation + operations map[string]tmpdms.Operation // resources keyed by "deploymentId/resourceKey" - resources map[string]tempdms.Resource + resources map[string]tmpdms.Resource // lock state per deployment: which version holds the lock and when it expires lockHolder map[string]string // deploymentId -> "deployments/{id}/versions/{vid}" @@ -33,10 +33,10 @@ type deploymentMetadataState struct { func newDeploymentMetadataState() *deploymentMetadataState { return &deploymentMetadataState{ - deployments: map[string]tempdms.Deployment{}, - versions: map[string]tempdms.Version{}, - operations: map[string]tempdms.Operation{}, - resources: map[string]tempdms.Resource{}, + deployments: map[string]tmpdms.Deployment{}, + versions: map[string]tmpdms.Version{}, + operations: map[string]tmpdms.Operation{}, + resources: map[string]tmpdms.Resource{}, lockHolder: map[string]string{}, lockExpiry: map[string]time.Time{}, } @@ -57,7 +57,7 @@ func (s *FakeWorkspace) DeploymentMetadataCreateDeployment(req Request) Response } // The body maps to the Deployment sub-message. - var bodyDeployment tempdms.Deployment + var bodyDeployment tmpdms.Deployment if len(req.Body) > 0 { if err := json.Unmarshal(req.Body, &bodyDeployment); err != nil { return Response{ @@ -76,11 +76,11 @@ func (s *FakeWorkspace) DeploymentMetadataCreateDeployment(req Request) Response } now := time.Now().UTC() - deployment := tempdms.Deployment{ + deployment := tmpdms.Deployment{ Name: fmt.Sprintf("deployments/%s", deploymentID), DisplayName: deploymentID, TargetName: bodyDeployment.TargetName, - Status: tempdms.DeploymentStatusActive, + Status: tmpdms.DeploymentStatusActive, CreatedBy: s.CurrentUser().UserName, CreateTime: &now, UpdateTime: &now, @@ -117,7 +117,7 @@ func (s *FakeWorkspace) DeploymentMetadataDeleteDeployment(deploymentID string) } now := time.Now().UTC() - deployment.Status = tempdms.DeploymentStatusDeleted + deployment.Status = tmpdms.DeploymentStatusDeleted deployment.DestroyTime = &now deployment.DestroyedBy = s.CurrentUser().UserName deployment.UpdateTime = &now @@ -148,7 +148,7 @@ func (s *FakeWorkspace) DeploymentMetadataCreateVersion(req Request, deploymentI } // The body maps to the Version sub-message. - var bodyVersion tempdms.Version + var bodyVersion tmpdms.Version if len(req.Body) > 0 { if err := json.Unmarshal(req.Body, &bodyVersion); err != nil { return Response{ @@ -197,12 +197,12 @@ func (s *FakeWorkspace) DeploymentMetadataCreateVersion(req Request, deploymentI } versionKey := deploymentID + "/" + versionID - version := tempdms.Version{ + version := tmpdms.Version{ Name: fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID), VersionID: versionID, CreatedBy: s.CurrentUser().UserName, CreateTime: &now, - Status: tempdms.VersionStatusInProgress, + Status: tmpdms.VersionStatusInProgress, } version.CliVersion = bodyVersion.CliVersion version.VersionType = bodyVersion.VersionType @@ -216,7 +216,7 @@ func (s *FakeWorkspace) DeploymentMetadataCreateVersion(req Request, deploymentI // Update the deployment's last_version_id and status. deployment.LastVersionID = versionID - deployment.Status = tempdms.DeploymentStatusInProgress + deployment.Status = tmpdms.DeploymentStatusInProgress deployment.UpdateTime = &now state.deployments[deploymentID] = deployment @@ -251,7 +251,7 @@ func (s *FakeWorkspace) DeploymentMetadataHeartbeat(req Request, deploymentID, v } } - if version.Status != tempdms.VersionStatusInProgress { + if version.Status != tmpdms.VersionStatusInProgress { return Response{ StatusCode: http.StatusConflict, Body: map[string]string{"error_code": "ABORTED", "message": "version is no longer in progress"}, @@ -272,7 +272,7 @@ func (s *FakeWorkspace) DeploymentMetadataHeartbeat(req Request, deploymentID, v newExpiry := now.Add(lockDuration) state.lockExpiry[deploymentID] = newExpiry - return Response{Body: tempdms.HeartbeatResponse{ExpireTime: &newExpiry}} + return Response{Body: tmpdms.HeartbeatResponse{ExpireTime: &newExpiry}} } func (s *FakeWorkspace) DeploymentMetadataCompleteVersion(req Request, deploymentID, versionID string) Response { @@ -288,14 +288,14 @@ func (s *FakeWorkspace) DeploymentMetadataCompleteVersion(req Request, deploymen } } - if version.Status != tempdms.VersionStatusInProgress { + if version.Status != tmpdms.VersionStatusInProgress { return Response{ StatusCode: http.StatusConflict, Body: map[string]string{"error_code": "ABORTED", "message": "version is already completed"}, } } - var completeReq tempdms.CompleteVersionRequest + var completeReq tmpdms.CompleteVersionRequest if err := json.Unmarshal(req.Body, &completeReq); err != nil { return Response{ StatusCode: http.StatusBadRequest, @@ -304,7 +304,7 @@ func (s *FakeWorkspace) DeploymentMetadataCompleteVersion(req Request, deploymen } now := time.Now().UTC() - version.Status = tempdms.VersionStatusCompleted + version.Status = tmpdms.VersionStatusCompleted version.CompleteTime = &now version.CompletionReason = completeReq.CompletionReason version.CompletedBy = s.CurrentUser().UserName @@ -317,10 +317,10 @@ func (s *FakeWorkspace) DeploymentMetadataCompleteVersion(req Request, deploymen // Update deployment status based on completion reason. if deployment, ok := state.deployments[deploymentID]; ok { switch completeReq.CompletionReason { - case tempdms.VersionCompleteSuccess: - deployment.Status = tempdms.DeploymentStatusActive - case tempdms.VersionCompleteFailure, tempdms.VersionCompleteForceAbort, tempdms.VersionCompleteLeaseExpired: - deployment.Status = tempdms.DeploymentStatusFailed + case tmpdms.VersionCompleteSuccess: + deployment.Status = tmpdms.DeploymentStatusActive + case tmpdms.VersionCompleteFailure, tmpdms.VersionCompleteForceAbort, tmpdms.VersionCompleteLeaseExpired: + deployment.Status = tmpdms.DeploymentStatusFailed } deployment.UpdateTime = &now state.deployments[deploymentID] = deployment @@ -344,7 +344,7 @@ func (s *FakeWorkspace) DeploymentMetadataCreateOperation(req Request, deploymen } // The body maps to the Operation sub-message. - var bodyOperation tempdms.Operation + var bodyOperation tmpdms.Operation if len(req.Body) > 0 { if err := json.Unmarshal(req.Body, &bodyOperation); err != nil { return Response{ @@ -356,7 +356,7 @@ func (s *FakeWorkspace) DeploymentMetadataCreateOperation(req Request, deploymen now := time.Now().UTC() opKey := deploymentID + "/" + versionID + "/" + resourceKey - operation := tempdms.Operation{ + operation := tmpdms.Operation{ Name: fmt.Sprintf("deployments/%s/versions/%s/operations/%s", deploymentID, versionID, resourceKey), ResourceKey: resourceKey, CreateTime: &now, @@ -371,7 +371,7 @@ func (s *FakeWorkspace) DeploymentMetadataCreateOperation(req Request, deploymen // Upsert the deployment-level resource. resKey := deploymentID + "/" + resourceKey - resource := tempdms.Resource{ + resource := tmpdms.Resource{ Name: fmt.Sprintf("deployments/%s/resources/%s", deploymentID, resourceKey), ResourceKey: resourceKey, State: bodyOperation.State, @@ -389,14 +389,14 @@ func (s *FakeWorkspace) DeploymentMetadataListResources(deploymentID string) Res state := s.deploymentMetadata prefix := deploymentID + "/" - var resources []tempdms.Resource + var resources []tmpdms.Resource for key, resource := range state.resources { if strings.HasPrefix(key, prefix) { resources = append(resources, resource) } } if resources == nil { - resources = []tempdms.Resource{} + resources = []tmpdms.Resource{} } - return Response{Body: tempdms.ListResourcesResponse{Resources: resources}} + return Response{Body: tmpdms.ListResourcesResponse{Resources: resources}} } diff --git a/libs/tempdms/api.go b/libs/tmpdms/api.go similarity index 99% rename from libs/tempdms/api.go rename to libs/tmpdms/api.go index 005425fa68..a2bbd5857f 100644 --- a/libs/tempdms/api.go +++ b/libs/tmpdms/api.go @@ -1,4 +1,4 @@ -package tempdms +package tmpdms import ( "context" diff --git a/libs/tempdms/types.go b/libs/tmpdms/types.go similarity index 98% rename from libs/tempdms/types.go rename to libs/tmpdms/types.go index b36386b438..ba1ca723c2 100644 --- a/libs/tempdms/types.go +++ b/libs/tmpdms/types.go @@ -1,8 +1,8 @@ -// Package tempdms is a temporary client library for the Deployment Metadata Service. +// Package tmpdms is a temporary client library for the Deployment Metadata Service. // It mirrors the structure that the Databricks Go SDK will eventually generate from // the service's proto definitions. When the protos land in the SDK, migration should // be a straightforward import path change. -package tempdms +package tmpdms import "time" From b1a9a0a3f05eb70d5125ead3ad937fd6196ef6d0 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 31 Mar 2026 22:24:48 +0000 Subject: [PATCH 08/11] Add acceptance test golden files and fix SDK compatibility Fix map[string]string -> map[string]any in tmpdms API client for SDK v0.126.0 compatibility. Generate golden files for metadata-service acceptance test showing the full deploy/destroy request flow. Co-authored-by: Isaac --- .../deploy/metadata-service/out.test.toml | 6 + .../bundle/deploy/metadata-service/output.txt | 113 ++++++++++++++++++ libs/tmpdms/api.go | 6 +- 3 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 acceptance/bundle/deploy/metadata-service/out.test.toml create mode 100644 acceptance/bundle/deploy/metadata-service/output.txt diff --git a/acceptance/bundle/deploy/metadata-service/out.test.toml b/acceptance/bundle/deploy/metadata-service/out.test.toml new file mode 100644 index 0000000000..6ce208a048 --- /dev/null +++ b/acceptance/bundle/deploy/metadata-service/out.test.toml @@ -0,0 +1,6 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["direct"] + DATABRICKS_BUNDLE_MANAGED_STATE = ["true"] diff --git a/acceptance/bundle/deploy/metadata-service/output.txt b/acceptance/bundle/deploy/metadata-service/output.txt new file mode 100644 index 0000000000..e988794ab6 --- /dev/null +++ b/acceptance/bundle/deploy/metadata-service/output.txt @@ -0,0 +1,113 @@ + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/metadata-service-test/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> print_requests.py --get //bundle +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments", + "q": { + "deployment_id": "[UUID]" + }, + "body": { + "target_name": "default" + } +} +{ + "method": "GET", + "path": "/api/2.0/bundle/deployments/[UUID]" +} +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments/[UUID]/versions", + "q": { + "version_id": "1" + }, + "body": { + "cli_version": "[DEV_VERSION]", + "version_type": 1, + "target_name": "default" + } +} +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments/[UUID]/versions/1/operations", + "q": { + "resource_key": "resources.jobs.test_job" + }, + "body": { + "resource_key": "resources.jobs.test_job", + "action_type": 4, + "resource_id": "[NUMID]", + "status": 1 + } +} +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments/[UUID]/versions/1/complete", + "body": { + "name": "deployments/[UUID]/versions/1", + "completion_reason": 1 + } +} + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.jobs.test_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/metadata-service-test/default + +Deleting files... +Destroy complete! + +>>> print_requests.py --get //bundle +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments", + "q": { + "deployment_id": "[UUID]" + }, + "body": { + "target_name": "default" + } +} +{ + "method": "GET", + "path": "/api/2.0/bundle/deployments/[UUID]" +} +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments/[UUID]/versions", + "q": { + "version_id": "1" + }, + "body": { + "cli_version": "[DEV_VERSION]", + "version_type": 2, + "target_name": "default" + } +} +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments/[UUID]/versions/1/operations", + "q": { + "resource_key": "resources.jobs.test_job" + }, + "body": { + "resource_key": "resources.jobs.test_job", + "action_type": 6, + "resource_id": "[NUMID]", + "status": 1 + } +} +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments/[UUID]/versions/1/complete", + "body": { + "name": "deployments/[UUID]/versions/1", + "completion_reason": 1 + } +} diff --git a/libs/tmpdms/api.go b/libs/tmpdms/api.go index a2bbd5857f..3f6fcf1957 100644 --- a/libs/tmpdms/api.go +++ b/libs/tmpdms/api.go @@ -34,7 +34,7 @@ func NewDeploymentMetadataAPI(w *databricks.WorkspaceClient) (*DeploymentMetadat func (a *DeploymentMetadataAPI) CreateDeployment(ctx context.Context, request CreateDeploymentRequest) (*Deployment, error) { var resp Deployment path := fmt.Sprintf("%s/deployments", basePath) - query := map[string]string{"deployment_id": request.DeploymentID} + query := map[string]any{"deployment_id": request.DeploymentID} err := a.api.Do(ctx, http.MethodPost, path, nil, query, request.Deployment, &resp) if err != nil { return nil, mapError("create deployment", err) @@ -65,7 +65,7 @@ func (a *DeploymentMetadataAPI) DeleteDeployment(ctx context.Context, request De func (a *DeploymentMetadataAPI) CreateVersion(ctx context.Context, request CreateVersionRequest) (*Version, error) { var resp Version path := fmt.Sprintf("%s/deployments/%s/versions", basePath, request.DeploymentID) - query := map[string]string{"version_id": request.VersionID} + query := map[string]any{"version_id": request.VersionID} err := a.api.Do(ctx, http.MethodPost, path, nil, query, request.Version, &resp) if err != nil { return nil, mapError("create version", err) @@ -106,7 +106,7 @@ func (a *DeploymentMetadataAPI) CompleteVersion(ctx context.Context, request Com func (a *DeploymentMetadataAPI) CreateOperation(ctx context.Context, request CreateOperationRequest) (*Operation, error) { var resp Operation path := fmt.Sprintf("%s/deployments/%s/versions/%s/operations", basePath, request.DeploymentID, request.VersionID) - query := map[string]string{"resource_key": request.ResourceKey} + query := map[string]any{"resource_key": request.ResourceKey} err := a.api.Do(ctx, http.MethodPost, path, nil, query, request.Operation, &resp) if err != nil { return nil, mapError("create operation", err) From 8756548c98fb5af628072d09ceeb58d588f1d2c5 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 31 Mar 2026 23:11:36 +0000 Subject: [PATCH 09/11] Use string enums, report failed operations, and refactor lock acquisition - Change all enum types from int to string using proto enum name strings (e.g. "OPERATION_ACTION_TYPE_CREATE" instead of 4), matching proto-over-HTTP serialization format. - Report failed operations to the metadata service with error messages, not just successful ones. - Enforce direct deployment engine for managed state (early return). - Extract acquireMetadataLock helper to deduplicate deploy/destroy lock blocks. - Add deploy-error acceptance test verifying failed operation reporting. Co-authored-by: Isaac --- .../deploy-error/databricks.yml | 7 + .../deploy-error/out.test.toml | 6 + .../metadata-service/deploy-error/output.txt | 61 +++++++++ .../metadata-service/deploy-error/script | 5 + .../metadata-service/deploy-error/test.toml | 8 ++ .../bundle/deploy/metadata-service/output.txt | 16 +-- bundle/direct/bundle_apply.go | 22 ++-- bundle/direct/pkg.go | 9 +- bundle/phases/deploy.go | 14 +- bundle/phases/deploy_metadata.go | 41 +++++- bundle/phases/destroy.go | 12 +- libs/tmpdms/types.go | 121 +++++++++--------- 12 files changed, 213 insertions(+), 109 deletions(-) create mode 100644 acceptance/bundle/deploy/metadata-service/deploy-error/databricks.yml create mode 100644 acceptance/bundle/deploy/metadata-service/deploy-error/out.test.toml create mode 100644 acceptance/bundle/deploy/metadata-service/deploy-error/output.txt create mode 100644 acceptance/bundle/deploy/metadata-service/deploy-error/script create mode 100644 acceptance/bundle/deploy/metadata-service/deploy-error/test.toml diff --git a/acceptance/bundle/deploy/metadata-service/deploy-error/databricks.yml b/acceptance/bundle/deploy/metadata-service/deploy-error/databricks.yml new file mode 100644 index 0000000000..4786eeddf7 --- /dev/null +++ b/acceptance/bundle/deploy/metadata-service/deploy-error/databricks.yml @@ -0,0 +1,7 @@ +bundle: + name: metadata-service-error-test + +resources: + jobs: + test_job: + name: test-job diff --git a/acceptance/bundle/deploy/metadata-service/deploy-error/out.test.toml b/acceptance/bundle/deploy/metadata-service/deploy-error/out.test.toml new file mode 100644 index 0000000000..6ce208a048 --- /dev/null +++ b/acceptance/bundle/deploy/metadata-service/deploy-error/out.test.toml @@ -0,0 +1,6 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["direct"] + DATABRICKS_BUNDLE_MANAGED_STATE = ["true"] diff --git a/acceptance/bundle/deploy/metadata-service/deploy-error/output.txt b/acceptance/bundle/deploy/metadata-service/deploy-error/output.txt new file mode 100644 index 0000000000..db0a3d43e1 --- /dev/null +++ b/acceptance/bundle/deploy/metadata-service/deploy-error/output.txt @@ -0,0 +1,61 @@ + +>>> musterr [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/metadata-service-error-test/default/files... +Deploying resources... +Error: cannot create resources.jobs.test_job: Invalid job configuration. (400 INVALID_PARAMETER_VALUE) + +Endpoint: POST [DATABRICKS_URL]/api/2.2/jobs/create +HTTP Status: 400 Bad Request +API error_code: INVALID_PARAMETER_VALUE +API message: Invalid job configuration. + +Updating deployment state... + +>>> print_requests.py --get //bundle +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments", + "q": { + "deployment_id": "[UUID]" + }, + "body": { + "target_name": "default" + } +} +{ + "method": "GET", + "path": "/api/2.0/bundle/deployments/[UUID]" +} +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments/[UUID]/versions", + "q": { + "version_id": "1" + }, + "body": { + "cli_version": "[DEV_VERSION]", + "version_type": "VERSION_TYPE_DEPLOY", + "target_name": "default" + } +} +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments/[UUID]/versions/1/operations", + "q": { + "resource_key": "resources.jobs.test_job" + }, + "body": { + "resource_key": "resources.jobs.test_job", + "action_type": "OPERATION_ACTION_TYPE_CREATE", + "status": "OPERATION_STATUS_FAILED", + "error_message": "Invalid job configuration." + } +} +{ + "method": "POST", + "path": "/api/2.0/bundle/deployments/[UUID]/versions/1/complete", + "body": { + "name": "deployments/[UUID]/versions/1", + "completion_reason": "VERSION_COMPLETE_FAILURE" + } +} diff --git a/acceptance/bundle/deploy/metadata-service/deploy-error/script b/acceptance/bundle/deploy/metadata-service/deploy-error/script new file mode 100644 index 0000000000..806beae3de --- /dev/null +++ b/acceptance/bundle/deploy/metadata-service/deploy-error/script @@ -0,0 +1,5 @@ +# Deploy with the metadata service enabled, expecting a resource creation failure. +trace musterr $CLI bundle deploy + +# Print the metadata service requests to verify the failed operation is reported. +trace print_requests.py --get //bundle diff --git a/acceptance/bundle/deploy/metadata-service/deploy-error/test.toml b/acceptance/bundle/deploy/metadata-service/deploy-error/test.toml new file mode 100644 index 0000000000..9d7f2c1348 --- /dev/null +++ b/acceptance/bundle/deploy/metadata-service/deploy-error/test.toml @@ -0,0 +1,8 @@ +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] +EnvMatrix.DATABRICKS_BUNDLE_MANAGED_STATE = ["true"] +RecordRequests = true + +[[Server]] +Pattern = "POST /api/2.2/jobs/create" +Response.StatusCode = 400 +Response.Body = '{"error_code": "INVALID_PARAMETER_VALUE", "message": "Invalid job configuration."}' diff --git a/acceptance/bundle/deploy/metadata-service/output.txt b/acceptance/bundle/deploy/metadata-service/output.txt index e988794ab6..9006297906 100644 --- a/acceptance/bundle/deploy/metadata-service/output.txt +++ b/acceptance/bundle/deploy/metadata-service/output.txt @@ -28,7 +28,7 @@ Deployment complete! }, "body": { "cli_version": "[DEV_VERSION]", - "version_type": 1, + "version_type": "VERSION_TYPE_DEPLOY", "target_name": "default" } } @@ -40,9 +40,9 @@ Deployment complete! }, "body": { "resource_key": "resources.jobs.test_job", - "action_type": 4, + "action_type": "OPERATION_ACTION_TYPE_CREATE", "resource_id": "[NUMID]", - "status": 1 + "status": "OPERATION_STATUS_SUCCEEDED" } } { @@ -50,7 +50,7 @@ Deployment complete! "path": "/api/2.0/bundle/deployments/[UUID]/versions/1/complete", "body": { "name": "deployments/[UUID]/versions/1", - "completion_reason": 1 + "completion_reason": "VERSION_COMPLETE_SUCCESS" } } @@ -86,7 +86,7 @@ Destroy complete! }, "body": { "cli_version": "[DEV_VERSION]", - "version_type": 2, + "version_type": "VERSION_TYPE_DESTROY", "target_name": "default" } } @@ -98,9 +98,9 @@ Destroy complete! }, "body": { "resource_key": "resources.jobs.test_job", - "action_type": 6, + "action_type": "OPERATION_ACTION_TYPE_DELETE", "resource_id": "[NUMID]", - "status": 1 + "status": "OPERATION_STATUS_SUCCEEDED" } } { @@ -108,6 +108,6 @@ Destroy complete! "path": "/api/2.0/bundle/deployments/[UUID]/versions/1/complete", "body": { "name": "deployments/[UUID]/versions/1", - "completion_reason": 1 + "completion_reason": "VERSION_COMPLETE_SUCCESS" } } diff --git a/bundle/direct/bundle_apply.go b/bundle/direct/bundle_apply.go index 68b5672257..95cf32f416 100644 --- a/bundle/direct/bundle_apply.go +++ b/bundle/direct/bundle_apply.go @@ -94,14 +94,13 @@ func (b *DeploymentBundle) Apply(ctx context.Context, client *databricks.Workspa } err = d.Destroy(ctx, &b.StateDB) + if b.OperationReporter != nil { + b.OperationReporter(ctx, resourceKey, deleteResourceID, action, err) + } if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: %w", errorPrefix, err)) return false } - - if b.OperationReporter != nil { - b.OperationReporter(ctx, resourceKey, deleteResourceID, action) - } return true } @@ -137,16 +136,19 @@ func (b *DeploymentBundle) Apply(ctx context.Context, client *databricks.Workspa err = d.Deploy(ctx, &b.StateDB, sv.Value, action, entry) } + // Report the operation inline to the metadata service. + if b.OperationReporter != nil && !migrateMode { + var resourceID string + if dbentry, ok := b.StateDB.GetResourceEntry(resourceKey); ok { + resourceID = dbentry.ID + } + b.OperationReporter(ctx, resourceKey, resourceID, action, err) + } + if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: %w", errorPrefix, err)) return false } - - // Report the operation inline to the metadata service. - if b.OperationReporter != nil && !migrateMode { - dbentry, _ := b.StateDB.GetResourceEntry(resourceKey) - b.OperationReporter(ctx, resourceKey, dbentry.ID, action) - } } // TODO: Note, we only really need remote state if there are remote references. diff --git a/bundle/direct/pkg.go b/bundle/direct/pkg.go index 7932c040eb..aee2265eba 100644 --- a/bundle/direct/pkg.go +++ b/bundle/direct/pkg.go @@ -37,10 +37,11 @@ type DeploymentUnit struct { DependsOn []deployplan.DependsOnEntry } -// OperationReporter is called after each successful resource operation to report -// it to the deployment metadata service. It is best-effort: failures are logged -// as warnings by the caller. -type OperationReporter func(ctx context.Context, resourceKey string, resourceID string, action deployplan.ActionType) +// OperationReporter is called after each resource operation (success or failure) +// to report it to the deployment metadata service. If operationErr is non-nil the +// operation is recorded as failed with the error message. It is best-effort: +// reporting failures are logged as warnings by the caller. +type OperationReporter func(ctx context.Context, resourceKey string, resourceID string, action deployplan.ActionType, operationErr error) // DeploymentBundle holds everything needed to deploy a bundle type DeploymentBundle struct { diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 9c067bcff4..b2d272dac3 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -3,7 +3,6 @@ package phases import ( "context" "errors" - "fmt" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/artifacts" @@ -109,9 +108,7 @@ func deployCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, ta bundle.ApplyContext(ctx, b, terraform.Apply()) } - // Even if deployment failed, there might be updates in states that we need to upload. statemgmt.PushResourcesState(ctx, b, targetEngine) - if logdiag.HasError(ctx) { return } @@ -158,13 +155,12 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand var failed bool if useMetadataService == "true" { - svc, err := tmpdms.NewDeploymentMetadataAPI(b.WorkspaceClient()) - if err != nil { - logdiag.LogError(ctx, fmt.Errorf("failed to create metadata service client: %w", err)) + if !targetEngine.IsDirect() { + logdiag.LogError(ctx, errors.New("managed state is only supported with the direct deployment engine")) return } - deploymentID, versionID, cleanup, err := deployMetadataLock(ctx, b, svc, tmpdms.VersionTypeDeploy) + cleanup, err := acquireMetadataLock(ctx, b, tmpdms.VersionTypeDeploy) if err != nil { logdiag.LogError(ctx, err) return @@ -172,10 +168,6 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand defer func() { cleanup(failed || logdiag.HasError(ctx)) }() - - if targetEngine.IsDirect() { - b.DeploymentBundle.OperationReporter = makeOperationReporter(svc, deploymentID, versionID) - } } else { bundle.ApplyContext(ctx, b, lock.Acquire()) if logdiag.HasError(ctx) { diff --git a/bundle/phases/deploy_metadata.go b/bundle/phases/deploy_metadata.go index 85d7290805..b77c025cde 100644 --- a/bundle/phases/deploy_metadata.go +++ b/bundle/phases/deploy_metadata.go @@ -21,6 +21,24 @@ import ( "github.com/google/uuid" ) +// acquireMetadataLock creates the metadata service client, acquires the deployment +// lock, and sets up the operation reporter on the bundle. It returns a cleanup +// function that releases the lock, or an error if the lock could not be acquired. +func acquireMetadataLock(ctx context.Context, b *bundle.Bundle, versionType tmpdms.VersionType) (cleanup func(failed bool), err error) { + svc, err := tmpdms.NewDeploymentMetadataAPI(b.WorkspaceClient()) + if err != nil { + return nil, fmt.Errorf("failed to create metadata service client: %w", err) + } + + deploymentID, versionID, cleanup, err := deployMetadataLock(ctx, b, svc, versionType) + if err != nil { + return nil, err + } + + b.DeploymentBundle.OperationReporter = makeOperationReporter(svc, deploymentID, versionID) + return cleanup, nil +} + // deployMetadataLock implements the lock acquire/release lifecycle using the // deployment metadata service (CreateVersion / CompleteVersion). // @@ -120,7 +138,7 @@ func deployMetadataLock(ctx context.Context, b *bundle.Bundle, svc *tmpdms.Deplo if completeErr != nil { log.Warnf(ctx, "Failed to release deployment lock: %v", completeErr) } else { - log.Infof(ctx, "Released deployment lock: deployment=%s version=%s reason=%d", deploymentID, versionID, reason) + log.Infof(ctx, "Released deployment lock: deployment=%s version=%s reason=%s", deploymentID, versionID, reason) } } @@ -144,24 +162,33 @@ func planActionToOperationAction(action deployplan.ActionType) tmpdms.OperationA } // makeOperationReporter returns an OperationReporter that reports each resource -// operation to the metadata service. Failures are logged as warnings. +// operation (success or failure) to the metadata service. Reporting failures are +// logged as warnings and do not affect the deploy outcome. func makeOperationReporter(svc *tmpdms.DeploymentMetadataAPI, deploymentID, versionID string) direct.OperationReporter { - return func(ctx context.Context, resourceKey string, resourceID string, action deployplan.ActionType) { + return func(ctx context.Context, resourceKey string, resourceID string, action deployplan.ActionType, operationErr error) { actionType := planActionToOperationAction(action) if actionType == tmpdms.OperationActionTypeUnspecified { return } + status := tmpdms.OperationStatusSucceeded + var errorMessage string + if operationErr != nil { + status = tmpdms.OperationStatusFailed + errorMessage = operationErr.Error() + } + _, err := svc.CreateOperation(ctx, tmpdms.CreateOperationRequest{ DeploymentID: deploymentID, VersionID: versionID, Parent: fmt.Sprintf("deployments/%s/versions/%s", deploymentID, versionID), ResourceKey: resourceKey, Operation: &tmpdms.Operation{ - ResourceKey: resourceKey, - ResourceID: resourceID, - Status: tmpdms.OperationStatusSucceeded, - ActionType: actionType, + ResourceKey: resourceKey, + ResourceID: resourceID, + Status: status, + ActionType: actionType, + ErrorMessage: errorMessage, }, }) if err != nil { diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 320b8e75dd..652126f198 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -3,7 +3,6 @@ package phases import ( "context" "errors" - "fmt" "net/http" "github.com/databricks/cli/bundle" @@ -139,13 +138,12 @@ func Destroy(ctx context.Context, b *bundle.Bundle, targetEngine engine.EngineTy var failed bool if useMetadataService == "true" { - svc, svcErr := tmpdms.NewDeploymentMetadataAPI(b.WorkspaceClient()) - if svcErr != nil { - logdiag.LogError(ctx, fmt.Errorf("failed to create metadata service client: %w", svcErr)) + if !targetEngine.IsDirect() { + logdiag.LogError(ctx, errors.New("managed state is only supported with the direct deployment engine")) return } - deploymentID, versionID, cleanup, lockErr := deployMetadataLock(ctx, b, svc, tmpdms.VersionTypeDestroy) + cleanup, lockErr := acquireMetadataLock(ctx, b, tmpdms.VersionTypeDestroy) if lockErr != nil { logdiag.LogError(ctx, lockErr) return @@ -153,10 +151,6 @@ func Destroy(ctx context.Context, b *bundle.Bundle, targetEngine engine.EngineTy defer func() { cleanup(failed || logdiag.HasError(ctx)) }() - - if targetEngine.IsDirect() { - b.DeploymentBundle.OperationReporter = makeOperationReporter(svc, deploymentID, versionID) - } } else { bundle.ApplyContext(ctx, b, lock.Acquire()) if logdiag.HasError(ctx) { diff --git a/libs/tmpdms/types.go b/libs/tmpdms/types.go index ba1ca723c2..320823d00e 100644 --- a/libs/tmpdms/types.go +++ b/libs/tmpdms/types.go @@ -7,87 +7,88 @@ package tmpdms import "time" // Enum types matching the proto definitions. +// Values are the proto enum name strings, which is how proto-over-HTTP serializes enums. -type DeploymentStatus int -type VersionStatus int -type VersionComplete int -type VersionType int -type OperationStatus int -type OperationActionType int -type DeploymentResourceType int +type DeploymentStatus string +type VersionStatus string +type VersionComplete string +type VersionType string +type OperationStatus string +type OperationActionType string +type DeploymentResourceType string const ( - DeploymentStatusUnspecified DeploymentStatus = 0 - DeploymentStatusActive DeploymentStatus = 1 - DeploymentStatusFailed DeploymentStatus = 2 - DeploymentStatusInProgress DeploymentStatus = 3 - DeploymentStatusDeleted DeploymentStatus = 4 + DeploymentStatusUnspecified DeploymentStatus = "DEPLOYMENT_STATUS_UNSPECIFIED" + DeploymentStatusActive DeploymentStatus = "DEPLOYMENT_STATUS_ACTIVE" + DeploymentStatusFailed DeploymentStatus = "DEPLOYMENT_STATUS_FAILED" + DeploymentStatusInProgress DeploymentStatus = "DEPLOYMENT_STATUS_IN_PROGRESS" + DeploymentStatusDeleted DeploymentStatus = "DEPLOYMENT_STATUS_DELETED" ) const ( - VersionStatusUnspecified VersionStatus = 0 - VersionStatusInProgress VersionStatus = 1 - VersionStatusCompleted VersionStatus = 2 + VersionStatusUnspecified VersionStatus = "VERSION_STATUS_UNSPECIFIED" + VersionStatusInProgress VersionStatus = "VERSION_STATUS_IN_PROGRESS" + VersionStatusCompleted VersionStatus = "VERSION_STATUS_COMPLETED" ) const ( - VersionCompleteUnspecified VersionComplete = 0 - VersionCompleteSuccess VersionComplete = 1 - VersionCompleteFailure VersionComplete = 2 - VersionCompleteForceAbort VersionComplete = 3 - VersionCompleteLeaseExpired VersionComplete = 4 + VersionCompleteUnspecified VersionComplete = "VERSION_COMPLETE_UNSPECIFIED" + VersionCompleteSuccess VersionComplete = "VERSION_COMPLETE_SUCCESS" + VersionCompleteFailure VersionComplete = "VERSION_COMPLETE_FAILURE" + VersionCompleteForceAbort VersionComplete = "VERSION_COMPLETE_FORCE_ABORT" + VersionCompleteLeaseExpired VersionComplete = "VERSION_COMPLETE_LEASE_EXPIRED" ) const ( - VersionTypeUnspecified VersionType = 0 - VersionTypeDeploy VersionType = 1 - VersionTypeDestroy VersionType = 2 + VersionTypeUnspecified VersionType = "VERSION_TYPE_UNSPECIFIED" + VersionTypeDeploy VersionType = "VERSION_TYPE_DEPLOY" + VersionTypeDestroy VersionType = "VERSION_TYPE_DESTROY" ) const ( - OperationStatusUnspecified OperationStatus = 0 - OperationStatusSucceeded OperationStatus = 1 - OperationStatusFailed OperationStatus = 2 + OperationStatusUnspecified OperationStatus = "OPERATION_STATUS_UNSPECIFIED" + OperationStatusSucceeded OperationStatus = "OPERATION_STATUS_SUCCEEDED" + OperationStatusFailed OperationStatus = "OPERATION_STATUS_FAILED" ) const ( - OperationActionTypeUnspecified OperationActionType = 0 - OperationActionTypeResize OperationActionType = 1 - OperationActionTypeUpdate OperationActionType = 2 - OperationActionTypeUpdateWithID OperationActionType = 3 - OperationActionTypeCreate OperationActionType = 4 - OperationActionTypeRecreate OperationActionType = 5 - OperationActionTypeDelete OperationActionType = 6 - OperationActionTypeBind OperationActionType = 7 - OperationActionTypeBindAndUpdate OperationActionType = 8 - OperationActionTypeInitRegister OperationActionType = 9 + OperationActionTypeUnspecified OperationActionType = "OPERATION_ACTION_TYPE_UNSPECIFIED" + OperationActionTypeResize OperationActionType = "OPERATION_ACTION_TYPE_RESIZE" + OperationActionTypeUpdate OperationActionType = "OPERATION_ACTION_TYPE_UPDATE" + OperationActionTypeUpdateWithID OperationActionType = "OPERATION_ACTION_TYPE_UPDATE_WITH_ID" + OperationActionTypeCreate OperationActionType = "OPERATION_ACTION_TYPE_CREATE" + OperationActionTypeRecreate OperationActionType = "OPERATION_ACTION_TYPE_RECREATE" + OperationActionTypeDelete OperationActionType = "OPERATION_ACTION_TYPE_DELETE" + OperationActionTypeBind OperationActionType = "OPERATION_ACTION_TYPE_BIND" + OperationActionTypeBindAndUpdate OperationActionType = "OPERATION_ACTION_TYPE_BIND_AND_UPDATE" + OperationActionTypeInitRegister OperationActionType = "OPERATION_ACTION_TYPE_INITIAL_REGISTER" ) const ( - ResourceTypeUnspecified DeploymentResourceType = 0 - ResourceTypeJob DeploymentResourceType = 1 - ResourceTypePipeline DeploymentResourceType = 2 - ResourceTypeModel DeploymentResourceType = 4 - ResourceTypeRegisteredModel DeploymentResourceType = 5 - ResourceTypeExperiment DeploymentResourceType = 6 - ResourceTypeServingEndpoint DeploymentResourceType = 7 - ResourceTypeQualityMonitor DeploymentResourceType = 8 - ResourceTypeSchema DeploymentResourceType = 9 - ResourceTypeVolume DeploymentResourceType = 10 - ResourceTypeCluster DeploymentResourceType = 11 - ResourceTypeDashboard DeploymentResourceType = 12 - ResourceTypeApp DeploymentResourceType = 13 - ResourceTypeCatalog DeploymentResourceType = 14 - ResourceTypeExternalLocation DeploymentResourceType = 15 - ResourceTypeSecretScope DeploymentResourceType = 16 - ResourceTypeAlert DeploymentResourceType = 17 - ResourceTypeSQLWarehouse DeploymentResourceType = 18 - ResourceTypeDatabaseInstance DeploymentResourceType = 19 - ResourceTypeDatabaseCatalog DeploymentResourceType = 20 - ResourceTypeSyncedDBTable DeploymentResourceType = 21 - ResourceTypePostgresProject DeploymentResourceType = 22 - ResourceTypePostgresBranch DeploymentResourceType = 23 - ResourceTypePostgresEndpoint DeploymentResourceType = 24 + ResourceTypeUnspecified DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_UNSPECIFIED" + ResourceTypeJob DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_JOB" + ResourceTypePipeline DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_PIPELINE" + ResourceTypeModel DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_MODEL" + ResourceTypeRegisteredModel DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_REGISTERED_MODEL" + ResourceTypeExperiment DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_EXPERIMENT" + ResourceTypeServingEndpoint DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_MODEL_SERVING_ENDPOINT" + ResourceTypeQualityMonitor DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_QUALITY_MONITOR" + ResourceTypeSchema DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_SCHEMA" + ResourceTypeVolume DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_VOLUME" + ResourceTypeCluster DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_CLUSTER" + ResourceTypeDashboard DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_DASHBOARD" + ResourceTypeApp DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_APP" + ResourceTypeCatalog DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_CATALOG" + ResourceTypeExternalLocation DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_EXTERNAL_LOCATION" + ResourceTypeSecretScope DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_SECRET_SCOPE" + ResourceTypeAlert DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_ALERT" + ResourceTypeSQLWarehouse DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_SQL_WAREHOUSE" + ResourceTypeDatabaseInstance DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_DATABASE_INSTANCE" + ResourceTypeDatabaseCatalog DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_DATABASE_CATALOG" + ResourceTypeSyncedDBTable DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_SYNCED_DATABASE_TABLE" + ResourceTypePostgresProject DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_POSTGRES_PROJECT" + ResourceTypePostgresBranch DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_POSTGRES_BRANCH" + ResourceTypePostgresEndpoint DeploymentResourceType = "DEPLOYMENT_RESOURCE_TYPE_POSTGRES_ENDPOINT" ) // Resource types (proto message equivalents). From 8117d818e6d47e2d469843e45d51b4d598ead5d7 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 1 Apr 2026 00:23:16 +0000 Subject: [PATCH 10/11] Inject liteswap traffic ID header when DATABRICKS_LITESWAP_ID is set When the DATABRICKS_LITESWAP_ID environment variable is set, wrap the SDK HTTP transport to inject the x-databricks-traffic-id header on all API requests. This routes traffic to the liteswap service instance for E2E testing against dev deployments. Usage: DATABRICKS_LITESWAP_ID=my-env databricks bundle deploy Co-authored-by: Isaac --- bundle/config/workspace.go | 26 ++++++++++++++++++++++++++ bundle/env/liteswap.go | 15 +++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 bundle/env/liteswap.go diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 32e2fdd38a..bd9cd871ed 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -1,6 +1,7 @@ package config import ( + "net/http" "os" "path/filepath" @@ -169,9 +170,34 @@ func (w *Workspace) Client() (*databricks.WorkspaceClient, error) { } } + // If DATABRICKS_LITESWAP_ID is set, wrap the transport to inject the + // x-databricks-traffic-id header for routing to the liteswap instance. + if liteswapID := os.Getenv("DATABRICKS_LITESWAP_ID"); liteswapID != "" { + inner := cfg.HTTPTransport + if inner == nil { + inner = http.DefaultTransport + } + cfg.HTTPTransport = &liteswapTransport{ + inner: inner, + trafficID: "testenv://liteswap/" + liteswapID, + } + } + return databricks.NewWorkspaceClient((*databricks.Config)(cfg)) } +// liteswapTransport injects the x-databricks-traffic-id header to route +// requests to a liteswap service instance. +type liteswapTransport struct { + inner http.RoundTripper + trafficID string +} + +func (t *liteswapTransport) RoundTrip(req *http.Request) (*http.Response, error) { + req.Header.Set("x-databricks-traffic-id", t.trafficID) + return t.inner.RoundTrip(req) +} + func init() { arg0 := os.Args[0] diff --git a/bundle/env/liteswap.go b/bundle/env/liteswap.go new file mode 100644 index 0000000000..1bdb6fc7c0 --- /dev/null +++ b/bundle/env/liteswap.go @@ -0,0 +1,15 @@ +package env + +import "context" + +// liteswapVariable names the environment variable that holds the liteswap +// environment name. When set, the CLI injects the x-databricks-traffic-id +// header on all API requests to route traffic to the liteswap service instance. +const liteswapVariable = "DATABRICKS_LITESWAP_ID" + +// LiteswapID returns the liteswap environment name if set. +func LiteswapID(ctx context.Context) (string, bool) { + return get(ctx, []string{ + liteswapVariable, + }) +} From d648a9e682fb2416c3076b78d947d8a32167fec5 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 1 Apr 2026 09:15:56 +0000 Subject: [PATCH 11/11] Remove unused liteswap env helper The LiteswapID() function was never called; workspace.go reads DATABRICKS_LITESWAP_ID via os.Getenv directly. Co-authored-by: Isaac --- bundle/env/liteswap.go | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 bundle/env/liteswap.go diff --git a/bundle/env/liteswap.go b/bundle/env/liteswap.go deleted file mode 100644 index 1bdb6fc7c0..0000000000 --- a/bundle/env/liteswap.go +++ /dev/null @@ -1,15 +0,0 @@ -package env - -import "context" - -// liteswapVariable names the environment variable that holds the liteswap -// environment name. When set, the CLI injects the x-databricks-traffic-id -// header on all API requests to route traffic to the liteswap service instance. -const liteswapVariable = "DATABRICKS_LITESWAP_ID" - -// LiteswapID returns the liteswap environment name if set. -func LiteswapID(ctx context.Context) (string, bool) { - return get(ctx, []string{ - liteswapVariable, - }) -}