From 04c22b8f907437e2d07ba8cd145b586424e34d80 Mon Sep 17 00:00:00 2001 From: subhadip18 Date: Sun, 22 Feb 2026 16:20:07 -0500 Subject: [PATCH] Add Databricks Terraform skill (fixes #145) --- .../1-provider-authentication.md | 309 ++++ .../2-aws-workspace-deployment.md | 603 ++++++++ .../3-azure-workspace-deployment.md | 536 +++++++ .../4-gcp-workspace-deployment.md | 387 +++++ .../5-unity-catalog.md | 836 ++++++++++ .../6-databricks-resources.md | 1341 +++++++++++++++++ .../7-iam-permissions.md | 519 +++++++ .../databricks-terraform-skill/8-lakebase.md | 930 ++++++++++++ .../databricks-terraform-skill/SKILL.md | 235 +++ 9 files changed, 5696 insertions(+) create mode 100644 databricks-skills/databricks-terraform-skill/1-provider-authentication.md create mode 100644 databricks-skills/databricks-terraform-skill/2-aws-workspace-deployment.md create mode 100644 databricks-skills/databricks-terraform-skill/3-azure-workspace-deployment.md create mode 100644 databricks-skills/databricks-terraform-skill/4-gcp-workspace-deployment.md create mode 100644 databricks-skills/databricks-terraform-skill/5-unity-catalog.md create mode 100644 databricks-skills/databricks-terraform-skill/6-databricks-resources.md create mode 100644 databricks-skills/databricks-terraform-skill/7-iam-permissions.md create mode 100644 databricks-skills/databricks-terraform-skill/8-lakebase.md create mode 100644 databricks-skills/databricks-terraform-skill/SKILL.md diff --git a/databricks-skills/databricks-terraform-skill/1-provider-authentication.md b/databricks-skills/databricks-terraform-skill/1-provider-authentication.md new file mode 100644 index 00000000..0acfc08e --- /dev/null +++ b/databricks-skills/databricks-terraform-skill/1-provider-authentication.md @@ -0,0 +1,309 @@ +# Provider Configuration & Authentication + +## Overview + +The Databricks Terraform provider supports multiple authentication methods. For production, always use a **service principal** with client credentials. 
Never hardcode tokens in `.tf` files.

## Provider Version Declaration

```hcl
# versions.tf
terraform {
  required_version = ">= 1.3.0"
  required_providers {
    databricks = {
      source  = "databricks/databricks"
      version = "~> 1.38.0"
    }
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 3.0"
    }
    google = {
      source  = "hashicorp/google"
      version = "~> 5.0"
    }
  }
}
```

---

## Authentication Methods

### Method 1: Personal Access Token (PAT) — Dev/Test Only

```hcl
provider "databricks" {
  host  = "https://adb-<workspace-id>.<random-number>.azuredatabricks.net"
  token = var.databricks_token # Never hardcode!
}
```

Set via environment variable (preferred):
```bash
export DATABRICKS_HOST="https://adb-<workspace-id>.<random-number>.azuredatabricks.net"
export DATABRICKS_TOKEN="dapiXXXXXXXXXXXXXXXXXX"
```

### Method 2: Service Principal — OAuth M2M (Recommended for Production)

Works for **all clouds**. Service principal must be added as Databricks account admin for account-level operations. 
+ +```hcl +provider "databricks" { + host = var.databricks_host + client_id = var.client_id # SP application ID + client_secret = var.client_secret # SP client secret +} + +# Account-level operations (Unity Catalog, workspace creation) +provider "databricks" { + alias = "mws" + host = "https://accounts.cloud.databricks.com" # AWS/GCP + # Azure: "https://accounts.azuredatabricks.net" + account_id = var.databricks_account_id + client_id = var.client_id + client_secret = var.client_secret +} +``` + +Set via environment variables: +```bash +export DATABRICKS_CLIENT_ID="" +export DATABRICKS_CLIENT_SECRET="" +export DATABRICKS_ACCOUNT_ID="" +``` + +### Method 3: Azure CLI Authentication (Azure-Specific) + +```hcl +provider "databricks" { + host = "https://adb-.azuredatabricks.net" + # Uses `az login` credentials automatically +} + +provider "azurerm" { + features {} + # Uses `az login` credentials automatically +} +``` + +### Method 4: Azure Service Principal with Client Secret (Azure Production) + +```hcl +provider "azurerm" { + features {} + subscription_id = var.subscription_id + client_id = var.azure_client_id + client_secret = var.azure_client_secret + tenant_id = var.azure_tenant_id +} + +provider "databricks" { + azure_workspace_resource_id = azurerm_databricks_workspace.this.id + azure_client_id = var.azure_client_id + azure_client_secret = var.azure_client_secret + azure_tenant_id = var.azure_tenant_id +} +``` + +### Method 5: GCP Service Account (GCP) + +```hcl +provider "google" { + project = var.google_project + region = var.google_region + # Uses GOOGLE_APPLICATION_CREDENTIALS env var or ADC +} + +provider "databricks" { + host = module.databricks_workspace.databricks_host + google_service_account = var.databricks_google_service_account + # Or use: token from module output + token = module.databricks_workspace.databricks_token +} +``` + +--- + +## Multi-Provider Patterns + +### Pattern 1: Account + Workspace (Unity Catalog) + +Required when managing both 
account-level (UC) and workspace-level resources. + +```hcl +# providers.tf + +# AWS account-level provider +provider "databricks" { + alias = "mws" + host = "https://accounts.cloud.databricks.com" + account_id = var.databricks_account_id + client_id = var.client_id + client_secret = var.client_secret +} + +# Workspace-level provider (created after workspace exists) +provider "databricks" { + alias = "workspace" + host = databricks_mws_workspaces.this.workspace_url + client_id = var.client_id + client_secret = var.client_secret +} +``` + +Usage in resources: +```hcl +# Account-level resource +resource "databricks_user" "admin" { + provider = databricks.mws + user_name = "admin@company.com" +} + +# Workspace-level resource +resource "databricks_cluster" "shared" { + provider = databricks.workspace + cluster_name = "shared-cluster" + spark_version = data.databricks_spark_version.latest.id + node_type_id = data.databricks_node_type.smallest.id + num_workers = 2 +} +``` + +### Pattern 2: Multiple Workspaces + +```hcl +provider "databricks" { + alias = "ws_prod" + host = var.workspace_prod_url + client_id = var.client_id + client_secret = var.client_secret +} + +provider "databricks" { + alias = "ws_dev" + host = var.workspace_dev_url + client_id = var.client_id + client_secret = var.client_secret +} + +resource "databricks_cluster" "prod_cluster" { + provider = databricks.ws_prod + cluster_name = "prod-shared" + spark_version = "15.4.x-scala2.12" + node_type_id = "i3.xlarge" + num_workers = 4 +} +``` + +### Pattern 3: Workspace Referenced Before Created (Dynamic Provider) + +When the workspace URL isn't known until apply time, use a data source or output: + +```hcl +# Create workspace first +resource "databricks_mws_workspaces" "this" { + provider = databricks.mws + account_id = var.databricks_account_id + workspace_name = var.workspace_name + # ... 
other config +} + +# Use workspace URL in provider (requires two-step apply) +provider "databricks" { + alias = "workspace" + host = databricks_mws_workspaces.this.workspace_url + client_id = var.client_id + client_secret = var.client_secret +} +``` + +> **Note**: This requires `terraform apply -target=databricks_mws_workspaces.this` first, then a full apply. + +--- + +## Variables Best Practices + +```hcl +# variables.tf +variable "databricks_account_id" { + type = string + description = "Databricks Account ID (found in Account Console)" + sensitive = false +} + +variable "client_id" { + type = string + description = "Service principal application (client) ID" + sensitive = false +} + +variable "client_secret" { + type = string + description = "Service principal client secret" + sensitive = true +} + +variable "databricks_host" { + type = string + description = "Databricks workspace URL (e.g., https://adb-123.azuredatabricks.net)" +} +``` + +Pass sensitive values via environment variables — **never commit** `terraform.tfvars` with secrets: + +```bash +# Set for Terraform variables +export TF_VAR_client_id="" +export TF_VAR_client_secret="" +export TF_VAR_databricks_account_id="" +``` + +--- + +## Data Sources for Dynamic Lookups + +```hcl +# Look up latest LTS Spark version +data "databricks_spark_version" "lts" { + long_term_support = true +} + +# Look up smallest node type for the cloud +data "databricks_node_type" "smallest" { + local_disk = true +} + +# Look up existing cluster +data "databricks_cluster" "existing" { + cluster_name = "Shared Autoscaling" +} + +# Look up current user +data "databricks_current_user" "me" {} + +# Look up existing metastore +data "databricks_metastore" "this" { + provider = databricks.mws + name = "my-metastore" +} +``` + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **`Error: default auth: cannot configure default credentials`** | Set `DATABRICKS_HOST` + either `DATABRICKS_TOKEN` or 
`DATABRICKS_CLIENT_ID`+`DATABRICKS_CLIENT_SECRET` | +| **`account_id` missing for UC operations** | Add `account_id` to the account-level provider alias | +| **Azure SP can't create workspace** | SP needs `Contributor` role on the resource group and `User Access Administrator` | +| **`cannot use pat auth: host is an account-level host`** | Account-level operations require OAuth M2M (SP), not PAT | +| **Provider initialization order** | Use `depends_on` or two-pass applies when workspace URL is needed by provider | +| **GCP ADC not found** | Run `gcloud auth application-default login` or set `GOOGLE_APPLICATION_CREDENTIALS` | diff --git a/databricks-skills/databricks-terraform-skill/2-aws-workspace-deployment.md b/databricks-skills/databricks-terraform-skill/2-aws-workspace-deployment.md new file mode 100644 index 00000000..da62555a --- /dev/null +++ b/databricks-skills/databricks-terraform-skill/2-aws-workspace-deployment.md @@ -0,0 +1,603 @@ +# AWS Workspace Deployment + +## Overview + +Two deployment patterns for AWS: +1. **Basic (non-PrivateLink)** — public endpoints, suitable for dev/test +2. 
**PrivateLink (production)** — private networking, no public internet exposure + +Reference examples: [aws-workspace-basic](https://github.com/databricks/terraform-databricks-examples/tree/main/examples/aws-workspace-basic) | [aws-databricks-modular-privatelink](https://github.com/databricks/terraform-databricks-examples/tree/main/examples/aws-databricks-modular-privatelink) + +--- + +## Pattern 1: Basic AWS Workspace (Non-PrivateLink) + +### Prerequisites + +- AWS account with admin permissions +- Databricks account with account admin Service Principal +- Terraform ≥ 1.3.0 + +### File Structure + +``` +aws-workspace-basic/ +├── main.tf +├── variables.tf +├── outputs.tf +├── providers.tf +└── terraform.tfvars +``` + +### providers.tf + +```hcl +terraform { + required_providers { + databricks = { + source = "databricks/databricks" + version = "~> 1.38.0" + } + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } +} + +provider "aws" { + region = var.region +} + +# Account-level provider for workspace creation +provider "databricks" { + alias = "mws" + host = "https://accounts.cloud.databricks.com" + account_id = var.databricks_account_id + client_id = var.client_id + client_secret = var.client_secret +} +``` + +### main.tf — VPC & Networking + +```hcl +# Availability zones data source (required by subnet resources below) +data "aws_availability_zones" "available" { + state = "available" +} + +# VPC +resource "aws_vpc" "databricks" { + cidr_block = var.vpc_cidr + enable_dns_hostnames = true + enable_dns_support = true + tags = merge(var.tags, { Name = "${var.prefix}-vpc" }) +} + +# Internet Gateway +resource "aws_internet_gateway" "igw" { + vpc_id = aws_vpc.databricks.id + tags = merge(var.tags, { Name = "${var.prefix}-igw" }) +} + +# Public subnets for NAT Gateway +resource "aws_subnet" "public" { + count = 2 + vpc_id = aws_vpc.databricks.id + cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index) + availability_zone = 
data.aws_availability_zones.available.names[count.index] + tags = merge(var.tags, { Name = "${var.prefix}-public-${count.index}" }) +} + +# Private subnets for Databricks nodes +resource "aws_subnet" "private" { + count = 2 + vpc_id = aws_vpc.databricks.id + cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index + 4) + availability_zone = data.aws_availability_zones.available.names[count.index] + tags = merge(var.tags, { Name = "${var.prefix}-private-${count.index}" }) +} + +# NAT Gateway for outbound internet from private subnets +resource "aws_eip" "nat" { + domain = "vpc" +} + +resource "aws_nat_gateway" "ngw" { + allocation_id = aws_eip.nat.id + subnet_id = aws_subnet.public[0].id + tags = merge(var.tags, { Name = "${var.prefix}-ngw" }) + depends_on = [aws_internet_gateway.igw] +} + +# Route tables +resource "aws_route_table" "public" { + vpc_id = aws_vpc.databricks.id + route { + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.igw.id + } + tags = merge(var.tags, { Name = "${var.prefix}-rt-public" }) +} + +resource "aws_route_table" "private" { + vpc_id = aws_vpc.databricks.id + route { + cidr_block = "0.0.0.0/0" + nat_gateway_id = aws_nat_gateway.ngw.id + } + tags = merge(var.tags, { Name = "${var.prefix}-rt-private" }) +} + +resource "aws_route_table_association" "public" { + count = 2 + subnet_id = aws_subnet.public[count.index].id + route_table_id = aws_route_table.public.id +} + +resource "aws_route_table_association" "private" { + count = 2 + subnet_id = aws_subnet.private[count.index].id + route_table_id = aws_route_table.private.id +} + +# Security Group for Databricks clusters +resource "aws_security_group" "databricks" { + name = "${var.prefix}-databricks-sg" + description = "Security group for Databricks cluster nodes" + vpc_id = aws_vpc.databricks.id + + ingress { + from_port = 0 + to_port = 65535 + protocol = "tcp" + self = true + } + ingress { + from_port = 0 + to_port = 65535 + protocol = "udp" + self = true + } + egress { + from_port 
= 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + tags = merge(var.tags, { Name = "${var.prefix}-databricks-sg" }) +} +``` + +### main.tf — S3 Root Bucket + +```hcl +resource "aws_s3_bucket" "root" { + bucket = "${var.prefix}-databricks-root" + force_destroy = true + tags = var.tags +} + +resource "aws_s3_bucket_versioning" "root" { + bucket = aws_s3_bucket.root.id + versioning_configuration { status = "Disabled" } +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "root" { + bucket = aws_s3_bucket.root.id + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" + } + } +} + +resource "aws_s3_bucket_public_access_block" "root" { + bucket = aws_s3_bucket.root.id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +data "aws_iam_policy_document" "root_bucket_policy" { + statement { + effect = "Allow" + actions = ["s3:GetObject", "s3:GetObjectVersion", "s3:PutObject", "s3:DeleteObject", "s3:ListBucket", "s3:GetBucketLocation"] + resources = [ + aws_s3_bucket.root.arn, + "${aws_s3_bucket.root.arn}/*" + ] + principals { + type = "AWS" + identifiers = ["arn:aws:iam::414351767826:root"] # Databricks AWS account ID + } + } +} + +resource "aws_s3_bucket_policy" "root" { + bucket = aws_s3_bucket.root.id + policy = data.aws_iam_policy_document.root_bucket_policy.json +} +``` + +### main.tf — Cross-Account IAM Role + +```hcl +data "aws_iam_policy_document" "cross_account_assume" { + statement { + effect = "Allow" + actions = ["sts:AssumeRole"] + principals { + type = "AWS" + identifiers = ["arn:aws:iam::414351767826:root"] # Databricks production AWS account + } + condition { + test = "StringEquals" + variable = "sts:ExternalId" + values = [var.databricks_account_id] + } + } +} + +resource "aws_iam_role" "cross_account" { + name = "${var.prefix}-databricks-crossaccount" + assume_role_policy = data.aws_iam_policy_document.cross_account_assume.json 
+ tags = var.tags +} + +data "aws_iam_policy_document" "cross_account_policy" { + statement { + effect = "Allow" + actions = [ + "ec2:AllocateAddress", "ec2:AssignPrivateIpAddresses", "ec2:AssociateDhcpOptions", + "ec2:AssociateRouteTable", "ec2:AttachInternetGateway", "ec2:AttachNetworkInterface", + "ec2:AuthorizeSecurityGroupEgress", "ec2:AuthorizeSecurityGroupIngress", + "ec2:CancelSpotInstanceRequests", "ec2:CreateDhcpOptions", "ec2:CreateInternetGateway", + "ec2:CreateKeyPair", "ec2:CreateNetworkInterface", "ec2:CreatePlacementGroup", + "ec2:CreateRoute", "ec2:CreateRouteTable", "ec2:CreateSecurityGroup", + "ec2:CreateSubnet", "ec2:CreateTags", "ec2:CreateVolume", "ec2:CreateVpc", + "ec2:CreateVpcEndpoint", "ec2:DeleteDhcpOptions", "ec2:DeleteInternetGateway", + "ec2:DeleteKeyPair", "ec2:DeleteNetworkInterface", "ec2:DeletePlacementGroup", + "ec2:DeleteRoute", "ec2:DeleteRouteTable", "ec2:DeleteSecurityGroup", + "ec2:DeleteSubnet", "ec2:DeleteTags", "ec2:DeleteVolume", "ec2:DeleteVpc", + "ec2:DeleteVpcEndpoints", "ec2:DescribeAvailabilityZones", "ec2:DescribeIamInstanceProfileAssociations", + "ec2:DescribeInstanceStatus", "ec2:DescribeInstances", "ec2:DescribeInternetGateways", + "ec2:DescribeNetworkAcls", "ec2:DescribeNetworkInterfaces", "ec2:DescribePlacementGroups", + "ec2:DescribePrefixLists", "ec2:DescribeReservedInstancesOfferings", + "ec2:DescribeRouteTables", "ec2:DescribeSecurityGroups", "ec2:DescribeSpotInstanceRequests", + "ec2:DescribeSpotPriceHistory", "ec2:DescribeSubnets", "ec2:DescribeVolumes", + "ec2:DescribeVpcAttribute", "ec2:DescribeVpcs", "ec2:DetachInternetGateway", + "ec2:DisassociateRouteTable", "ec2:GetSpotPlacementScores", "ec2:ModifyVpcAttribute", + "ec2:ReleaseAddress", "ec2:RequestSpotInstances", "ec2:RevokeSecurityGroupEgress", + "ec2:RevokeSecurityGroupIngress", "ec2:RunInstances", "ec2:TerminateInstances", + "iam:CreateServiceLinkedRole", "iam:GetRole", "iam:ListInstanceProfiles", "iam:PassRole" + ] + resources = ["*"] + } +} + 
resource "aws_iam_role_policy" "cross_account" {
  name   = "${var.prefix}-databricks-crossaccount-policy"
  role   = aws_iam_role.cross_account.id
  policy = data.aws_iam_policy_document.cross_account_policy.json
}
```

### main.tf — Databricks Workspace (MWS)

```hcl
resource "databricks_mws_credentials" "this" {
  provider         = databricks.mws
  account_id       = var.databricks_account_id
  credentials_name = "${var.prefix}-credentials"
  role_arn         = aws_iam_role.cross_account.arn
}

resource "databricks_mws_storage_configurations" "this" {
  provider                   = databricks.mws
  account_id                 = var.databricks_account_id
  storage_configuration_name = "${var.prefix}-storage"
  bucket_name                = aws_s3_bucket.root.bucket
}

resource "databricks_mws_networks" "this" {
  provider           = databricks.mws
  account_id         = var.databricks_account_id
  network_name       = "${var.prefix}-network"
  security_group_ids = [aws_security_group.databricks.id]
  subnet_ids         = aws_subnet.private[*].id
  vpc_id             = aws_vpc.databricks.id
}

resource "databricks_mws_workspaces" "this" {
  provider       = databricks.mws
  account_id     = var.databricks_account_id
  workspace_name = var.workspace_name
  aws_region     = var.region

  credentials_id           = databricks_mws_credentials.this.credentials_id
  storage_configuration_id = databricks_mws_storage_configurations.this.storage_configuration_id
  network_id               = databricks_mws_networks.this.network_id

  token {
    comment          = "Terraform-managed"
    lifetime_seconds = 86400 # 1 day; omit for non-expiring
  }
}
```

### variables.tf

```hcl
# Note: HCL does not support ";" as an attribute separator — blocks with more
# than one attribute must be written across multiple lines.
variable "databricks_account_id" { type = string }
variable "client_id" { type = string }
variable "client_secret" {
  type      = string
  sensitive = true
}
variable "region" {
  type    = string
  default = "us-east-1"
}
variable "prefix" {
  type    = string
  default = "demo"
}
variable "workspace_name" {
  type    = string
  default = "my-workspace"
}
variable "vpc_cidr" {
  type    = string
  default = "10.4.0.0/16"
}
variable "tags" {
  type    = map(string)
default = {} } +``` + +### outputs.tf + +```hcl +output "workspace_url" { + value = databricks_mws_workspaces.this.workspace_url +} +output "workspace_id" { + value = databricks_mws_workspaces.this.workspace_id +} +output "token" { + value = databricks_mws_workspaces.this.token[0].token_value + sensitive = true +} +``` + +--- + +## Pattern 2: AWS Modular PrivateLink Deployment + +For production workloads requiring private networking with no public internet exposure. + +Reference: [aws-databricks-modular-privatelink](https://github.com/databricks/terraform-databricks-examples/tree/main/examples/aws-databricks-modular-privatelink) + +### Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ Customer AWS Account │ +│ │ +│ ┌──────────────────────────────────────┐ │ +│ │ VPC (10.109.0.0/17) │ │ +│ │ │ │ +│ │ PrivateLink Subnets │ │ +│ │ ┌────────────────────────────────┐ │ │ +│ │ │ VPC Endpoint (Workspace) │ │ │ +│ │ │ VPC Endpoint (Relay/SCC) │ │ │ +│ │ └────────────────────────────────┘ │ │ +│ │ │ │ +│ │ Workspace Subnet Pairs (1..N) │ │ +│ │ ┌────────────────┐ ┌────────────┐ │ │ +│ │ │ Workspace 1 │ │ Workspace 2│ │ │ +│ │ │ Subnet A + B │ │ Subnet A+B │ │ │ +│ │ └────────────────┘ └────────────┘ │ │ +│ └──────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────┘ + │ PrivateLink + ▼ +┌─────────────────────────────────────────────────────────┐ +│ Databricks Control Plane │ +│ (aws-workspace-vpce-service, relay-vpce-service) │ +└─────────────────────────────────────────────────────────┘ +``` + +### PrivateLink VPC Endpoints + +> **Note**: The snippets below reference `aws_subnet.privatelink` and `aws_security_group.databricks_vpce` which must be declared alongside your existing VPC resources. Add dedicated PrivateLink subnets (at least one per AZ) and a security group that allows HTTPS (443) inbound from the workspace subnets. 
+ +```hcl +# Dedicated subnets for VPC endpoints (separate from workspace node subnets) +resource "aws_subnet" "privatelink" { + count = 2 + vpc_id = aws_vpc.databricks.id + cidr_block = cidrsubnet(var.vpc_cidr, 4, count.index + 8) + availability_zone = data.aws_availability_zones.available.names[count.index] + tags = merge(var.tags, { Name = "${var.prefix}-pl-${count.index}" }) +} + +# Security group for VPC endpoints — allows HTTPS from workspace nodes +resource "aws_security_group" "databricks_vpce" { + name = "${var.prefix}-vpce-sg" + description = "Security group for Databricks VPC endpoints" + vpc_id = aws_vpc.databricks.id + + ingress { + from_port = 443 + to_port = 443 + protocol = "tcp" + security_groups = [aws_security_group.databricks.id] + } + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + tags = merge(var.tags, { Name = "${var.prefix}-vpce-sg" }) +} + +# Look up Databricks PrivateLink service endpoint for your region +# See: https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html + +resource "aws_vpc_endpoint" "workspace" { + vpc_id = aws_vpc.databricks.id + service_name = var.workspace_vpce_service # e.g. 
"com.amazonaws.vpce.us-east-1.vpce-svc-xxxxxxxx" + vpc_endpoint_type = "Interface" + subnet_ids = aws_subnet.privatelink[*].id + security_group_ids = [aws_security_group.databricks_vpce.id] + private_dns_enabled = true + tags = merge(var.tags, { Name = "${var.prefix}-workspace-vpce" }) +} + +resource "aws_vpc_endpoint" "relay" { + vpc_id = aws_vpc.databricks.id + service_name = var.relay_vpce_service # Secure Cluster Connectivity relay + vpc_endpoint_type = "Interface" + subnet_ids = aws_subnet.privatelink[*].id + security_group_ids = [aws_security_group.databricks_vpce.id] + private_dns_enabled = true + tags = merge(var.tags, { Name = "${var.prefix}-relay-vpce" }) +} +``` + +### Databricks Private Access Settings + +```hcl +resource "databricks_mws_private_access_settings" "this" { + provider = databricks.mws + account_id = var.databricks_account_id + private_access_settings_name = "${var.prefix}-pas" + region = var.region + public_access_enabled = false # Disable public access entirely + private_access_level = "ACCOUNT" # or "ENDPOINT" +} + +resource "databricks_mws_vpc_endpoint" "workspace" { + provider = databricks.mws + account_id = var.databricks_account_id + aws_vpc_endpoint_id = aws_vpc_endpoint.workspace.id + vpc_endpoint_name = "${var.prefix}-workspace-vpce" + region = var.region +} + +resource "databricks_mws_vpc_endpoint" "relay" { + provider = databricks.mws + account_id = var.databricks_account_id + aws_vpc_endpoint_id = aws_vpc_endpoint.relay.id + vpc_endpoint_name = "${var.prefix}-relay-vpce" + region = var.region +} +``` + +### Workspace with PrivateLink + +```hcl +resource "databricks_mws_workspaces" "privatelink" { + provider = databricks.mws + account_id = var.databricks_account_id + workspace_name = var.workspace_name + aws_region = var.region + + credentials_id = databricks_mws_credentials.this.credentials_id + storage_configuration_id = databricks_mws_storage_configurations.this.storage_configuration_id + network_id = 
databricks_mws_networks.this.network_id

  private_access_settings_id = databricks_mws_private_access_settings.this.private_access_settings_id

  # NOTE: the VPC endpoints are registered on the network object, not on the
  # workspace — add a vpc_endpoints block to databricks_mws_networks, e.g.:
  #   vpc_endpoints {
  #     rest_api        = [databricks_mws_vpc_endpoint.workspace.vpc_endpoint_id]
  #     dataplane_relay = [databricks_mws_vpc_endpoint.relay.vpc_endpoint_id]
  #   }
  custom_tags = var.tags
}
```

### Customer-Managed Keys (Optional — Enhanced Security)

```hcl
data "aws_caller_identity" "current" {}

data "aws_iam_policy_document" "kms" {
  # Root account full access to manage the key
  statement {
    effect    = "Allow"
    actions   = ["kms:*"]
    resources = ["*"]
    principals {
      type        = "AWS"
      identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"]
    }
  }
  # Databricks account must be able to use the key for managed services encryption
  statement {
    effect    = "Allow"
    actions   = ["kms:Encrypt", "kms:Decrypt", "kms:ReEncrypt*", "kms:GenerateDataKey*", "kms:DescribeKey"]
    resources = ["*"]
    principals {
      type        = "AWS"
      identifiers = ["arn:aws:iam::414351767826:root"] # Databricks production AWS account
    }
  }
}

resource "aws_kms_key" "databricks" {
  description             = "Databricks workspace encryption key"
  deletion_window_in_days = 7
  enable_key_rotation     = true
  policy                  = data.aws_iam_policy_document.kms.json
  tags                    = var.tags
}

resource "aws_kms_alias" "databricks" {
  name          = "alias/${var.prefix}-databricks-key"
  target_key_id = aws_kms_key.databricks.key_id
}

resource "databricks_mws_customer_managed_keys" "workspace" {
  provider   = databricks.mws
  account_id = var.databricks_account_id

  aws_key_info {
    key_arn    = aws_kms_key.databricks.arn
    key_alias  = aws_kms_alias.databricks.name
    key_region = var.region
  }

  use_cases = ["MANAGED_SERVICES"] # or ["STORAGE"], or ["MANAGED_SERVICES", "STORAGE"]
}
```

---

## IP Access Lists

```hcl
resource "databricks_workspace_conf" "this" {
  provider = databricks.workspace
  custom_config = {
    "enableIpAccessLists" = "true" # custom_config is map(string) — must quote boolean values
  }
}

resource 
"databricks_ip_access_list" "allow" { + provider = databricks.workspace + label = "allow-corporate" + list_type = "ALLOW" + ip_addresses = [ + "203.0.113.0/24", # Corporate VPN range + "198.51.100.10/32", # Specific IP + ] + depends_on = [databricks_workspace_conf.this] +} + +resource "databricks_ip_access_list" "block" { + provider = databricks.workspace + label = "block-known-bad" + list_type = "BLOCK" + ip_addresses = ["192.0.2.0/24"] + depends_on = [databricks_workspace_conf.this] +} +``` + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **`InvalidVpcEndpointService`** | Verify the VPCE service name for your region from Databricks docs | +| **Workspace creation times out** | AWS credential propagation takes ~60 seconds; add `time_sleep` resource | +| **`InvalidCrossAccountRole`** | Verify trust policy includes correct Databricks AWS account ID (414351767826) | +| **NAT Gateway not routing** | Check route table associations — private subnets must point to NAT GW | +| **Security group self-reference** | Databricks requires nodes to communicate freely within the SG (self-rule) | +| **S3 bucket access denied** | Verify bucket policy includes Databricks AWS account as principal | diff --git a/databricks-skills/databricks-terraform-skill/3-azure-workspace-deployment.md b/databricks-skills/databricks-terraform-skill/3-azure-workspace-deployment.md new file mode 100644 index 00000000..a7771fa4 --- /dev/null +++ b/databricks-skills/databricks-terraform-skill/3-azure-workspace-deployment.md @@ -0,0 +1,536 @@ +# Azure Workspace Deployment + +## Overview + +Two deployment patterns for Azure: +1. **Basic (VNet injection)** — standard deployment with custom VNet +2. 
**Private Link Standard** — two-VNet architecture, no public endpoints + +Reference examples: [adb-vnet-injection](https://github.com/databricks/terraform-databricks-examples/tree/main/examples/adb-vnet-injection) | [adb-with-private-link-standard](https://github.com/databricks/terraform-databricks-examples/tree/main/examples/adb-with-private-link-standard) + +--- + +## Pattern 1: Basic Azure Workspace with VNet Injection + +### Prerequisites + +- Azure subscription with `Contributor` permissions +- `Microsoft.Databricks` resource provider registered +- Terraform ≥ 1.3.0, AzureRM provider ≥ 3.0 + +### providers.tf + +```hcl +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 3.100" + } + databricks = { + source = "databricks/databricks" + version = "~> 1.38.0" + } + } +} + +provider "azurerm" { + features {} + subscription_id = var.subscription_id +} + +# Workspace provider — only available after workspace is created +provider "databricks" { + azure_workspace_resource_id = azurerm_databricks_workspace.this.id + # Uses Azure CLI or SP credentials from environment +} +``` + +### main.tf — Resource Group & Networking + +```hcl +resource "azurerm_resource_group" "databricks" { + name = "${var.prefix}-rg" + location = var.location + tags = var.tags +} + +# Main VNet +resource "azurerm_virtual_network" "databricks" { + name = "${var.prefix}-vnet" + location = azurerm_resource_group.databricks.location + resource_group_name = azurerm_resource_group.databricks.name + address_space = [var.vnet_cidr] + tags = var.tags +} + +# Host subnet (public — used by Azure Databricks control plane) +resource "azurerm_subnet" "public" { + name = "${var.prefix}-public-subnet" + resource_group_name = azurerm_resource_group.databricks.name + virtual_network_name = azurerm_virtual_network.databricks.name + address_prefixes = [var.public_subnet_cidr] + + delegation { + name = "databricks-delegation" + service_delegation { + name = 
"Microsoft.Databricks/workspaces" + actions = [ + "Microsoft.Network/virtualNetworks/subnets/join/action", + "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action", + "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action" + ] + } + } +} + +# Container subnet (private — used by worker nodes) +resource "azurerm_subnet" "private" { + name = "${var.prefix}-private-subnet" + resource_group_name = azurerm_resource_group.databricks.name + virtual_network_name = azurerm_virtual_network.databricks.name + address_prefixes = [var.private_subnet_cidr] + + delegation { + name = "databricks-delegation" + service_delegation { + name = "Microsoft.Databricks/workspaces" + actions = [ + "Microsoft.Network/virtualNetworks/subnets/join/action", + "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action", + "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action" + ] + } + } +} + +# Network Security Groups (NSGs) — Databricks manages rules automatically +resource "azurerm_network_security_group" "public" { + name = "${var.prefix}-public-nsg" + location = azurerm_resource_group.databricks.location + resource_group_name = azurerm_resource_group.databricks.name + tags = var.tags +} + +resource "azurerm_network_security_group" "private" { + name = "${var.prefix}-private-nsg" + location = azurerm_resource_group.databricks.location + resource_group_name = azurerm_resource_group.databricks.name + tags = var.tags +} + +resource "azurerm_subnet_network_security_group_association" "public" { + subnet_id = azurerm_subnet.public.id + network_security_group_id = azurerm_network_security_group.public.id +} + +resource "azurerm_subnet_network_security_group_association" "private" { + subnet_id = azurerm_subnet.private.id + network_security_group_id = azurerm_network_security_group.private.id +} +``` + +### main.tf — Databricks Workspace + +```hcl +resource "azurerm_databricks_workspace" "this" { + name = 
"${var.prefix}-workspace"
  resource_group_name = azurerm_resource_group.databricks.name
  location            = azurerm_resource_group.databricks.location
  sku                 = "premium" # Required for Unity Catalog
  tags                = var.tags

  custom_parameters {
    virtual_network_id  = azurerm_virtual_network.databricks.id
    public_subnet_name  = azurerm_subnet.public.name
    private_subnet_name = azurerm_subnet.private.name
    public_subnet_network_security_group_association_id  = azurerm_subnet_network_security_group_association.public.id
    private_subnet_network_security_group_association_id = azurerm_subnet_network_security_group_association.private.id

    # Disable public IP for worker nodes (recommended)
    no_public_ip = true
  }
}
```

### outputs.tf

```hcl
output "workspace_url" {
  value = "https://${azurerm_databricks_workspace.this.workspace_url}"
}
output "workspace_id" {
  value = azurerm_databricks_workspace.this.workspace_id
}
output "workspace_resource_id" {
  value = azurerm_databricks_workspace.this.id
}
```

### variables.tf (Pattern 1)

```hcl
# NOTE: HCL does not support `;` separators — a single-line block may hold at
# most one argument, so multi-argument variables are written across lines.
variable "subscription_id" { type = string }

variable "location" {
  type    = string
  default = "eastus2"
}

variable "prefix" {
  type    = string
  default = "demo"
}

variable "vnet_cidr" {
  type    = string
  default = "10.179.0.0/20"
}

variable "public_subnet_cidr" {
  type    = string
  default = "10.179.0.0/24"
}

variable "private_subnet_cidr" {
  type    = string
  default = "10.179.1.0/24"
}

variable "tags" {
  type    = map(string)
  default = {}
}
```

---

## Pattern 2: Azure Private Link Standard Deployment

Two-VNet architecture eliminating all public internet exposure.
+ +### Architecture + +``` +┌──────────────────────────────────────────────────────┐ +│ Transit VNet (cidr_transit) │ +│ │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ Private Endpoint: Frontend (Web UI) │ │ +│ │ Private Endpoint: Authentication (SSO) │ │ +│ └──────────────────────────────────────────────┘ │ +│ │ +│ Test VM (for validation from within the VNet) │ +└──────────────────────────────────────────────────────┘ + │ + VNet Peering + │ +┌──────────────────────────────────────────────────────┐ +│ Data Plane VNet (cidr_dp) │ +│ │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ Public Subnet (Databricks Host) │ │ +│ │ Private Subnet (Databricks Container) │ │ +│ └──────────────────────────────────────────────┘ │ +│ │ +│ Private Endpoint: Backend (Control Plane) │ +└──────────────────────────────────────────────────────┘ + │ + Private Link (no public internet) + │ + Databricks Control Plane +``` + +### providers.tf + +> **Note**: The Private Link standard example requires AzureRM provider **v4.0+** (differs from Pattern 1 which uses `~> 3.100`). If combining both patterns in one configuration, pin to `>= 4.0.0`. + +```hcl +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = ">= 4.0.0" + } + databricks = { + source = "databricks/databricks" + version = "~> 1.38.0" + } + } +} + +provider "azurerm" { + features {} + subscription_id = var.subscription_id +} +``` + +### main.tf — Data Plane VNet + +```hcl +resource "azurerm_resource_group" "dp" { + name = var.create_data_plane_resource_group ? 
"${var.prefix}-dp-rg" : var.existing_data_plane_resource_group_name + location = var.location +} + +resource "azurerm_virtual_network" "dp" { + name = "${var.prefix}-dp-vnet" + location = azurerm_resource_group.dp.location + resource_group_name = azurerm_resource_group.dp.name + address_space = [var.cidr_dp] + tags = var.tags +} + +# Host (public) and container (private) subnets with delegation +resource "azurerm_subnet" "dp_public" { + name = "databricks-public" + resource_group_name = azurerm_resource_group.dp.name + virtual_network_name = azurerm_virtual_network.dp.name + address_prefixes = [cidrsubnet(var.cidr_dp, 3, 0)] + + delegation { + name = "databricks" + service_delegation { + name = "Microsoft.Databricks/workspaces" + actions = [ + "Microsoft.Network/virtualNetworks/subnets/join/action", + "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action", + "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action" + ] + } + } +} + +resource "azurerm_subnet" "dp_private" { + name = "databricks-private" + resource_group_name = azurerm_resource_group.dp.name + virtual_network_name = azurerm_virtual_network.dp.name + address_prefixes = [cidrsubnet(var.cidr_dp, 3, 1)] + + delegation { + name = "databricks" + service_delegation { + name = "Microsoft.Databricks/workspaces" + actions = [ + "Microsoft.Network/virtualNetworks/subnets/join/action", + "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action", + "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action" + ] + } + } +} + +# Subnet for backend private endpoint (no delegation) +resource "azurerm_subnet" "dp_pe" { + name = "private-endpoint" + resource_group_name = azurerm_resource_group.dp.name + virtual_network_name = azurerm_virtual_network.dp.name + address_prefixes = [cidrsubnet(var.cidr_dp, 3, 2)] +} +``` + +### main.tf — Workspace with Private Link + +```hcl +# NSGs for the data plane subnets (required before workspace creation) +resource 
"azurerm_network_security_group" "dp_public" { + name = "${var.prefix}-dp-public-nsg" + location = azurerm_resource_group.dp.location + resource_group_name = azurerm_resource_group.dp.name + tags = var.tags +} + +resource "azurerm_network_security_group" "dp_private" { + name = "${var.prefix}-dp-private-nsg" + location = azurerm_resource_group.dp.location + resource_group_name = azurerm_resource_group.dp.name + tags = var.tags +} + +resource "azurerm_subnet_network_security_group_association" "dp_public" { + subnet_id = azurerm_subnet.dp_public.id + network_security_group_id = azurerm_network_security_group.dp_public.id +} + +resource "azurerm_subnet_network_security_group_association" "dp_private" { + subnet_id = azurerm_subnet.dp_private.id + network_security_group_id = azurerm_network_security_group.dp_private.id +} + +resource "azurerm_databricks_workspace" "this" { + name = "${var.prefix}-workspace" + resource_group_name = azurerm_resource_group.dp.name + location = azurerm_resource_group.dp.location + sku = "premium" + tags = var.tags + + public_network_access_enabled = false # No public access + network_security_group_rules_required = "NoAzureDatabricksRules" + + custom_parameters { + virtual_network_id = azurerm_virtual_network.dp.id + public_subnet_name = azurerm_subnet.dp_public.name + private_subnet_name = azurerm_subnet.dp_private.name + public_subnet_network_security_group_association_id = azurerm_subnet_network_security_group_association.dp_public.id + private_subnet_network_security_group_association_id = azurerm_subnet_network_security_group_association.dp_private.id + no_public_ip = true + } +} +``` + +### main.tf — Backend Private Endpoint (Data Plane VNet) + +```hcl +resource "azurerm_private_endpoint" "backend" { + name = "${var.prefix}-backend-pe" + location = azurerm_resource_group.dp.location + resource_group_name = azurerm_resource_group.dp.name + subnet_id = azurerm_subnet.dp_pe.id + tags = var.tags + + private_service_connection { + name = 
"${var.prefix}-backend-psc" + private_connection_resource_id = azurerm_databricks_workspace.this.id + subresource_names = ["databricks_ui_api"] + is_manual_connection = false + } + + private_dns_zone_group { + name = "databricks-dns" + private_dns_zone_ids = [azurerm_private_dns_zone.databricks.id] + } +} +``` + +### main.tf — Transit VNet & Frontend/Auth Endpoints + +```hcl +resource "azurerm_resource_group" "transit" { + name = "${var.prefix}-transit-rg" + location = var.location +} + +resource "azurerm_virtual_network" "transit" { + name = "${var.prefix}-transit-vnet" + location = azurerm_resource_group.transit.location + resource_group_name = azurerm_resource_group.transit.name + address_space = [var.cidr_transit] + tags = var.tags +} + +resource "azurerm_subnet" "transit_pe" { + name = "private-endpoint" + resource_group_name = azurerm_resource_group.transit.name + virtual_network_name = azurerm_virtual_network.transit.name + address_prefixes = [cidrsubnet(var.cidr_transit, 3, 0)] +} + +# Frontend (Web UI) Private Endpoint +resource "azurerm_private_endpoint" "frontend" { + name = "${var.prefix}-frontend-pe" + location = azurerm_resource_group.transit.location + resource_group_name = azurerm_resource_group.transit.name + subnet_id = azurerm_subnet.transit_pe.id + tags = var.tags + + private_service_connection { + name = "${var.prefix}-frontend-psc" + private_connection_resource_id = azurerm_databricks_workspace.this.id + subresource_names = ["databricks_ui_api"] + is_manual_connection = false + } +} + +# Web Authentication Private Endpoint +resource "azurerm_private_endpoint" "auth" { + name = "${var.prefix}-auth-pe" + location = azurerm_resource_group.transit.location + resource_group_name = azurerm_resource_group.transit.name + subnet_id = azurerm_subnet.transit_pe.id + tags = var.tags + + private_service_connection { + name = "${var.prefix}-auth-psc" + private_connection_resource_id = azurerm_databricks_workspace.this.id + subresource_names = 
["browser_authentication"]
    is_manual_connection           = false
  }
}

# VNet Peering between transit and data plane
resource "azurerm_virtual_network_peering" "dp_to_transit" {
  name                         = "dp-to-transit"
  resource_group_name          = azurerm_resource_group.dp.name
  virtual_network_name         = azurerm_virtual_network.dp.name
  remote_virtual_network_id    = azurerm_virtual_network.transit.id
  allow_virtual_network_access = true
  allow_forwarded_traffic      = true
}

resource "azurerm_virtual_network_peering" "transit_to_dp" {
  name                         = "transit-to-dp"
  resource_group_name          = azurerm_resource_group.transit.name
  virtual_network_name         = azurerm_virtual_network.transit.name
  remote_virtual_network_id    = azurerm_virtual_network.dp.id
  allow_virtual_network_access = true
  allow_forwarded_traffic      = true
}
```

### Private DNS Zone

```hcl
resource "azurerm_private_dns_zone" "databricks" {
  name                = "privatelink.azuredatabricks.net"
  resource_group_name = azurerm_resource_group.dp.name
  tags                = var.tags
}

resource "azurerm_private_dns_zone_virtual_network_link" "dp" {
  name                  = "dp-link"
  resource_group_name   = azurerm_resource_group.dp.name
  private_dns_zone_name = azurerm_private_dns_zone.databricks.name
  virtual_network_id    = azurerm_virtual_network.dp.id
}

resource "azurerm_private_dns_zone_virtual_network_link" "transit" {
  name                  = "transit-link"
  resource_group_name   = azurerm_resource_group.dp.name
  private_dns_zone_name = azurerm_private_dns_zone.databricks.name
  virtual_network_id    = azurerm_virtual_network.transit.id
}
```

### variables.tf (Pattern 2 additions)

```hcl
# NOTE: HCL does not support `;` separators — multi-argument variable
# blocks must be written across multiple lines.
variable "cidr_dp" {
  type    = string
  default = "10.180.0.0/20"
}

variable "cidr_transit" {
  type    = string
  default = "10.181.0.0/24"
}

variable "create_data_plane_resource_group" {
  type        = bool
  default     = true
  description = "Set false to reuse an existing resource group"
}

variable "existing_data_plane_resource_group_name" {
  type    = string
  default = ""
}
```

---

## 
Customer-Managed Keys (Azure Key Vault)

```hcl
# Required for the tenant_id reference below
data "azurerm_client_config" "current" {}

resource "azurerm_key_vault" "databricks" {
  name                = "${var.prefix}-kv"
  location            = azurerm_resource_group.databricks.location
  resource_group_name = azurerm_resource_group.databricks.name
  tenant_id           = data.azurerm_client_config.current.tenant_id
  sku_name            = "premium" # Premium required for HSM-backed keys

  # Azure requires purge protection (which implies soft delete) on any
  # Key Vault used for customer-managed keys.
  purge_protection_enabled   = true
  soft_delete_retention_days = 7
}

resource "azurerm_key_vault_key" "databricks" {
  name         = "${var.prefix}-key"
  key_vault_id = azurerm_key_vault.databricks.id
  key_type     = "RSA"
  key_size     = 2048
  key_opts     = ["decrypt", "encrypt", "sign", "unwrapKey", "verify", "wrapKey"]
}

# NOTE: the workspace must be created with `customer_managed_key_enabled = true`
# for this resource to apply, and the workspace's managed identity needs
# Get/WrapKey/UnwrapKey access on the vault — verify against your access policy
# or RBAC setup.
resource "azurerm_databricks_workspace_customer_managed_key" "this" {
  workspace_id     = azurerm_databricks_workspace.this.id
  key_vault_key_id = azurerm_key_vault_key.databricks.id
}
```

---

## Common Issues

| Issue | Solution |
|-------|----------|
| **`Subnet delegation already exists`** | Each subnet can only have one delegation; verify no conflicting resources |
| **`InsufficientSubnetSize`** | Databricks requires at minimum /26 for public and /26 for private subnets |
| **Private endpoint DNS not resolving** | Ensure DNS zone is linked to both VNets and propagation time (~5 min) has passed |
| **`NoAzureDatabricksRules` NSG conflict** | With Private Link, remove all manually added NSG rules; Databricks manages them |
| **`premium` SKU required for UC** | Set `sku = "premium"` for Unity Catalog compatibility |
| **VNet peering failing** | Both sides of peering must be created; use `depends_on` to ensure order |
| **Azure SP needs additional role** | For ADLS Gen2 access, SP needs `Storage Blob Data Contributor` on the storage account |
diff --git a/databricks-skills/databricks-terraform-skill/4-gcp-workspace-deployment.md b/databricks-skills/databricks-terraform-skill/4-gcp-workspace-deployment.md
new file mode 100644
index 00000000..c7924d7d
--- /dev/null
+++ b/databricks-skills/databricks-terraform-skill/4-gcp-workspace-deployment.md
@@ 
-0,0 +1,387 @@ +# GCP Workspace Deployment + +## Overview + +Two deployment patterns for GCP: +1. **Managed VPC (Basic)** — Databricks manages the VPC, simplest setup +2. **BYOVPC (Customer-Managed VPC)** — full control over networking + +Reference examples: [gcp-basic](https://github.com/databricks/terraform-databricks-examples/tree/main/examples/gcp-basic) | [gcp-byovpc](https://github.com/databricks/terraform-databricks-examples/tree/main/examples/gcp-byovpc) + +--- + +## Prerequisites + +Before deploying GCP workspaces, provision a service account for Databricks. This is a two-step process: + +1. **Step 1** — Create and configure the GCP service account ([gcp-sa-provisioning](https://github.com/databricks/terraform-databricks-examples/tree/main/examples/gcp-sa-provisionning)) +2. **Step 2** — Deploy the workspace using the service account + +### Step 1: Service Account Provisioning + +```hcl +# Required GCP APIs to enable +resource "google_project_service" "required" { + for_each = toset([ + "cloudresourcemanager.googleapis.com", + "iam.googleapis.com", + "compute.googleapis.com", + "serviceusage.googleapis.com", + "databricks.googleapis.com" + ]) + service = each.key + disable_on_destroy = false +} + +# Service account for Databricks +resource "google_service_account" "databricks" { + account_id = "${var.prefix}-databricks-sa" + display_name = "Databricks Workspace Service Account" + project = var.google_project +} + +# Required IAM roles for the service account +locals { + databricks_sa_roles = [ + "roles/compute.admin", + "roles/iam.serviceAccountUser", + "roles/iam.serviceAccountTokenCreator", + "roles/storage.admin", + "roles/logging.logWriter" + ] +} + +resource "google_project_iam_member" "databricks_sa" { + for_each = toset(local.databricks_sa_roles) + project = var.google_project + role = each.value + member = "serviceAccount:${google_service_account.databricks.email}" +} + +# Allow users/SPs to impersonate the service account +resource 
"google_service_account_iam_binding" "impersonation" { + service_account_id = google_service_account.databricks.name + role = "roles/iam.serviceAccountTokenCreator" + members = var.delegate_from # e.g., ["user:admin@company.com"] +} +``` + +--- + +## Pattern 1: Managed VPC (gcp-basic) + +Databricks creates and manages the VPC. Simplest deployment option. + +### providers.tf + +```hcl +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "~> 5.0" + } + databricks = { + source = "databricks/databricks" + version = "~> 1.38.0" + } + } +} + +provider "google" { + project = var.google_project + region = var.google_region + zone = var.google_zone +} + +# Account-level Databricks provider (for workspace creation) +provider "databricks" { + alias = "mws" + host = "https://accounts.gcp.databricks.com" + account_id = var.databricks_account_id + google_service_account = var.databricks_google_service_account +} + +# Workspace-level provider (after workspace creation) +provider "databricks" { + alias = "workspace" + host = databricks_mws_workspaces.this.workspace_url + google_service_account = var.databricks_google_service_account +} +``` + +### main.tf + +```hcl +# GCS bucket for workspace root storage +resource "google_storage_bucket" "root" { + name = "${var.prefix}-databricks-root" + location = upper(var.google_region) + force_destroy = true + + uniform_bucket_level_access = true + + versioning { enabled = false } +} + +# Grant Databricks SA access to root bucket +resource "google_storage_bucket_iam_member" "root_admin" { + bucket = google_storage_bucket.root.name + role = "roles/storage.admin" + member = "serviceAccount:${var.databricks_google_service_account}" +} + +# Databricks workspace (managed VPC) +resource "databricks_mws_workspaces" "this" { + provider = databricks.mws + account_id = var.databricks_account_id + workspace_name = var.workspace_name + + location = var.google_region + cloud = "gcp" + + gcp_managed_network_config { + 
gke_cluster_pod_ip_range     = "10.3.0.0/16"
    gke_cluster_service_ip_range = "10.4.0.0/16"
    subnet_cidr                  = "10.0.0.0/22"
  }

  gke_config {
    connectivity_type = "PRIVATE_NODE_PUBLIC_MASTER"
    # Master range must NOT overlap the pod/service/subnet CIDRs above
    # (10.3.0.0/28 would sit inside the 10.3.0.0/16 pod range).
    master_ip_range = "10.5.0.0/28"
  }

  storage_configuration {
    gcs {
      bucket_name = google_storage_bucket.root.name
    }
  }

  token {
    comment = "Terraform-managed token"
  }
}

output "databricks_host" {
  value = databricks_mws_workspaces.this.workspace_url
}
output "databricks_token" {
  value     = databricks_mws_workspaces.this.token[0].token_value
  sensitive = true
}
```

### variables.tf

```hcl
# NOTE: HCL does not support `;` separators — multi-argument variable
# blocks must be written across multiple lines.
variable "databricks_account_id"             { type = string }
variable "databricks_google_service_account" { type = string }
variable "google_project"                    { type = string }

variable "google_region" {
  type    = string
  default = "us-central1"
}

variable "google_zone" {
  type    = string
  default = "us-central1-a"
}

variable "prefix" {
  type    = string
  default = "demo"
}

variable "workspace_name" {
  type    = string
  default = "my-workspace"
}

variable "delegate_from" {
  type    = list(string)
  default = []
}
```

---

## Pattern 2: BYOVPC (Customer-Managed VPC)

Full control over networking — required for Private Service Connect (PSC) or custom firewall rules.
+ +### main.tf — VPC & Subnets + +```hcl +# Custom VPC +resource "google_compute_network" "databricks" { + name = "${var.prefix}-vpc" + auto_create_subnetworks = false + project = var.google_project +} + +# Primary subnet for Databricks nodes +resource "google_compute_subnetwork" "databricks" { + name = var.subnet_name + ip_cidr_range = var.subnet_ip_cidr_range # e.g., "10.0.0.0/22" + region = var.google_region + network = google_compute_network.databricks.id + private_ip_google_access = true + + # Secondary ranges for GKE pods and services + secondary_ip_range { + range_name = "pods" + ip_cidr_range = var.pod_ip_cidr_range # e.g., "10.1.0.0/16" + } + + secondary_ip_range { + range_name = "services" + ip_cidr_range = var.svc_ip_cidr_range # e.g., "10.2.0.0/20" + } +} + +# Cloud Router for NAT (outbound internet) +resource "google_compute_router" "databricks" { + name = var.router_name + region = var.google_region + network = google_compute_network.databricks.id +} + +# Cloud NAT for outbound internet from private nodes +resource "google_compute_router_nat" "databricks" { + name = var.nat_name + router = google_compute_router.databricks.name + region = var.google_region + nat_ip_allocate_option = "AUTO_ONLY" + source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES" +} + +# Firewall rules +resource "google_compute_firewall" "databricks_internal" { + name = "${var.prefix}-databricks-internal" + network = google_compute_network.databricks.name + + allow { + protocol = "tcp" + ports = ["0-65535"] + } + allow { + protocol = "udp" + ports = ["0-65535"] + } + + source_tags = ["databricks"] + target_tags = ["databricks"] +} + +resource "google_compute_firewall" "databricks_egress" { + name = "${var.prefix}-databricks-egress" + network = google_compute_network.databricks.name + direction = "EGRESS" + + allow { protocol = "all" } + + target_tags = ["databricks"] + destination_ranges = ["0.0.0.0/0"] +} +``` + +### main.tf — Network Configuration & Workspace + 
```hcl
resource "databricks_mws_networks" "this" {
  provider     = databricks.mws
  account_id   = var.databricks_account_id
  network_name = "${var.prefix}-network"

  gcp_network_info {
    network_project_id    = var.google_project
    vpc_id                = google_compute_network.databricks.name
    subnet_id             = google_compute_subnetwork.databricks.name
    subnet_region         = var.google_region
    pod_ip_range_name     = "pods"
    service_ip_range_name = "services"
  }
}

resource "databricks_mws_workspaces" "byovpc" {
  provider       = databricks.mws
  account_id     = var.databricks_account_id
  workspace_name = var.workspace_name

  location = var.google_region
  cloud    = "gcp"

  network_id = databricks_mws_networks.this.network_id

  gke_config {
    connectivity_type = "PRIVATE_NODE_PUBLIC_MASTER" # or "PRIVATE_NODE_PRIVATE_MASTER" for PSC
    master_ip_range   = "10.3.0.0/28"
  }

  storage_configuration {
    gcs {
      bucket_name = google_storage_bucket.root.name
    }
  }
}
```

### variables.tf (BYOVPC additions)

```hcl
# NOTE: HCL does not support `;` separators — multi-argument variable
# blocks must be written across multiple lines.
variable "subnet_name" {
  type    = string
  default = "databricks-subnet"
}

variable "subnet_ip_cidr_range" {
  type    = string
  default = "10.0.0.0/22"
}

variable "pod_ip_cidr_range" {
  type    = string
  default = "10.1.0.0/16"
}

variable "svc_ip_cidr_range" {
  type    = string
  default = "10.2.0.0/20"
}

variable "router_name" {
  type    = string
  default = "databricks-router"
}

variable "nat_name" {
  type    = string
  default = "databricks-nat"
}
```

---

## Pattern 3: Private Service Connect (PSC) — No Public Internet

For maximum isolation using Google Private Service Connect.
+ +```hcl +# PSC endpoint for Databricks relay +resource "google_compute_address" "psc_relay" { + name = "${var.prefix}-psc-relay" + subnetwork = google_compute_subnetwork.databricks.id + address_type = "INTERNAL" + region = var.google_region +} + +resource "google_compute_forwarding_rule" "psc_relay" { + name = "${var.prefix}-psc-relay-fw" + region = var.google_region + network = google_compute_network.databricks.id + subnetwork = google_compute_subnetwork.databricks.id + ip_address = google_compute_address.psc_relay.id + target = var.relay_psc_service_attachment # From Databricks docs for your region + load_balancing_scheme = "" # PSC +} + +resource "databricks_mws_workspaces" "psc" { + provider = databricks.mws + account_id = var.databricks_account_id + workspace_name = var.workspace_name + location = var.google_region + cloud = "gcp" + network_id = databricks_mws_networks.this.network_id + + gke_config { + connectivity_type = "PRIVATE_NODE_PRIVATE_MASTER" # Full private + master_ip_range = "10.3.0.0/28" + } + + storage_configuration { + gcs { bucket_name = google_storage_bucket.root.name } + } + + private_access_settings_id = databricks_mws_private_access_settings.psc.id +} + +resource "databricks_mws_private_access_settings" "psc" { + provider = databricks.mws + account_id = var.databricks_account_id + private_access_settings_name = "${var.prefix}-psc" + region = var.google_region + public_access_enabled = false +} +``` + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **`googleapi: Error 403: Required permission missing`** | Ensure SA has all required IAM roles; check `gcp-sa-provisioning` step first | +| **`Databricks API requires service account`** | GCP provider auth must use service account, not user credentials | +| **GKE pod CIDR conflict** | Secondary ranges must not overlap with primary subnet or other existing ranges | +| **Workspace creation hangs** | GKE cluster creation takes 5-10 minutes; increase `timeouts` in workspace 
resource | +| **NAT gateway not routing** | Verify `private_ip_google_access = true` on subnet and NAT covers all ranges | +| **PSC endpoint service name** | Look up region-specific PSC service attachments in Databricks GCP docs | +| **Storage bucket permission denied** | SA needs `roles/storage.admin` on the root GCS bucket | diff --git a/databricks-skills/databricks-terraform-skill/5-unity-catalog.md b/databricks-skills/databricks-terraform-skill/5-unity-catalog.md new file mode 100644 index 00000000..0410a069 --- /dev/null +++ b/databricks-skills/databricks-terraform-skill/5-unity-catalog.md @@ -0,0 +1,836 @@ +# Unity Catalog Deployment + +## Overview + +Unity Catalog (UC) is Databricks' unified governance layer. Deployment involves: +1. **Metastore** — top-level container, one per region per account +2. **Storage credentials** — cloud IAM credentials for storage access +3. **External locations** — storage path references +4. **Catalogs** → **Schemas** → **Tables/Volumes** — data hierarchy +5. **Grants** — fine-grained access control + +Reference examples: [aws-databricks-uc](https://github.com/databricks/terraform-databricks-examples/tree/main/examples/aws-databricks-uc) | [adb-uc](https://github.com/databricks/terraform-databricks-examples/tree/main/examples/adb-uc) | [aws-workspace-uc-simple](https://github.com/databricks/terraform-databricks-examples/tree/main/examples/aws-workspace-uc-simple) + +--- + +## Stage 1: Bootstrap — Users & Service Principal + +UC deployment requires an **account admin** Service Principal (not account owner). The bootstrap stage creates this identity. 
+ +### AWS Bootstrap + +```hcl +# provider: databricks.mws (account-level) + +resource "databricks_service_principal" "uc_admin" { + provider = databricks.mws + display_name = "UC Admin Service Principal" + # application_id is set externally; use aws_iam or Azure AD to create SP +} + +resource "databricks_service_principal_role" "account_admin" { + provider = databricks.mws + service_principal_id = databricks_service_principal.uc_admin.id + role = "account_admin" +} + +# Create account-level users +resource "databricks_user" "users" { + provider = databricks.mws + for_each = toset(var.databricks_users) + user_name = each.key + force = true +} + +resource "databricks_user" "admins" { + provider = databricks.mws + for_each = toset(var.databricks_account_admins) + user_name = each.key + force = true +} + +# Admin group (will be metastore owner) +resource "databricks_group" "admin_group" { + provider = databricks.mws + display_name = var.unity_admin_group +} + +resource "databricks_group_member" "admin_members" { + provider = databricks.mws + for_each = toset(var.databricks_account_admins) + group_id = databricks_group.admin_group.id + member_id = databricks_user.admins[each.value].id +} + +resource "databricks_user_role" "account_admins" { + provider = databricks.mws + for_each = toset(var.databricks_account_admins) + user_id = databricks_user.admins[each.value].id + role = "account_admin" +} +``` + +--- + +## Stage 2: Metastore Setup + +One metastore per region per Databricks account. Choose the section matching your cloud. 
+ +### AWS Metastore Setup + +#### S3 Bucket & IAM Role for UC + +```hcl +# S3 bucket for UC metastore root +resource "aws_s3_bucket" "metastore" { + bucket = "${var.prefix}-uc-metastore" + force_destroy = true + tags = var.tags +} + +resource "aws_s3_bucket_versioning" "metastore" { + bucket = aws_s3_bucket.metastore.id + versioning_configuration { status = "Enabled" } +} + +resource "aws_s3_bucket_public_access_block" "metastore" { + bucket = aws_s3_bucket.metastore.id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +# IAM policy for Unity Catalog metastore access +data "aws_iam_policy_document" "unity_metastore" { + statement { + actions = ["s3:GetObject", "s3:PutObject", "s3:DeleteObject", "s3:ListBucket", "s3:GetBucketLocation"] + resources = [aws_s3_bucket.metastore.arn, "${aws_s3_bucket.metastore.arn}/*"] + } +} + +# IAM policy for external data access +data "aws_iam_policy_document" "external_data_access" { + statement { + actions = [ + "s3:GetObject", "s3:GetObjectVersion", "s3:PutObject", "s3:PutObjectAcl", + "s3:DeleteObject", "s3:ListBucket", "s3:GetBucketLocation" + ] + resources = [ + aws_s3_bucket.external.arn, + "${aws_s3_bucket.external.arn}/*" + ] + } + # Allow sts:AssumeRole for credential passthrough + statement { + actions = ["sts:AssumeRole"] + resources = ["arn:aws:iam::${var.aws_account_id}:role/${var.prefix}-uc-data-access"] + } +} + +# Trust policy: Databricks can assume this role +data "aws_iam_policy_document" "uc_role_assume" { + statement { + actions = ["sts:AssumeRole"] + principals { + type = "AWS" + identifiers = ["arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"] + } + condition { + test = "StringEquals" + variable = "sts:ExternalId" + values = [var.databricks_account_id] + } + } + # Self-assume for credential passthrough + statement { + actions = ["sts:AssumeRole"] + principals { + type = "AWS" + identifiers = 
["arn:aws:iam::${var.aws_account_id}:root"] + } + condition { + test = "ArnLike" + variable = "aws:PrincipalArn" + values = ["arn:aws:iam::${var.aws_account_id}:role/${var.prefix}-uc-data-access"] + } + } +} + +resource "aws_iam_role" "uc_metastore" { + name = "${var.prefix}-uc-metastore-role" + assume_role_policy = data.aws_iam_policy_document.uc_role_assume.json + tags = var.tags +} + +resource "aws_iam_role_policy" "uc_metastore" { + name = "${var.prefix}-uc-metastore-policy" + role = aws_iam_role.uc_metastore.id + policy = data.aws_iam_policy_document.unity_metastore.json +} + +resource "aws_iam_role" "uc_data_access" { + name = "${var.prefix}-uc-data-access" + assume_role_policy = data.aws_iam_policy_document.uc_role_assume.json + tags = var.tags +} + +resource "aws_iam_role_policy" "uc_data_access" { + name = "${var.prefix}-uc-data-access-policy" + role = aws_iam_role.uc_data_access.id + policy = data.aws_iam_policy_document.external_data_access.json +} +``` + +### Metastore & Assignment + +```hcl +# Create metastore (one per region per account) +resource "databricks_metastore" "this" { + provider = databricks.mws + name = "${var.prefix}-metastore" + region = var.region + storage_root = "s3://${aws_s3_bucket.metastore.bucket}/metastore" + owner = var.unity_admin_group + force_destroy = true +} + +# Assign metastore to workspace(s) +resource "databricks_metastore_assignment" "this" { + provider = databricks.mws + for_each = toset(var.databricks_workspace_ids) + metastore_id = databricks_metastore.this.id + workspace_id = each.key +} + +# Configure data access (storage credential for metastore root) +resource "databricks_metastore_data_access" "this" { + provider = databricks.mws + metastore_id = databricks_metastore.this.id + name = "${var.prefix}-metastore-access" + + aws_iam_role { + role_arn = aws_iam_role.uc_metastore.arn + } + + is_default = true + depends_on = [databricks_metastore_assignment.this] +} +``` + +--- + +### Azure Metastore Setup + +#### ADLS 
Gen2 Storage Account & Access Connector + +```hcl +# ADLS Gen2 storage account for UC metastore root +resource "azurerm_storage_account" "uc_metastore" { + name = "${replace(var.prefix, "-", "")}ucmetastore" # 3-24 chars, alphanumeric only + resource_group_name = azurerm_resource_group.databricks.name + location = azurerm_resource_group.databricks.location + account_tier = "Standard" + account_replication_type = "GRS" + account_kind = "StorageV2" + is_hns_enabled = true # ADLS Gen2 (hierarchical namespace) required + tags = var.tags +} + +# Container for metastore root +resource "azurerm_storage_container" "metastore" { + name = "metastore" + storage_account_name = azurerm_storage_account.uc_metastore.name + container_access_type = "private" +} + +# Access Connector (Azure managed identity for Databricks <-> ADLS) +resource "azurerm_databricks_access_connector" "uc_metastore" { + name = "${var.prefix}-uc-metastore-connector" + resource_group_name = azurerm_resource_group.databricks.name + location = azurerm_resource_group.databricks.location + tags = var.tags + + identity { + type = "SystemAssigned" + } +} + +# Grant the access connector identity permission on the metastore storage account +resource "azurerm_role_assignment" "uc_metastore_adls" { + scope = azurerm_storage_account.uc_metastore.id + role_definition_name = "Storage Blob Data Contributor" + principal_id = azurerm_databricks_access_connector.uc_metastore.identity[0].principal_id +} +``` + +#### Metastore & Assignment (Azure) + +```hcl +# Create metastore with ADLS Gen2 storage root +# Azure account-level host: https://accounts.azuredatabricks.net +resource "databricks_metastore" "azure" { + provider = databricks.mws + name = "${var.prefix}-metastore" + # Azure: use azuredatabricks.net account host, not accounts.cloud.databricks.com + storage_root = "abfss://${azurerm_storage_container.metastore.name}@${azurerm_storage_account.uc_metastore.name}.dfs.core.windows.net/" + owner = var.unity_admin_group + 
region = var.location # Azure region string, e.g. "eastus2" + force_destroy = true +} + +resource "databricks_metastore_assignment" "azure" { + provider = databricks.mws + for_each = toset(var.databricks_workspace_ids) + metastore_id = databricks_metastore.azure.id + workspace_id = each.key +} + +# Default data access configuration using the access connector managed identity +resource "databricks_metastore_data_access" "azure" { + provider = databricks.mws + metastore_id = databricks_metastore.azure.id + name = "${var.prefix}-metastore-access" + + azure_managed_identity { + access_connector_id = azurerm_databricks_access_connector.uc_metastore.id + } + + is_default = true + depends_on = [databricks_metastore_assignment.azure] +} +``` + +> **Azure account-level provider host**: Use `"https://accounts.azuredatabricks.net"` (not `.cloud.databricks.com`) for the `databricks.mws` provider alias when deploying Azure resources. + +--- + +### GCP Metastore Setup + +#### GCS Bucket & Service Account + +```hcl +# GCS bucket for UC metastore root +resource "google_storage_bucket" "uc_metastore" { + name = "${var.prefix}-uc-metastore" + location = upper(var.google_region) # GCS uses uppercase region, e.g. 
"US-CENTRAL1" + force_destroy = true + uniform_bucket_level_access = true + + versioning { enabled = true } # Recommended for metastore storage +} + +# Service account dedicated to UC data access +resource "google_service_account" "uc_access" { + account_id = "${var.prefix}-uc-access" + display_name = "Databricks Unity Catalog Access SA" + project = var.google_project +} + +# Grant UC access SA admin rights on the metastore bucket +resource "google_storage_bucket_iam_member" "uc_metastore_admin" { + bucket = google_storage_bucket.uc_metastore.name + role = "roles/storage.objectAdmin" + member = "serviceAccount:${google_service_account.uc_access.email}" +} + +# GCS bucket for external data access (separate from metastore root) +resource "google_storage_bucket" "uc_external" { + name = "${var.prefix}-uc-external-data" + location = upper(var.google_region) + force_destroy = true + uniform_bucket_level_access = true +} + +resource "google_storage_bucket_iam_member" "uc_external_admin" { + bucket = google_storage_bucket.uc_external.name + role = "roles/storage.objectAdmin" + member = "serviceAccount:${google_service_account.uc_access.email}" +} +``` + +#### Metastore & Assignment (GCP) + +```hcl +# GCP account-level host: https://accounts.gcp.databricks.com +resource "databricks_metastore" "gcp" { + provider = databricks.mws + name = "${var.prefix}-metastore" + storage_root = "gs://${google_storage_bucket.uc_metastore.name}/metastore" + owner = var.unity_admin_group + region = var.google_region # GCP region string, e.g. 
"us-central1" + force_destroy = true +} + +resource "databricks_metastore_assignment" "gcp" { + provider = databricks.mws + for_each = toset(var.databricks_workspace_ids) + metastore_id = databricks_metastore.gcp.id + workspace_id = each.key +} + +# Default data access — Databricks creates a managed GCP service account automatically +resource "databricks_metastore_data_access" "gcp" { + provider = databricks.mws + metastore_id = databricks_metastore.gcp.id + name = "${var.prefix}-metastore-access" + + # Option A: Let Databricks auto-create a managed GCP service account + databricks_gcp_service_account {} + + is_default = true + depends_on = [databricks_metastore_assignment.gcp] +} + +# After apply, grant the auto-created Databricks SA access to the metastore bucket. +# The SA email is available via: databricks_metastore_data_access.gcp.databricks_gcp_service_account[0].email +resource "google_storage_bucket_iam_member" "databricks_managed_sa" { + bucket = google_storage_bucket.uc_metastore.name + role = "roles/storage.objectAdmin" + member = "serviceAccount:${databricks_metastore_data_access.gcp.databricks_gcp_service_account[0].email}" +} +``` + +> **GCP two-step apply**: `databricks_metastore_data_access` with `databricks_gcp_service_account {}` creates a Databricks-managed SA. Run `terraform apply` once to get the SA email, then the `google_storage_bucket_iam_member` can be applied in the same or subsequent run. 
+ +--- + +## Stage 3: Storage Credentials & External Locations + +### AWS Storage Credential + +```hcl +resource "databricks_storage_credential" "external" { + provider = databricks.workspace + name = "${var.prefix}-s3-credential" + + aws_iam_role { + role_arn = aws_iam_role.uc_data_access.arn + } + + comment = "Managed by Terraform" + depends_on = [databricks_metastore_assignment.this] +} + +resource "databricks_external_location" "data" { + provider = databricks.workspace + name = "${var.prefix}-data-location" + url = "s3://${aws_s3_bucket.external.bucket}/data" + credential_name = databricks_storage_credential.external.name + comment = "External location for data" + + depends_on = [databricks_metastore_assignment.this] +} +``` + +### Azure Storage Credential (Managed Identity) + +The access connector for the **external data** location is separate from the metastore connector created in Stage 2. + +```hcl +# ADLS Gen2 storage account for external data (separate from metastore root) +resource "azurerm_storage_account" "uc_external" { + name = "${replace(var.prefix, "-", "")}ucexternal" + resource_group_name = azurerm_resource_group.databricks.name + location = azurerm_resource_group.databricks.location + account_tier = "Standard" + account_replication_type = "LRS" + account_kind = "StorageV2" + is_hns_enabled = true # ADLS Gen2 required + tags = var.tags +} + +resource "azurerm_storage_container" "external_data" { + name = "data" + storage_account_name = azurerm_storage_account.uc_external.name + container_access_type = "private" +} + +# Access connector for external data storage credential +resource "azurerm_databricks_access_connector" "uc_external" { + name = "${var.prefix}-uc-external-connector" + resource_group_name = azurerm_resource_group.databricks.name + location = azurerm_resource_group.databricks.location + tags = var.tags + + identity { + type = "SystemAssigned" + } +} + +resource "azurerm_role_assignment" "uc_external_adls" { + scope = 
azurerm_storage_account.uc_external.id + role_definition_name = "Storage Blob Data Contributor" + principal_id = azurerm_databricks_access_connector.uc_external.identity[0].principal_id +} + +resource "databricks_storage_credential" "azure" { + provider = databricks.workspace + name = "${var.prefix}-azure-credential" + + azure_managed_identity { + access_connector_id = azurerm_databricks_access_connector.uc_external.id + } + + depends_on = [databricks_metastore_assignment.azure] +} + +resource "databricks_external_location" "azure_data" { + provider = databricks.workspace + name = "${var.prefix}-azure-location" + url = "abfss://${azurerm_storage_container.external_data.name}@${azurerm_storage_account.uc_external.name}.dfs.core.windows.net/" + credential_name = databricks_storage_credential.azure.name + + depends_on = [databricks_metastore_assignment.azure] +} +``` + +### GCP Storage Credential (Service Account) + +```hcl +# Storage credential using the Databricks-managed GCP SA (auto-created in Stage 2) +resource "databricks_storage_credential" "gcp" { + provider = databricks.workspace + name = "${var.prefix}-gcs-credential" + + # Option A: Reuse the Databricks-managed SA created by metastore_data_access + databricks_gcp_service_account {} + + depends_on = [databricks_metastore_assignment.gcp] +} + +# External location pointing to the external GCS bucket created in Stage 2 +resource "databricks_external_location" "gcs_data" { + provider = databricks.workspace + name = "${var.prefix}-gcs-location" + url = "gs://${google_storage_bucket.uc_external.name}/" + credential_name = databricks_storage_credential.gcp.name + + depends_on = [databricks_metastore_assignment.gcp] +} + +# Grant the new Databricks-managed SA access to the external bucket +resource "google_storage_bucket_iam_member" "gcs_external_access" { + bucket = google_storage_bucket.uc_external.name + role = "roles/storage.objectAdmin" + member = 
"serviceAccount:${databricks_storage_credential.gcp.databricks_gcp_service_account[0].email}" +} +``` + +--- + +## Stage 4: Catalogs, Schemas & Tables + +### Catalog + +```hcl +resource "databricks_catalog" "main" { + provider = databricks.workspace + name = var.catalog_name + comment = "Managed by Terraform" + + # Optional: pin to specific storage location (overrides metastore root) + # storage_root = "s3://my-bucket/catalog" + + properties = { + purpose = "production" + team = "data-engineering" + } + + depends_on = [databricks_metastore_assignment.this] +} + +# Delta Sharing catalog (from provider) +resource "databricks_catalog" "shared" { + provider = databricks.workspace + name = "shared_data" + provider_name = var.delta_sharing_provider # provider must exist first + share_name = var.delta_sharing_share_name +} +``` + +### Schema + +```hcl +resource "databricks_schema" "bronze" { + provider = databricks.workspace + catalog_name = databricks_catalog.main.name + name = "bronze" + comment = "Raw ingestion layer" + + # Optional: separate storage location + # storage_root = "s3://my-bucket/bronze" + + properties = { + layer = "bronze" + } +} + +resource "databricks_schema" "silver" { + provider = databricks.workspace + catalog_name = databricks_catalog.main.name + name = "silver" + comment = "Curated layer" +} + +resource "databricks_schema" "gold" { + provider = databricks.workspace + catalog_name = databricks_catalog.main.name + name = "gold" + comment = "Business-ready layer" +} +``` + +### Volume + +```hcl +resource "databricks_volume" "landing" { + provider = databricks.workspace + name = "landing" + catalog_name = databricks_catalog.main.name + schema_name = databricks_schema.bronze.name + volume_type = "EXTERNAL" + storage_location = "${databricks_external_location.data.url}/landing" + comment = "Landing zone for raw files" +} + +resource "databricks_volume" "managed" { + provider = databricks.workspace + name = "managed-files" + catalog_name = 
databricks_catalog.main.name + schema_name = databricks_schema.bronze.name + volume_type = "MANAGED" +} +``` + +--- + +## Stage 5: Grants (Permissions) + +> **CRITICAL**: `databricks_grants` is **authoritative** — it overwrites ALL existing grants on the securable. Always include every principal that should have access. + +### Metastore Grants + +```hcl +resource "databricks_grants" "metastore" { + provider = databricks.workspace + metastore = databricks_metastore.this.id + + grant { + principal = "data_engineers" + privileges = ["CREATE_CATALOG", "CREATE_EXTERNAL_LOCATION"] + } + grant { + principal = "data_analysts" + privileges = ["CREATE_CATALOG"] + } +} +``` + +### Catalog Grants + +```hcl +resource "databricks_grants" "main_catalog" { + provider = databricks.workspace + catalog = databricks_catalog.main.name + + grant { + principal = "data_engineers" + privileges = ["USE_CATALOG", "CREATE_SCHEMA", "CREATE_TABLE", "CREATE_VOLUME"] + } + grant { + principal = "data_analysts" + privileges = ["USE_CATALOG"] + } + grant { + principal = "ml_team" + privileges = ["USE_CATALOG", "CREATE_SCHEMA"] + } +} +``` + +### Schema Grants + +```hcl +resource "databricks_grants" "bronze_schema" { + provider = databricks.workspace + schema = "${databricks_catalog.main.name}.${databricks_schema.bronze.name}" + + grant { + principal = "data_engineers" + privileges = ["USE_SCHEMA", "CREATE_TABLE", "CREATE_VOLUME", "MODIFY", "SELECT"] + } + grant { + principal = "data_analysts" + privileges = ["USE_SCHEMA", "SELECT"] + } +} + +resource "databricks_grants" "gold_schema" { + provider = databricks.workspace + schema = "${databricks_catalog.main.name}.${databricks_schema.gold.name}" + + grant { + principal = "data_analysts" + privileges = ["USE_SCHEMA", "SELECT"] + } + grant { + principal = "data_engineers" + privileges = ["USE_SCHEMA", "CREATE_TABLE", "SELECT", "MODIFY"] + } +} +``` + +### Table Grants + +```hcl +resource "databricks_grants" "sensitive_table" { + provider = 
databricks.workspace + table = "${databricks_catalog.main.name}.${databricks_schema.gold.name}.customer_data" + + grant { + principal = "pii_access_group" + privileges = ["SELECT", "MODIFY"] + } + grant { + principal = "auditors" + privileges = ["SELECT"] + } +} +``` + +### Storage Credential Grants + +```hcl +resource "databricks_grants" "storage_credential" { + provider = databricks.workspace + storage_credential = databricks_storage_credential.external.id + + grant { + principal = "data_engineers" + privileges = ["CREATE_EXTERNAL_LOCATION", "READ_FILES", "WRITE_FILES"] + } +} +``` + +### External Location Grants + +```hcl +resource "databricks_grants" "external_location" { + provider = databricks.workspace + external_location = databricks_external_location.data.id + + grant { + principal = "data_engineers" + privileges = ["CREATE_EXTERNAL_TABLE", "READ_FILES", "WRITE_FILES", "CREATE_MANAGED_STORAGE"] + } + grant { + principal = "data_analysts" + privileges = ["READ_FILES"] + } +} +``` + +--- + +## Complete UC Deployment Reference + +### Required Provider Aliases + +```hcl +# providers.tf +provider "databricks" { + alias = "mws" # Account-level + host = "https://accounts.cloud.databricks.com" # AWS + account_id = var.databricks_account_id + client_id = var.client_id + client_secret = var.client_secret +} + +provider "databricks" { + alias = "workspace" # Workspace-level + host = var.workspace_url + client_id = var.client_id + client_secret = var.client_secret +} +``` + +### Dependency Order + +``` +databricks_metastore + └── databricks_metastore_assignment (requires: workspace exists) + └── databricks_metastore_data_access + └── databricks_storage_credential + └── databricks_external_location + └── databricks_catalog + └── databricks_schema + ├── databricks_volume + ├── databricks_grants (schema) + └── databricks_grants (table) +``` + +### Privilege Reference Table + +| Securable | Common Privileges | Notes | +|-----------|-------------------|-------| +| 
**Metastore** | `CREATE_CATALOG`, `CREATE_EXTERNAL_LOCATION`, `CREATE_STORAGE_CREDENTIAL` | Granted to account admins/data admins | +| **Catalog** | `USE_CATALOG`, `CREATE_SCHEMA`, `CREATE_TABLE`, `ALL_PRIVILEGES` | `USE_CATALOG` required to access anything within | +| **Schema** | `USE_SCHEMA`, `CREATE_TABLE`, `CREATE_VOLUME`, `SELECT`, `MODIFY` | `USE_SCHEMA` required to access tables/volumes | +| **Table** | `SELECT`, `MODIFY`, `ALL_PRIVILEGES` | Row/column-level security via row filters/column masks | +| **Volume** | `READ_VOLUME`, `WRITE_VOLUME`, `ALL_PRIVILEGES` | For file-level access | +| **External Location** | `READ_FILES`, `WRITE_FILES`, `CREATE_EXTERNAL_TABLE`, `CREATE_MANAGED_STORAGE` | | +| **Storage Credential** | `CREATE_EXTERNAL_LOCATION`, `READ_FILES`, `WRITE_FILES` | | + +--- + +## Workspace Configuration for UC + +```hcl +# Enable UC features on workspace +resource "databricks_workspace_conf" "uc" { + provider = databricks.workspace + # custom_config is map(string) — all values must be quoted strings, including booleans + custom_config = { + "enableIpAccessLists" = "true" + "maxTokenLifetimeDays" = "90" + } +} + +# Set default catalog for workspace users +resource "databricks_default_namespace_setting" "this" { + provider = databricks.workspace + namespace { + value = databricks_catalog.main.name + } +} +``` + +--- + +## Complete UC Deployment — Cloud Comparison + +| Step | AWS | Azure | GCP | +|------|-----|-------|-----| +| **Metastore storage** | S3 bucket | ADLS Gen2 (`is_hns_enabled = true`) | GCS bucket | +| **Metastore storage_root** | `s3://bucket/metastore` | `abfss://container@account.dfs.core.windows.net/` | `gs://bucket/metastore` | +| **Identity for data access** | IAM role with trust policy | Access Connector (system-assigned managed identity) | Databricks-managed GCP SA or existing SA | +| **Account-level provider host** | `accounts.cloud.databricks.com` | `accounts.azuredatabricks.net` | `accounts.gcp.databricks.com` | +| 
**metastore_data_access block** | `aws_iam_role { role_arn }` | `azure_managed_identity { access_connector_id }` | `databricks_gcp_service_account {}` | +| **Storage credential block** | `aws_iam_role { role_arn }` | `azure_managed_identity { access_connector_id }` | `databricks_gcp_service_account {}` | + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **`METASTORE_ALREADY_EXISTS` in region** | Only one metastore per region; use data source to reference existing: `data "databricks_metastore" "this"` | +| **`grants` removes admin permissions** | `databricks_grants` is authoritative — always include ALL principals including admins | +| **External location validation fails** | Set `skip_validation = true` temporarily, then fix IAM role trust policy | +| **`metastore_assignment` fails with workspace not found** | Workspace must be fully created before assignment; use `depends_on` | +| **Catalog deletion fails** | Use `force_destroy = true` and first delete all child objects, or set `depends_on` order | +| **Azure: `is_hns_enabled` must be true** | ADLS Gen2 hierarchical namespace is required for UC storage root; cannot be changed after creation | +| **Azure: Access Connector role propagation** | After creating connector, allow ~5 minutes for `Storage Blob Data Contributor` role to propagate | +| **Azure: wrong account host** | Use `accounts.azuredatabricks.net` for Azure account provider, NOT `accounts.cloud.databricks.com` | +| **GCP: two-step apply for managed SA** | `databricks_gcp_service_account {}` creates SA on first apply; grant bucket IAM in same or subsequent apply | +| **GCP SA credential validation** | Ensure SA has `roles/storage.objectAdmin` on the target GCS bucket | +| **GCP: storage bucket location** | Use `upper(var.google_region)` for GCS bucket location (GCS uses uppercase region names) | diff --git a/databricks-skills/databricks-terraform-skill/6-databricks-resources.md 
b/databricks-skills/databricks-terraform-skill/6-databricks-resources.md new file mode 100644 index 00000000..af6003ed --- /dev/null +++ b/databricks-skills/databricks-terraform-skill/6-databricks-resources.md @@ -0,0 +1,1341 @@ +# Databricks Resource Management + +## Overview + +Core Databricks resources manageable via Terraform: +- **Clusters** — interactive and job compute +- **Jobs** — scheduled/triggered workloads +- **SQL Warehouses** — serverless SQL compute +- **Notebooks** — workspace code artifacts +- **Secrets** — secure credential storage +- **Cluster Policies** — governance for cluster creation +- **Databricks Apps** — serverless web apps running inside Databricks +- **Mosaic AI Vector Search** — serverless similarity search engine + +All resources use a **workspace-level provider**. + +--- + +## Clusters + +### All-Purpose Cluster (Interactive) + +```hcl +data "databricks_spark_version" "lts" { + provider = databricks.workspace + long_term_support = true +} + +data "databricks_node_type" "standard" { + provider = databricks.workspace + local_disk = false + min_cores = 4 + gb_per_core = 8 +} + +resource "databricks_cluster" "shared_autoscaling" { + provider = databricks.workspace + cluster_name = "Shared Autoscaling Cluster" + spark_version = data.databricks_spark_version.lts.id + node_type_id = data.databricks_node_type.standard.id + autotermination_minutes = 30 + is_pinned = true + + autoscale { + min_workers = 1 + max_workers = 8 + } + + spark_conf = { + "spark.databricks.cluster.profile" = "serverless" + "spark.databricks.repl.allowedLanguages" = "python,sql,scala" + } + + spark_env_vars = { + "PYSPARK_PYTHON" = "/databricks/python3/bin/python3" + } + + custom_tags = { + "Team" = "data-engineering" + "Environment" = "production" + } +} +``` + +### Single-Node Cluster (ML Development) + +```hcl +resource "databricks_cluster" "ml_single_node" { + provider = databricks.workspace + cluster_name = "ML Dev Single Node" + spark_version = 
"15.4.x-cpu-ml-scala2.12" # ML runtime + node_type_id = "i3.2xlarge" + num_workers = 0 # Single-node + autotermination_minutes = 60 + + spark_conf = { + "spark.databricks.cluster.profile" = "singleNode" + "spark.master" = "local[*]" + } + + custom_tags = { + "ResourceClass" = "SingleNode" + } +} +``` + +### AWS Cluster with Instance Profile + +```hcl +resource "databricks_instance_profile" "s3_access" { + provider = databricks.workspace + instance_profile_arn = "arn:aws:iam::123456789012:instance-profile/my-s3-profile" +} + +resource "databricks_cluster" "aws_cluster" { + provider = databricks.workspace + cluster_name = "AWS S3 Access Cluster" + spark_version = data.databricks_spark_version.lts.id + node_type_id = "i3.xlarge" + autotermination_minutes = 30 + + autoscale { + min_workers = 2 + max_workers = 10 + } + + aws_attributes { + availability = "SPOT_WITH_FALLBACK" + zone_id = "us-east-1a" + first_on_demand = 2 + spot_bid_price_percent = 100 + instance_profile_arn = databricks_instance_profile.s3_access.id + + ebs_volume_type = "GENERAL_PURPOSE_SSD" + ebs_volume_count = 1 + ebs_volume_size = 100 + } +} +``` + +### Azure Cluster + +```hcl +resource "databricks_cluster" "azure_cluster" { + provider = databricks.workspace + cluster_name = "Azure Spot Cluster" + spark_version = data.databricks_spark_version.lts.id + node_type_id = "Standard_DS3_v2" + autotermination_minutes = 20 + + autoscale { + min_workers = 2 + max_workers = 8 + } + + azure_attributes { + availability = "SPOT_WITH_FALLBACK_AZURE" + first_on_demand = 1 + spot_bid_max_price = 100 # % of on-demand price + } + + library { + pypi { package = "pandas==2.0.0" } + } + library { + pypi { package = "scikit-learn" } + } +} +``` + +### GCP Cluster + +```hcl +resource "databricks_cluster" "gcp_cluster" { + provider = databricks.workspace + cluster_name = "GCP Preemptible Cluster" + spark_version = data.databricks_spark_version.lts.id + node_type_id = "n1-standard-4" + autotermination_minutes = 30 + + 
autoscale { + min_workers = 2 + max_workers = 10 + } + + gcp_attributes { + availability = "PREEMPTIBLE_WITH_FALLBACK_GCP" + google_service_account = var.cluster_service_account + local_ssd_count = 1 + } +} +``` + +### Cluster with Init Script & Logging + +```hcl +resource "databricks_cluster" "with_init" { + provider = databricks.workspace + cluster_name = "Cluster with Init Script" + spark_version = data.databricks_spark_version.lts.id + node_type_id = data.databricks_node_type.standard.id + autotermination_minutes = 30 + num_workers = 4 + + cluster_log_conf { + s3 { + destination = "s3://my-logs-bucket/cluster-logs" + region = "us-east-1" + enable_encryption = true + } + } + + init_scripts { + s3 { + destination = "s3://my-scripts-bucket/init.sh" + } + } + + # Alternative: workspace init script + init_scripts { + workspace { + destination = "/Shared/init-scripts/setup.sh" + } + } +} +``` + +--- + +## Jobs + +### Simple Notebook Job + +```hcl +resource "databricks_job" "etl_notebook" { + provider = databricks.workspace + name = "Daily ETL Notebook Job" + + task { + task_key = "extract_load" + + notebook_task { + notebook_path = "/Shared/ETL/extract_load" + source = "WORKSPACE" + base_parameters = { + "date" = "{{ds}}" + "environment" = "production" + } + } + + new_cluster { + spark_version = data.databricks_spark_version.lts.id + node_type_id = "i3.xlarge" + num_workers = 4 + autotermination_minutes = 30 + } + } + + schedule { + quartz_cron_expression = "0 0 6 * * ?" 
# Daily at 6:00 AM America/New_York (cron is evaluated in timezone_id, not UTC)
+    timezone_id            = "America/New_York"
+    pause_status           = "UNPAUSED"
+  }
+
+  email_notifications {
+    on_failure = ["data-engineering-alerts@company.com"]
+    on_success = []
+    on_start   = []
+  }
+
+  max_concurrent_runs = 1
+}
+```
+
+### Multi-Task Job with Dependencies
+
+```hcl
+resource "databricks_job" "pipeline" {
+  provider = databricks.workspace
+  name     = "Multi-Stage Pipeline"
+
+  # Shared cluster across tasks
+  job_cluster {
+    job_cluster_key = "shared_cluster"
+    new_cluster {
+      spark_version = data.databricks_spark_version.lts.id
+      node_type_id  = "i3.xlarge"
+      num_workers   = 4
+    }
+  }
+
+  task {
+    task_key        = "ingest"
+    job_cluster_key = "shared_cluster"
+
+    python_wheel_task {
+      package_name = "my_pipeline"
+      entry_point  = "ingest"
+      named_parameters = {
+        "source" = "s3://source-bucket/data"
+      }
+    }
+
+    library {
+      whl = "s3://my-artifacts/my_pipeline-1.0.0-py3-none-any.whl"
+    }
+  }
+
+  task {
+    task_key        = "transform"
+    job_cluster_key = "shared_cluster"
+
+    depends_on {
+      task_key = "ingest"
+    }
+
+    notebook_task {
+      notebook_path = "/Shared/Pipeline/transform"
+    }
+  }
+
+  task {
+    task_key        = "validate"
+    job_cluster_key = "shared_cluster"
+
+    depends_on {
+      task_key = "transform"
+    }
+
+    spark_python_task {
+      python_file = "/Shared/Pipeline/validate.py"
+      source      = "WORKSPACE"
+      parameters  = ["--env", "prod"]
+    }
+  }
+
+  task {
+    task_key = "send_report"
+
+    depends_on {
+      task_key = "validate"
+    }
+
+    # Serverless — no cluster needed for SQL
+    sql_task {
+      warehouse_id = var.sql_warehouse_id
+      query { query_id = var.report_query_id }
+    }
+  }
+
+  schedule {
+    quartz_cron_expression = "0 0 2 * * ?"
+ timezone_id = "UTC" + pause_status = "UNPAUSED" + } + + email_notifications { + on_failure = ["pipeline-alerts@company.com"] + } +} +``` + +### File Arrival Trigger Job + +```hcl +resource "databricks_job" "file_arrival" { + provider = databricks.workspace + name = "Process New Files" + + task { + task_key = "process_files" + + notebook_task { + notebook_path = "/Shared/Ingest/process_files" + } + + new_cluster { + spark_version = data.databricks_spark_version.lts.id + node_type_id = "i3.xlarge" + num_workers = 2 + } + } + + trigger { + file_arrival { + url = "s3://landing-bucket/incoming/" + min_time_between_triggers_seconds = 300 + wait_after_last_change_seconds = 120 + } + } +} +``` + +### Git Source Job + +```hcl +resource "databricks_job" "git_job" { + provider = databricks.workspace + name = "Git-Sourced ETL" + + git_source { + url = "https://github.com/my-org/my-pipeline" + provider = "gitHub" + branch = "main" + } + + task { + task_key = "run_etl" + + notebook_task { + notebook_path = "notebooks/etl_main" # Relative to repo root + source = "GIT" + } + + new_cluster { + spark_version = "15.4.x-scala2.12" + node_type_id = "i3.xlarge" + num_workers = 4 + } + } +} +``` + +--- + +## SQL Warehouses + +```hcl +resource "databricks_sql_endpoint" "shared" { + provider = databricks.workspace + name = "Shared SQL Warehouse" + cluster_size = "Small" # 2X-Small, X-Small, Small, Medium, Large, X-Large, 2X-Large, 3X-Large, 4X-Large + max_num_clusters = 3 + auto_stop_mins = 10 + + # Serverless (recommended) + enable_serverless_compute = true + + # Tags + tags { + custom_tags { + key = "Team" + value = "Analytics" + } + } +} + +# Pro warehouse (required for Serverless DLT, Lakeflow) +resource "databricks_sql_endpoint" "pro" { + provider = databricks.workspace + name = "Pro SQL Warehouse" + cluster_size = "Medium" + warehouse_type = "PRO" + max_num_clusters = 5 + auto_stop_mins = 30 + enable_photon = true +} +``` + +--- + +## Notebooks + +```hcl +resource 
"databricks_notebook" "etl" { + provider = databricks.workspace + path = "/Shared/ETL/main_etl" + language = "PYTHON" + content_base64 = base64encode(<<-EOT + # Databricks notebook source + # COMMAND ---------- + dbutils.widgets.text("date", "2024-01-01", "Processing Date") + date = dbutils.widgets.get("date") + print(f"Processing date: {date}") + + # COMMAND ---------- + df = spark.read.parquet(f"s3://landing-bucket/data/dt={date}") + df.write.mode("overwrite").saveAsTable("main.bronze.raw_events") + EOT + ) +} + +# Import a notebook from a file +resource "databricks_notebook" "from_file" { + provider = databricks.workspace + path = "/Shared/Analysis/report" + source = "${path.module}/notebooks/report.py" + language = "PYTHON" +} +``` + +--- + +## Secrets + +```hcl +# Secret scope (Databricks-backed) +resource "databricks_secret_scope" "app_secrets" { + provider = databricks.workspace + name = "app-secrets" +} + +# Azure Key Vault-backed scope +resource "databricks_secret_scope" "akv" { + provider = databricks.workspace + name = "azure-kv-scope" + + keyvault_metadata { + resource_id = azurerm_key_vault.secrets.id + dns_name = azurerm_key_vault.secrets.vault_uri + } + + backend_type = "AZURE_KEYVAULT" +} + +# Individual secrets +resource "databricks_secret" "db_password" { + provider = databricks.workspace + key = "db-password" + string_value = var.db_password # sensitive variable + scope = databricks_secret_scope.app_secrets.id +} + +resource "databricks_secret" "api_token" { + provider = databricks.workspace + key = "api-token" + string_value = var.api_token + scope = databricks_secret_scope.app_secrets.id +} + +# Grant access to secrets scope +resource "databricks_secret_acl" "readers" { + provider = databricks.workspace + principal = "data_engineers" + permission = "READ" + scope = databricks_secret_scope.app_secrets.id +} +``` + +--- + +## Cluster Policies + +```hcl +resource "databricks_cluster_policy" "data_engineering" { + provider = databricks.workspace + 
name = "Data Engineering Policy" + + definition = jsonencode({ + "spark_version" = { + "type" = "allowlist" + "values" = ["15.4.x-scala2.12", "14.3.x-scala2.12"] + "defaultValue" = "15.4.x-scala2.12" + } + "node_type_id" = { + "type" = "allowlist" + "values" = ["i3.xlarge", "i3.2xlarge", "i3.4xlarge"] + } + "autotermination_minutes" = { + "type" = "range" + "minValue" = 10 + "maxValue" = 120 + "defaultValue" = 30 + } + "custom_tags.Team" = { + "type" = "fixed" + "value" = "data-engineering" + } + # Enforce single-user mode for security + "data_security_mode" = { + "type" = "fixed" + "value" = "SINGLE_USER" + } + # Limit max workers + "autoscale.max_workers" = { + "type" = "range" + "maxValue" = 20 + } + }) +} + +# Assign policy to a group +resource "databricks_permissions" "policy_users" { + provider = databricks.workspace + cluster_policy_id = databricks_cluster_policy.data_engineering.id + + access_control { + group_name = "data_engineers" + permission_level = "CAN_USE" + } +} +``` + +--- + +## Workspace Permissions + +```hcl +# Cluster permissions +resource "databricks_permissions" "cluster" { + provider = databricks.workspace + cluster_id = databricks_cluster.shared_autoscaling.id + + access_control { + group_name = "data_engineers" + permission_level = "CAN_RESTART" + } + access_control { + group_name = "data_analysts" + permission_level = "CAN_ATTACH_TO" + } +} + +# Job permissions +resource "databricks_permissions" "job" { + provider = databricks.workspace + job_id = databricks_job.etl_notebook.id + + access_control { + group_name = "data_engineers" + permission_level = "CAN_MANAGE" + } + access_control { + group_name = "data_analysts" + permission_level = "CAN_VIEW" + } +} + +# Notebook permissions +resource "databricks_permissions" "notebook" { + provider = databricks.workspace + notebook_id = databricks_notebook.etl.object_id + + access_control { + group_name = "data_engineers" + permission_level = "CAN_EDIT" + } + access_control { + group_name = 
"data_analysts" + permission_level = "CAN_READ" + } +} + +# SQL Warehouse permissions +resource "databricks_permissions" "sql_warehouse" { + provider = databricks.workspace + sql_endpoint_id = databricks_sql_endpoint.shared.id + + access_control { + group_name = "users" # All workspace users + permission_level = "CAN_USE" + } +} +``` + +--- + +## Workspace Configuration + +```hcl +resource "databricks_workspace_conf" "settings" { + provider = databricks.workspace + custom_config = { + "enableIpAccessLists" = "true" + "maxTokenLifetimeDays" = "90" + "enableTokensConfig" = "true" + } +} +``` + +--- + +## Global Init Script + +```hcl +resource "databricks_global_init_script" "proxy" { + provider = databricks.workspace + name = "Corporate Proxy Setup" + position = 0 + enabled = true + content_base64 = base64encode(<<-EOT + #!/bin/bash + echo "Setting up corporate proxy..." + echo "https_proxy=https://proxy.company.com:8080" >> /etc/environment + EOT + ) +} +``` + +--- + +## Token Management + +```hcl +# Create a service token for automation +resource "databricks_token" "automation" { + provider = databricks.workspace + comment = "Terraform automation token" + lifetime_seconds = 7776000 # 90 days +} + +output "automation_token" { + value = databricks_token.automation.token_value + sensitive = true +} +``` + +--- + +## Databricks Apps + +Databricks Apps are serverless web applications that run **inside** the customer's Databricks instance with direct access to workspace data, compute, and services. Each app gets its own auto-provisioned **service principal** and a managed execution environment. 
+ +**What Terraform manages:** +- App container creation and metadata (`name`, `description`, `compute_size`) +- Resource bindings — what the app's service principal can access (warehouses, jobs, secrets, model endpoints, UC securables, Lakebase, Genie spaces) +- `user_api_scopes` — which Databricks API scopes user tokens passed to the app can exercise + +**What Terraform does NOT manage:** +- App code deployment — use the Databricks CLI (`databricks apps deploy`), Asset Bundles, or a CI/CD pipeline +- App runtime configuration (`app.yaml`) — committed alongside source code + +--- + +### Resource: `databricks_app` + +#### Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `name` | string | Yes | Lowercase alphanumeric + hyphens only; unique within the workspace | +| `description` | string | No | Human-readable description | +| `compute_size` | string | No | `MEDIUM` (default) or `LARGE` | +| `budget_policy_id` | string | No | Budget policy to associate | +| `user_api_scopes` | list(string) | No | API scopes granted to user tokens the app receives | +| `resources` | list(object) | No | Resource bindings — what the app service principal can access | + +#### `resources` List Object + +Each entry requires `name` and exactly **one** resource-type block: + +| Resource Type | Fields | Permissions | +|---------------|--------|-------------| +| `secret` | `scope`, `key` | `READ`, `WRITE`, `MANAGE` | +| `sql_warehouse` | `id` | `CAN_USE`, `CAN_MANAGE`, `IS_OWNER` | +| `serving_endpoint` | `name` | `CAN_VIEW`, `CAN_QUERY`, `CAN_MANAGE` | +| `job` | `id` | `CAN_VIEW`, `CAN_MANAGE_RUN`, `CAN_MANAGE`, `IS_OWNER` | +| `uc_securable` | `securable_type`, `securable_full_name` | e.g. 
`READ_VOLUME`, `WRITE_VOLUME`, `SELECT` | +| `database` | `database_name`, `instance_name` | `CAN_CONNECT_AND_CREATE` | +| `genie_space` | `name`, `space_id` | `CAN_VIEW`, `CAN_RUN`, `CAN_EDIT`, `CAN_MANAGE` | + +All resource entries also accept an optional `description` field. + +#### Exported Attributes + +| Attribute | Description | +|-----------|-------------| +| `url` | Public URL of the deployed app | +| `app_status.state` | Application state | +| `app_status.message` | Human-readable status message | +| `compute_status.state` | Compute provisioning state | +| `compute_status.message` | Compute status message | +| `service_principal_id` | Numeric ID of the app's auto-created service principal | +| `service_principal_client_id` | OAuth client ID of the service principal | +| `service_principal_name` | Display name of the service principal | +| `default_source_code_path` | Workspace path where app code lives | +| `effective_budget_policy_id` | Applied budget policy | +| `effective_user_api_scopes` | Resolved API scopes | +| `create_time` / `creator` | Creation audit fields | +| `update_time` / `updater` | Last-update audit fields | + +--- + +### Pattern 1: Minimal App + +Just a name — binds no resources. Use this to bootstrap the app container before deploying code. + +```hcl +resource "databricks_app" "my_app" { + name = "my-app" + description = "My custom Databricks application" +} + +output "app_url" { + value = databricks_app.my_app.url +} + +output "app_service_principal_id" { + value = databricks_app.my_app.service_principal_id + description = "Grant this SP access to additional UC objects outside the resources block" +} +``` + +--- + +### Pattern 2: RAG / AI App with Model Serving + SQL Warehouse + Secret + +A typical Retrieval-Augmented Generation app that queries a vector index via a warehouse, calls a model serving endpoint, and reads an API key from secrets. 
+ +```hcl +resource "databricks_app" "rag_app" { + name = "rag-assistant" + description = "RAG application using Llama 3.1 and Vector Search" + compute_size = "MEDIUM" + + resources = [ + { + name = "llm-endpoint" + description = "Foundation model for generation" + serving_endpoint = { + name = "databricks-meta-llama-3-1-70b-instruct" + permission = "CAN_QUERY" + } + }, + { + name = "analytics-warehouse" + description = "SQL warehouse for structured queries" + sql_warehouse = { + id = var.sql_warehouse_id + permission = "CAN_USE" + } + }, + { + name = "openai-api-key" + description = "External API key stored in secrets" + secret = { + scope = "app-secrets" + key = "openai_api_key" + permission = "READ" + } + }, + ] +} + +output "rag_app_url" { + value = databricks_app.rag_app.url +} +``` + +--- + +### Pattern 3: Data App with Jobs + UC Volume + Lakebase + +An app that triggers a data pipeline job, reads from a UC volume, and connects to a Lakebase database. + +```hcl +resource "databricks_app" "data_app" { + name = "data-pipeline-app" + description = "Triggers ETL jobs and reads from UC volumes" + + resources = [ + { + name = "etl-job" + job = { + id = var.etl_job_id + permission = "CAN_MANAGE_RUN" # trigger runs but not edit + } + }, + { + name = "output-volume" + uc_securable = { + securable_type = "volume" + securable_full_name = "main.silver.output_volume" + permission = "READ_VOLUME" + } + }, + { + name = "app-database" + database = { + database_name = "app_db" + instance_name = var.lakebase_instance_name # databricks_database_instance.this.name + permission = "CAN_CONNECT_AND_CREATE" + } + }, + ] +} +``` + +--- + +### Pattern 4: Dashboard App with Genie Space + +An app that surfaces a Genie AI/BI space with controlled access levels. 
+ +```hcl +resource "databricks_app" "dashboard_app" { + name = "sales-dashboard" + description = "Sales analytics powered by Genie" + + resources = [ + { + name = "sales-genie" + genie_space = { + name = "Sales Analytics Genie" + space_id = var.genie_space_id + permission = "CAN_RUN" + } + }, + { + name = "reporting-warehouse" + sql_warehouse = { + id = var.sql_warehouse_id + permission = "CAN_USE" + } + }, + ] +} +``` + +--- + +### Pattern 5: App with All Resource Types + +Comprehensive example showing every resource-binding type in one app. + +```hcl +resource "databricks_app" "full_app" { + name = "enterprise-app" + description = "Full-featured enterprise application" + compute_size = "LARGE" + + resources = [ + # SQL Warehouse + { + name = "primary-warehouse" + sql_warehouse = { + id = var.sql_warehouse_id + permission = "CAN_USE" + } + }, + # Model serving endpoint + { + name = "llm" + serving_endpoint = { + name = "databricks-dbrx-instruct" + permission = "CAN_QUERY" + } + }, + # Scheduled job + { + name = "refresh-job" + job = { + id = var.refresh_job_id + permission = "CAN_MANAGE_RUN" + } + }, + # Databricks secret + { + name = "db-password" + secret = { + scope = "prod-secrets" + key = "db_password" + permission = "READ" + } + }, + # UC volume + { + name = "data-volume" + uc_securable = { + securable_type = "volume" + securable_full_name = "main.gold.reports_volume" + permission = "READ_VOLUME" + } + }, + # Lakebase instance + { + name = "app-db" + database = { + database_name = "app_state_db" + instance_name = var.lakebase_instance_name + permission = "CAN_CONNECT_AND_CREATE" + } + }, + # Genie space + { + name = "analytics-genie" + genie_space = { + name = "Enterprise Analytics" + space_id = var.genie_space_id + permission = "CAN_RUN" + } + }, + ] + + user_api_scopes = ["sql", "serving-endpoints"] +} +``` + +--- + +### Deployment Workflow + +Terraform provisions the app container; code deployment is a separate step: + +```bash +# 1. 
Apply Terraform to create the app container +terraform apply + +# 2. Deploy app code using Databricks CLI +databricks apps deploy my-app --source-code-path /Workspace/Users/me/my-app + +# 3. Or use Asset Bundles (recommended for CI/CD) +# In databricks.yml: +# bundle: +# name: my-app +# resources: +# apps: +# my_app: +# name: my-app +# source_code_path: ./app +``` + +--- + +### Import + +```hcl +import { + to = databricks_app.this + id = "my-app" # app name +} +``` + +```bash +terraform import databricks_app.this my-app +``` + +--- + +## Mosaic AI Vector Search + +Mosaic AI Vector Search is a serverless similarity search engine built into Databricks. It stores vector representations of data alongside metadata for semantic/embedding search. Two resources are required: an **endpoint** (compute) and one or more **indexes** (data + vectors). + +### Resource Hierarchy + +``` +databricks_vector_search_endpoint (shared compute — one per team/project) +└── databricks_vector_search_index (the searchable vector index — DELTA_SYNC or DIRECT_ACCESS) +``` + +--- + +### Resource: `databricks_vector_search_endpoint` + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `name` | string | Yes | Endpoint name. Changing forces recreation. | +| `endpoint_type` | string | Yes | Currently only `STANDARD`. Changing forces recreation. 
| +| `budget_policy_id` | string | No | Budget policy to associate | + +**Exported attributes:** + +| Attribute | Description | +|-----------|-------------| +| `id` | Endpoint name | +| `endpoint_id` | Internal UUID | +| `creator` | Creator email | +| `creation_timestamp` | Creation time (ms) | +| `num_indexes` | Number of indexes on this endpoint | +| `endpoint_status.state` | `PROVISIONING`, `ONLINE`, or `OFFLINE` | +| `endpoint_status.message` | Human-readable status message | + +--- + +### Resource: `databricks_vector_search_index` + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `name` | string | Yes | Three-level UC name: `catalog.schema.index_name` | +| `endpoint_name` | string | Yes | Name of the Vector Search endpoint | +| `primary_key` | string | Yes | Column used as the primary key | +| `index_type` | string | Yes | `DELTA_SYNC` (auto-syncs from a Delta table) or `DIRECT_ACCESS` (manual upsert/delete via API) | +| `delta_sync_index_spec` | block | Conditional | Required when `index_type = "DELTA_SYNC"` | +| `direct_access_index_spec` | block | Conditional | Required when `index_type = "DIRECT_ACCESS"` | + +#### `delta_sync_index_spec` Block + +| Field | Required | Description | +|-------|----------|-------------| +| `source_table` | Yes | Delta table to sync from (three-level UC name) | +| `pipeline_type` | No | `TRIGGERED` (manual refresh) or `CONTINUOUS` (real-time stream). 
Default: `TRIGGERED` | +| `columns_to_sync` | No | List of columns to include; defaults to all | +| `embedding_writeback_table` | No | Write embeddings back to a Delta table for inspection | +| `embedding_source_columns` | Conditional | Text columns for model-computed embeddings (use this OR `embedding_vector_columns`) | +| `embedding_vector_columns` | Conditional | Pre-computed vector columns (use this OR `embedding_source_columns`) | + +#### `embedding_source_columns` Sub-block + +```hcl +embedding_source_columns { + name = "text_column" # column with raw text + embedding_model_endpoint_name = "gte-large-en" # model serving endpoint + # model_endpoint_name_for_query = "..." # optional: different model for querying +} +``` + +#### `embedding_vector_columns` Sub-block + +```hcl +embedding_vector_columns { + name = "embedding" # column containing pre-computed vectors + embedding_dimension = 1024 # vector dimensionality +} +``` + +#### `direct_access_index_spec` Block + +```hcl +direct_access_index_spec { + schema_json = jsonencode({ + "primary_key" = "id" + "columns" = [ + { "name" = "id", "data_type" = "long" }, + { "name" = "text", "data_type" = "string" }, + { "name" = "embedding", "data_type" = "array", "embedding_dimension" = 1024 } + ] + }) + embedding_vector_columns { + name = "embedding" + embedding_dimension = 1024 + } +} +``` + +**Exported attributes:** + +| Attribute | Description | +|-----------|-------------| +| `id` | Index name | +| `creator` | Creator email | +| `delta_sync_index_spec.pipeline_id` | DLT pipeline ID managing the sync | +| `status.ready` | `true` when index is ready for queries | +| `status.indexed_row_count` | Number of rows indexed | +| `status.index_url` | REST API URL for direct index operations | +| `status.message` | Status description | + +--- + +### Pattern 1: DELTA_SYNC with Model-Computed Embeddings + +The most common pattern — Databricks computes embeddings from a text column using a model serving endpoint and auto-syncs 
from a Delta table. + +```hcl +# 1. Vector Search endpoint (shared compute) +resource "databricks_vector_search_endpoint" "this" { + name = "${var.prefix}-vs-endpoint" + endpoint_type = "STANDARD" +} + +# 2. DELTA_SYNC index — model computes embeddings from the 'content' column +resource "databricks_vector_search_index" "documents" { + name = "main.rag.documents_index" + endpoint_name = databricks_vector_search_endpoint.this.name + primary_key = "id" + index_type = "DELTA_SYNC" + + delta_sync_index_spec { + source_table = "main.rag.documents" # source Delta table (UC three-level name) + pipeline_type = "TRIGGERED" # refresh on demand (use CONTINUOUS for real-time) + + embedding_source_columns { + name = "content" # text column to embed + embedding_model_endpoint_name = "gte-large-en" # model serving endpoint name + } + + columns_to_sync = ["id", "content", "title", "category", "last_updated"] + } +} + +output "index_ready" { + value = databricks_vector_search_index.documents.status.ready +} +``` + +--- + +### Pattern 2: DELTA_SYNC with Pre-Computed Embeddings + +Use when embeddings are already computed and stored as a vector column in the Delta table. 
+
+```hcl
+resource "databricks_vector_search_endpoint" "this" {
+  name          = "${var.prefix}-vs-endpoint"
+  endpoint_type = "STANDARD"
+}
+
+resource "databricks_vector_search_index" "precomputed" {
+  name          = "main.ml.embeddings_index"
+  endpoint_name = databricks_vector_search_endpoint.this.name
+  primary_key   = "chunk_id"
+  index_type    = "DELTA_SYNC"
+
+  delta_sync_index_spec {
+    source_table  = "main.ml.document_embeddings"
+    pipeline_type = "CONTINUOUS" # real-time sync as table is updated
+
+    embedding_vector_columns {
+      name                = "embedding" # ArrayType(FloatType()) column
+      embedding_dimension = 1024
+    }
+
+    columns_to_sync = ["chunk_id", "text", "source_url", "embedding"]
+
+    # Optional: write index contents back to a Delta table for inspection/debugging
+    embedding_writeback_table = "main.ml.embeddings_index_writeback"
+  }
+}
+```
+
+---
+
+### Pattern 3: DIRECT_ACCESS Index
+
+For applications that control exactly which vectors are stored via the REST API (upsert/delete). No source Delta table required.
+
+```hcl
+resource "databricks_vector_search_endpoint" "this" {
+  name          = "${var.prefix}-vs-endpoint"
+  endpoint_type = "STANDARD"
+}
+
+resource "databricks_vector_search_index" "custom" {
+  name          = "main.search.custom_index"
+  endpoint_name = databricks_vector_search_endpoint.this.name
+  primary_key   = "doc_id"
+  index_type    = "DIRECT_ACCESS"
+
+  direct_access_index_spec {
+    schema_json = jsonencode({
+      primary_key = "doc_id"
+      columns = [
+        { name = "doc_id", data_type = "string" },
+        { name = "text", data_type = "string" },
+        { name = "category", data_type = "string" },
+        { name = "embedding", data_type = "array<float>" }
+      ]
+    })
+
+    embedding_vector_columns {
+      name                = "embedding"
+      embedding_dimension = 1536 # e.g. 
OpenAI text-embedding-3-small + } + } +} + +output "index_api_url" { + value = databricks_vector_search_index.custom.status.index_url + description = "REST API URL for upsert/delete/query operations" +} +``` + +--- + +### Pattern 4: Multiple Indexes on One Endpoint + +A single endpoint can host multiple indexes for different use cases. + +```hcl +resource "databricks_vector_search_endpoint" "shared" { + name = "${var.prefix}-shared-vs" + endpoint_type = "STANDARD" +} + +locals { + indexes = { + products = { + uc_name = "main.catalog.products_vs_index" + source = "main.catalog.products" + text_col = "description" + pk = "product_id" + model = "gte-large-en" + } + articles = { + uc_name = "main.knowledge.articles_vs_index" + source = "main.knowledge.articles" + text_col = "body" + pk = "article_id" + model = "gte-large-en" + } + } +} + +resource "databricks_vector_search_index" "indexes" { + for_each = local.indexes + + name = each.value.uc_name + endpoint_name = databricks_vector_search_endpoint.shared.name + primary_key = each.value.pk + index_type = "DELTA_SYNC" + + delta_sync_index_spec { + source_table = each.value.source + pipeline_type = "TRIGGERED" + + embedding_source_columns { + name = each.value.text_col + embedding_model_endpoint_name = each.value.model + } + } +} + +output "index_statuses" { + value = { for k, v in databricks_vector_search_index.indexes : k => v.status.ready } +} +``` + +--- + +### UC Grants for Vector Search + +Vector Search indexes live in Unity Catalog — use `databricks_grants` to control access. 
+
+```hcl
+resource "databricks_grants" "vs_index" {
+  table = databricks_vector_search_index.documents.name # index is a UC object
+
+  grant {
+    principal  = "data_engineers"
+    privileges = ["ALL_PRIVILEGES"]
+  }
+
+  grant {
+    principal  = "data_analysts"
+    privileges = ["SELECT"] # query the index
+  }
+}
+```
+
+---
+
+### Import (Vector Search)
+
+```bash
+terraform import databricks_vector_search_endpoint.this "endpoint-name"
+terraform import databricks_vector_search_index.documents "catalog.schema.index_name"
+```
+
+---
+
+## Common Issues
+
+| Issue | Solution |
+|-------|----------|
+| **Cluster creation fails with `CLOUD_FAILURE`** | Check IAM instance profile (AWS) or managed identity (Azure) permissions |
+| **`cannot use workspace-level provider`** | Resources like clusters/jobs require workspace provider, not account-level |
+| **Job schedule not triggering** | Verify `pause_status = "UNPAUSED"` and cron expression is valid Quartz format |
+| **SQL warehouse in `STARTING` state forever** | Check serverless enablement for workspace; fall back to Classic if not enabled |
+| **Notebook import fails** | Ensure `content_base64` is properly encoded; use `filebase64()` for file imports |
+| **`CAN_RESTART` vs `CAN_MANAGE`** | Permission hierarchy: CAN_ATTACH_TO < CAN_RESTART < CAN_MANAGE |
+| **Secrets not accessible in cluster** | Verify `databricks_secret_acl` grants READ to the correct group/user |
+| **Policy assignment fails** | Cluster policy requires `CAN_USE` grant; only account admin can create policies |
+| **App `url` is empty after apply** | App URL is populated only after first code deployment; run `databricks apps deploy` then refresh state |
+| **App service principal needs additional UC grants** | Use `service_principal_id` output to grant extra UC privileges outside the `resources` block |
+| **`resources` block with wrong permission value** | Each resource type has its own permission enum — check the table in the Databricks Apps section |
+| **App code not 
updating on `terraform apply`** | Terraform only manages the container/bindings; redeploy code via CLI or Asset Bundles | +| **VS endpoint stuck in `PROVISIONING`** | Endpoint provisioning can take 10–15 min; poll `endpoint_status.state` | +| **VS index `status.ready = false`** | Index sync not yet complete; wait for the DLT pipeline run to finish | +| **`CONTINUOUS` pipeline not catching up** | Check the underlying DLT pipeline in the Delta Live Tables UI for errors | +| **`embedding_model_endpoint_name` not found** | The model serving endpoint must already exist; use `depends_on` if creating it in the same stack | +| **`DIRECT_ACCESS` upsert fails after `terraform apply`** | Index may still be initializing; check `status.ready` before writing | +| **Grants on VS index fail** | Vector Search indexes are UC objects — ensure UC is enabled and the index name uses three-level format | diff --git a/databricks-skills/databricks-terraform-skill/7-iam-permissions.md b/databricks-skills/databricks-terraform-skill/7-iam-permissions.md new file mode 100644 index 00000000..f4eb8c69 --- /dev/null +++ b/databricks-skills/databricks-terraform-skill/7-iam-permissions.md @@ -0,0 +1,519 @@ +# IAM & Permissions Management + +## Overview + +Databricks has two permission layers: +1. **Unity Catalog permissions** (`databricks_grants`) — data-level access (catalogs, schemas, tables, volumes) +2. 
**Workspace permissions** (`databricks_permissions`) — compute-level access (clusters, jobs, notebooks, SQL warehouses) + +And two identity levels: +- **Account-level** — users, groups, and service principals managed at the Databricks account (via account-level provider) +- **Workspace-level** — the same identities added to specific workspaces + +--- + +## Users + +### Create Account-Level Users + +```hcl +# Create multiple users (account-level provider required) +resource "databricks_user" "users" { + provider = databricks.mws + for_each = toset(var.user_emails) + user_name = each.key + force = true # Don't fail if user already exists +} + +# Single user +resource "databricks_user" "admin_user" { + provider = databricks.mws + user_name = "admin@company.com" + display_name = "Admin User" + force = true +} + +# Assign account admin role +resource "databricks_user_role" "admin" { + provider = databricks.mws + user_id = databricks_user.admin_user.id + role = "account_admin" +} +``` + +### Add User to Workspace + +```hcl +# Add user to workspace (workspace-level provider) +resource "databricks_user" "workspace_user" { + provider = databricks.workspace + user_name = "analyst@company.com" + display_name = "Data Analyst" +} +``` + +--- + +## Groups + +### Create Groups + +```hcl +# Account-level group (for UC grants) +resource "databricks_group" "data_engineers" { + provider = databricks.mws + display_name = "data_engineers" +} + +resource "databricks_group" "data_analysts" { + provider = databricks.mws + display_name = "data_analysts" +} + +resource "databricks_group" "ml_team" { + provider = databricks.mws + display_name = "ml_team" +} + +resource "databricks_group" "pii_access" { + provider = databricks.mws + display_name = "pii_access_group" +} +``` + +### Add Members to Groups + +```hcl +# Add individual users to group +resource "databricks_group_member" "engineers" { + provider = databricks.mws + for_each = toset(var.engineer_emails) + group_id = 
databricks_group.data_engineers.id + member_id = databricks_user.users[each.value].id +} + +# Nested groups (group within a group) +resource "databricks_group" "all_data_teams" { + provider = databricks.mws + display_name = "all_data_teams" +} + +resource "databricks_group_member" "engineers_in_all" { + provider = databricks.mws + group_id = databricks_group.all_data_teams.id + member_id = databricks_group.data_engineers.id +} + +resource "databricks_group_member" "analysts_in_all" { + provider = databricks.mws + group_id = databricks_group.all_data_teams.id + member_id = databricks_group.data_analysts.id +} +``` + +### Sync Groups from Cloud Identity Providers + +For large organizations, use group sync rather than Terraform for individual membership. Terraform manages group creation and workspace assignment; Azure AD/Okta/etc. syncs members. + +```hcl +# Create the group in Databricks (members synced from IdP via SCIM) +resource "databricks_group" "synced_group" { + provider = databricks.mws + display_name = "data-team" + allow_instance_pool_create = false + allow_cluster_create = false +} + +# Look up externally-synced group (read-only) +data "databricks_group" "existing" { + provider = databricks.workspace + display_name = "data-team" +} +``` + +--- + +## Service Principals + +### Create Service Principal + +```hcl +# Account-level service principal (for CI/CD, automation) +resource "databricks_service_principal" "ci_cd" { + provider = databricks.mws + display_name = "CI/CD Pipeline SP" + allow_cluster_create = false # Restrict to only what's needed +} + +# Workspace-level service principal +resource "databricks_service_principal" "workspace_sp" { + provider = databricks.workspace + application_id = var.sp_application_id # Azure AD app ID or AWS IAM + display_name = "Workspace Automation SP" +} + +# Grant account admin to SP (for UC admin operations) +resource "databricks_service_principal_role" "sp_admin" { + provider = databricks.mws + service_principal_id = 
databricks_service_principal.ci_cd.id
+  role                 = "account_admin"
+}
+```
+
+### Service Principal OAuth Secrets
+
+```hcl
+# Create OAuth secret for SP (account-level)
+resource "databricks_service_principal_secret" "ci_cd" {
+  provider             = databricks.mws
+  service_principal_id = databricks_service_principal.ci_cd.id
+}
+
+output "sp_client_id" {
+  value = databricks_service_principal.ci_cd.application_id
+}
+
+output "sp_client_secret" {
+  value     = databricks_service_principal_secret.ci_cd.secret
+  sensitive = true
+}
+```
+
+---
+
+## Workspace Assignment
+
+```hcl
+# Add group to workspace with specific entitlements
+resource "databricks_mws_permission_assignment" "engineers" {
+  provider     = databricks.mws
+  workspace_id = var.workspace_id
+  principal_id = databricks_group.data_engineers.id
+
+  permissions = ["USER"] # "USER" or "ADMIN"
+}
+
+resource "databricks_mws_permission_assignment" "sp_admin" {
+  provider     = databricks.mws
+  workspace_id = var.workspace_id
+  principal_id = databricks_service_principal.ci_cd.id
+
+  permissions = ["ADMIN"]
+}
+```
+
+---
+
+## Entitlements
+
+```hcl
+# Allow a group to create clusters
+resource "databricks_entitlements" "cluster_create" {
+  provider                   = databricks.workspace
+  group_id                   = databricks_group.data_engineers.id
+  allow_cluster_create       = true
+  allow_instance_pool_create = true
+}
+
+# Allow SP to create clusters and instance pools
+resource "databricks_entitlements" "sp_entitlements" {
+  provider              = databricks.workspace
+  service_principal_id  = databricks_service_principal.ci_cd.id
+  allow_cluster_create  = true
+  databricks_sql_access = true
+  workspace_access      = true
+}
+
+# Analyst — SQL-only access
+resource "databricks_entitlements" "analyst_entitlements" {
+  provider              = databricks.workspace
+  group_id              = databricks_group.data_analysts.id
+  allow_cluster_create  = false
+  databricks_sql_access = true # Access to DBSQL
+  workspace_access      = true
+}
+```
+
+---
+
+## Workspace-Level Permissions Matrix
+
+### Permission Levels by 
Resource + +| Resource | Levels | Notes | +|----------|--------|-------| +| **Cluster** | `CAN_ATTACH_TO`, `CAN_RESTART`, `CAN_MANAGE` | Workspace admins can manage all clusters | +| **Job** | `CAN_VIEW`, `CAN_MANAGE_RUN`, `IS_OWNER`, `CAN_MANAGE` | Job owner auto-gets `IS_OWNER` | +| **Notebook** | `CAN_READ`, `CAN_RUN`, `CAN_EDIT`, `CAN_MANAGE` | | +| **Directory** | `CAN_READ`, `CAN_RUN`, `CAN_EDIT`, `CAN_MANAGE` | Applies to all objects within | +| **SQL Warehouse** | `CAN_USE`, `CAN_MANAGE` | | +| **Cluster Policy** | `CAN_USE` | Allows creating clusters under the policy | +| **Secret Scope** | `READ`, `WRITE`, `MANAGE` | | +| **Instance Pool** | `CAN_ATTACH_TO`, `CAN_MANAGE` | | +| **Dashboard** | `CAN_VIEW`, `CAN_RUN`, `CAN_EDIT`, `CAN_MANAGE` | | +| **Alert** | `CAN_VIEW`, `CAN_RUN`, `CAN_EDIT`, `CAN_MANAGE` | | + +### Complete Permissions Example + +```hcl +# Data Engineering workspace permissions setup +locals { + workspace_permissions = { + cluster = { + resource_type = "cluster_id" + resource_id = databricks_cluster.shared_autoscaling.id + groups = { + "data_engineers" = "CAN_RESTART" + "data_analysts" = "CAN_ATTACH_TO" + "ml_team" = "CAN_RESTART" + } + } + } +} + +resource "databricks_permissions" "cluster_permissions" { + provider = databricks.workspace + cluster_id = databricks_cluster.shared_autoscaling.id + + dynamic "access_control" { + for_each = { + "data_engineers" = "CAN_RESTART" + "data_analysts" = "CAN_ATTACH_TO" + "ml_team" = "CAN_RESTART" + } + content { + group_name = access_control.key + permission_level = access_control.value + } + } +} +``` + +--- + +## Unity Catalog Grants (Data Permissions) + +> See [5-unity-catalog.md](5-unity-catalog.md) for full UC grants reference. 
+ +### Quick Reference: Grant All Standard Roles + +```hcl +# Metastore: Admin group gets full control +resource "databricks_grants" "metastore_grants" { + provider = databricks.workspace + metastore = databricks_metastore.this.id + + grant { + principal = "data_engineers" + privileges = ["CREATE_CATALOG", "CREATE_EXTERNAL_LOCATION", "CREATE_STORAGE_CREDENTIAL"] + } +} + +# Production catalog: layered access +resource "databricks_grants" "prod_catalog" { + provider = databricks.workspace + catalog = "prod" + + grant { + principal = "data_engineers" + privileges = ["USE_CATALOG", "CREATE_SCHEMA"] + } + grant { + principal = "data_analysts" + privileges = ["USE_CATALOG"] + } + grant { + principal = "ml_team" + privileges = ["USE_CATALOG"] + } +} + +# Silver schema: transform layer +resource "databricks_grants" "silver" { + provider = databricks.workspace + schema = "prod.silver" + + grant { + principal = "data_engineers" + privileges = ["USE_SCHEMA", "CREATE_TABLE", "SELECT", "MODIFY"] + } + grant { + principal = "ml_team" + privileges = ["USE_SCHEMA", "SELECT"] + } +} + +# Gold schema: business layer (analysts read-only) +resource "databricks_grants" "gold" { + provider = databricks.workspace + schema = "prod.gold" + + grant { + principal = "data_engineers" + privileges = ["USE_SCHEMA", "CREATE_TABLE", "SELECT", "MODIFY"] + } + grant { + principal = "data_analysts" + privileges = ["USE_SCHEMA", "SELECT"] + } + grant { + principal = "ml_team" + privileges = ["USE_SCHEMA", "SELECT"] + } +} +``` + +--- + +## SCIM Integration (Enterprise IdP Sync) + +For large organizations, use SCIM to sync users/groups from Azure AD, Okta, or other IdPs instead of managing individuals in Terraform. 
+ +```hcl +# Terraform manages SCIM configuration; IdP manages user/group membership + +# Azure AD — enable SCIM provisioning endpoint +# (This is configured in Azure AD Enterprise App, not in Terraform) +# Terraform only creates the groups that IdP will sync members into: + +resource "databricks_group" "scim_groups" { + provider = databricks.mws + for_each = toset([ + "data_engineers", + "data_analysts", + "ml_team", + "workspace_admins" + ]) + display_name = each.key +} +``` + +--- + +## Row-Level & Column-Level Security + +Databricks UC supports row filters and column masks — set via SQL, not Terraform directly, but the underlying permissions are managed through grants. + +```hcl +# Grant SELECT but row filters/column masks are applied via SQL functions +resource "databricks_grants" "customer_table" { + provider = databricks.workspace + table = "prod.gold.customer_transactions" + + grant { + principal = "data_analysts" + privileges = ["SELECT"] + # Row filter applied via: ALTER TABLE ... SET ROW FILTER ... + # Column mask via: ALTER TABLE ... ALTER COLUMN ... SET MASK ... 
+ } +} + +# The SQL row filter function must also be granted EXECUTE +resource "databricks_grants" "row_filter_fn" { + provider = databricks.workspace + function = "prod.security.customer_row_filter" + + grant { + principal = "data_analysts" + privileges = ["EXECUTE"] + } +} +``` + +--- + +## Token Policies + +```hcl +# Restrict token lifetime for all users +resource "databricks_workspace_conf" "token_policy" { + provider = databricks.workspace + custom_config = { + "maxTokenLifetimeDays" = "90" + "enableTokensConfig" = "true" + } +} + +# Manage which service principals can generate tokens +resource "databricks_obo_token" "sp_token" { + provider = databricks.workspace + application_id = databricks_service_principal.ci_cd.application_id + comment = "Token for CI/CD pipeline" + lifetime_seconds = 86400 # 1 day +} + +output "sp_obo_token" { + value = databricks_obo_token.sp_token.token_value + sensitive = true +} +``` + +--- + +## Best Practices + +### 1. Use Groups, Not Individual Users + +```hcl +# GOOD — grant to groups +resource "databricks_grants" "gold_schema" { + schema = "prod.gold" + grant { + principal = "data_analysts" # group + privileges = ["USE_SCHEMA", "SELECT"] + } +} + +# AVOID — granting to individual users creates maintenance burden +resource "databricks_grants" "gold_schema_bad" { + schema = "prod.gold" + grant { + principal = "alice@company.com" # user — hard to maintain + privileges = ["USE_SCHEMA", "SELECT"] + } +} +``` + +### 2. Principle of Least Privilege + +```hcl +# Only grant what's needed +resource "databricks_entitlements" "analyst" { + group_id = databricks_group.data_analysts.id + allow_cluster_create = false # Analysts use shared warehouses + databricks_sql_access = true + workspace_access = true +} +``` + +### 3. 
Separate Account vs Workspace Groups + +```hcl +# Account-level (for UC grants) — use databricks.mws +resource "databricks_group" "uc_group" { + provider = databricks.mws + display_name = "data_engineers" +} + +# Workspace-level entitlements — use databricks.workspace +resource "databricks_entitlements" "ws_entitlements" { + provider = databricks.workspace + group_id = databricks_group.uc_group.id + allow_cluster_create = true +} +``` + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **`User already exists` when creating** | Set `force = true` on `databricks_user` to allow creation even if user exists | +| **Group not visible in workspace** | Account-level groups must be assigned to workspace via `databricks_mws_workspace_assignment` | +| **`grants` removes metastore admin privileges** | `databricks_grants` is authoritative — always include metastore admin group in every grants resource on that securable | +| **Service principal can't authenticate** | Ensure SP has been assigned to workspace and has `workspace_access = true` entitlement | +| **SCIM-synced users cannot be modified via Terraform** | SCIM-managed users are read-only in Terraform; manage via IdP | +| **`CAN_MANAGE` on job fails** | Job owner (`IS_OWNER`) is set at creation; transfer ownership via UI or API if needed | +| **Entitlements conflict** | `databricks_entitlements` is an update to existing entitlements; importing existing resources is recommended before managing | +| **OBO token requires workspace admin** | Only workspace admins can create OBO tokens for service principals | diff --git a/databricks-skills/databricks-terraform-skill/8-lakebase.md b/databricks-skills/databricks-terraform-skill/8-lakebase.md new file mode 100644 index 00000000..11eeb6c0 --- /dev/null +++ b/databricks-skills/databricks-terraform-skill/8-lakebase.md @@ -0,0 +1,930 @@ +# Lakebase — Managed Postgres on Databricks + +## Overview + +**Lakebase** is Databricks' managed PostgreSQL service. 
There are **two distinct models** with different resource APIs: + +| Model | Resources | Scaling | Branching | Best For | +|-------|-----------|---------|-----------|----------| +| **Classic** | `databricks_database_instance` | Fixed tiers (CU_1–CU_8) | Child instances via `parent_instance_ref` | Simple managed Postgres, HA with replicas | +| **Autoscaling** | `databricks_postgres_project` + `databricks_postgres_branch` + `databricks_postgres_endpoint` | True autoscaling (min/max CU, auto-suspend) | Copy-on-write branching (instant, storage-efficient) | Multi-env development, ephemeral branches, cost-optimized serverless Postgres | + +Choose **Autoscaling** for new deployments — it provides true autoscaling, instant copy-on-write branches for dev/staging/PITR, and suspend-on-idle behavior. + +--- + +## Provider Requirements + +```hcl +terraform { + required_providers { + databricks = { + source = "databricks/databricks" + version = "~> 1.67.0" # postgres_project/branch/endpoint available from ~1.65+ + } + } +} + +# Workspace-level provider (all Lakebase resources are workspace-scoped) +provider "databricks" { + host = var.databricks_host + token = var.databricks_token +} +``` + +> **Important**: All Lakebase resources are workspace-level — no account-level provider alias is needed. + +--- + +--- + +# Part 1: Lakebase Classic (`databricks_database_instance`) + +## Resource: `databricks_database_instance` + +### Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `name` | string | Yes | Unique name for the database instance within the workspace | +| `capacity` | string | No | Compute capacity: `CU_1`, `CU_2`, `CU_4`, `CU_8` (default: `CU_1`) | +| `node_count` | number | No | Number of nodes. 
Set `> 1` for high availability | +| `enable_readable_secondaries` | bool | No | Allow read queries on secondary nodes (requires `node_count > 1`) | +| `enable_pg_native_login` | bool | No | Enable native Postgres username/password authentication | +| `retention_window_in_days` | number | No | PITR retention: 2–35 days (default: 7) | +| `stopped` | bool | No | Stop the instance without deleting it (for cost savings) | +| `usage_policy_id` | string | No | ID of the usage policy to apply | +| `custom_tags` | list(object) | No | List of `{ key = string, value = string }` tag pairs | +| `parent_instance_ref` | block | No | Reference to parent instance for PITR child instances | + +### `parent_instance_ref` Block + +```hcl +parent_instance_ref { + name = databricks_database_instance.production.name +} +``` + +### Exported Attributes + +| Attribute | Description | +|-----------|-------------| +| `id` | The instance name (same as `name`) | +| `creation_time` | RFC3339 timestamp of when the instance was created | +| `creator` | Email/identity of the creator | +| `child_instance_refs` | List of child (PITR) instances referencing this instance | +| `effective_capacity` | Resolved capacity after defaults are applied | +| `effective_enable_pg_native_login` | Resolved value for native login setting | +| `effective_enable_readable_secondaries` | Resolved value for readable secondaries | +| `effective_stopped` | Current stopped state | +| `effective_custom_tags` | Merged tag list including system-applied tags | + +--- + +## Classic Pattern 1: Development / Test Instance + +```hcl +resource "databricks_database_instance" "dev" { + name = "${var.prefix}-dev-db" + capacity = "CU_1" + + custom_tags = [ + { key = "environment", value = "dev" }, + { key = "team", value = var.team_name }, + ] +} + +output "dev_db_name" { + value = databricks_database_instance.dev.name +} +``` + +--- + +## Classic Pattern 2: Production High-Availability Instance + +Multi-node instance with readable 
secondaries, increased retention, and native Postgres login. + +```hcl +resource "databricks_database_instance" "production" { + name = "${var.prefix}-prod-db" + capacity = "CU_8" + node_count = 2 + enable_readable_secondaries = true + enable_pg_native_login = true + retention_window_in_days = 35 + + custom_tags = [ + { key = "environment", value = "production" }, + { key = "team", value = var.team_name }, + { key = "cost-center", value = var.cost_center }, + ] + + lifecycle { + prevent_destroy = true + } +} + +output "prod_db_name" { + value = databricks_database_instance.production.name + description = "Lakebase production database instance name" +} +``` + +--- + +## Classic Pattern 3: PITR Child Instance + +```hcl +# Parent with PITR retention enabled +resource "databricks_database_instance" "production" { + name = "${var.prefix}-prod-db" + capacity = "CU_4" + retention_window_in_days = 14 +} + +# Child — cloned from parent; restore timestamp configured via UI or REST API after apply +resource "databricks_database_instance" "restore" { + name = "${var.prefix}-prod-db-restore" + capacity = "CU_4" + + parent_instance_ref { + name = databricks_database_instance.production.name + } + + custom_tags = [ + { key = "type", value = "pitr-restore" }, + { key = "parent", value = databricks_database_instance.production.name }, + ] +} +``` + +> **Note**: Terraform creates the child instance shell; the specific restore point is set in the Databricks UI or via the REST API after `terraform apply`. 
+ +--- + +## Classic Pattern 4: Stop/Start Lifecycle (Cost Management) + +```hcl +variable "instance_stopped" { + type = bool + default = false +} + +resource "databricks_database_instance" "staging" { + name = "${var.prefix}-staging-db" + capacity = "CU_2" + stopped = var.instance_stopped +} +``` + +```bash +terraform apply -var="instance_stopped=true" # stop +terraform apply -var="instance_stopped=false" # resume +``` + +--- + +## Classic Capacity Sizing Guide + +| Tier | `capacity` | `node_count` | Readable Secondaries | Use Case | +|------|-----------|-------------|---------------------|----------| +| **Dev/Test** | `CU_1` | 1 | No | Local development, quick testing | +| **Small Prod** | `CU_2` | 1 | No | Low-traffic apps, batch workloads | +| **Medium Prod** | `CU_4` | 1 | No | Moderate traffic, typical OLTP | +| **HA Medium** | `CU_4` | 2 | Yes | Moderate traffic + read scaling | +| **Large Prod** | `CU_8` | 2 | Yes | High traffic, analytics read replicas | + +--- + +--- + +# Part 2: Lakebase Autoscaling + +Three resources form a strict hierarchy: + +``` +databricks_postgres_project (root container — one per app/team/env) +└── databricks_postgres_branch (independent DB environment, copy-on-write) + └── databricks_postgres_endpoint (virtualized Postgres connection endpoint) +``` + +**Key behavioral notes:** +- **No drift detection**: Changes made outside Terraform (via UI/API) are not detected. Always use `terraform plan` before applying in shared environments. +- **`spec` vs `status`**: `spec` is your intended configuration; `status` reflects what the system has actually applied. Removing a field from `spec` removes your intent, but server-side defaults persist. +- **Shared state required**: For multi-user/CI environments, use remote state (S3/Azure Blob/GCS) to prevent conflicts. +- A branch can have **only one** `ENDPOINT_TYPE_READ_WRITE` endpoint. Multiple `ENDPOINT_TYPE_READ_ONLY` endpoints are allowed. 
+ +--- + +## Resource: `databricks_postgres_project` + +Top-level container that groups branches, endpoints, databases, and roles. + +### Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `project_id` | string | Yes | 1–63 chars, lowercase letters/numbers/hyphens, must start with a letter. Becomes `projects/{project_id}` | +| `spec` | object | No | Project configuration (see below) | +| `provider_config` | object | No | `{ workspace_id = string }` — for account-level management | + +#### `spec` Fields + +| Field | Type | Description | +|-------|------|-------------| +| `pg_version` | number | PostgreSQL major version: `16` or `17` | +| `display_name` | string | Human-readable name (1–256 chars) | +| `history_retention_duration` | string | PITR window in seconds, e.g. `"1209600s"` (14 days). Max `"2592000s"` (30 days) | +| `budget_policy_id` | string | Associated budget policy ID | +| `custom_tags` | list | List of `{ key = string, value = string }` | +| `default_endpoint_settings` | object | Default autoscaling/suspension settings for endpoints (see below) | + +#### `default_endpoint_settings` Fields + +| Field | Type | Description | +|-------|------|-------------| +| `autoscaling_limit_min_cu` | number | Minimum compute units (≥ 0.5) | +| `autoscaling_limit_max_cu` | number | Maximum compute units (≥ 0.5) | +| `suspend_timeout_duration` | string | Inactivity before suspension, `"60s"`–`"604800s"` | +| `no_suspension` | bool | `true` disables auto-suspension entirely | +| `pg_settings` | object | Raw Postgres configuration key-value pairs | + +### Exported Attributes + +| Attribute | Description | +|-----------|-------------| +| `name` | Full resource path: `projects/{project_id}` | +| `uid` | System-generated unique identifier | +| `create_time` / `update_time` | RFC3339 timestamps | +| `status` | Current system state (mirrors spec fields with effective values) | +| `status.owner` | Project owner email | +| 
`status.synthetic_storage_size_bytes` | Current storage consumption | + +--- + +## Resource: `databricks_postgres_branch` + +Independent database environment within a project. Branches share underlying storage with their source via **copy-on-write** — creating a branch is instant regardless of database size. + +### Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `branch_id` | string | Yes | 1–63 chars, lowercase letters/numbers/hyphens, starts with a letter. Becomes `projects/{project_id}/branches/{branch_id}` | +| `parent` | string | Yes | Project path — use `databricks_postgres_project.this.name` | +| `spec` | object | No | Branch configuration (see below) | +| `provider_config` | object | No | `{ workspace_id = string }` | + +#### `spec` Fields + +| Field | Type | Description | +|-------|------|-------------| +| `is_protected` | bool | Prevents deletion and reset; also blocks parent project deletion | +| `no_expiry` | bool | Explicitly disables expiration (for permanent branches) | +| `ttl` | string | Relative TTL from creation, e.g. 
`"604800s"` (7 days) | +| `expire_time` | string | Absolute expiration RFC3339 timestamp | +| `source_branch` | string | Branch path to copy from for PITR: `projects/{p}/branches/{b}` | +| `source_branch_lsn` | string | Log Sequence Number on source for PITR | +| `source_branch_time` | string | RFC3339 timestamp on source branch for PITR | + +### Exported Attributes + +| Attribute | Description | +|-----------|-------------| +| `name` | Full path: `projects/{project_id}/branches/{branch_id}` | +| `uid` | System-generated unique identifier | +| `create_time` / `update_time` | RFC3339 timestamps | +| `status.current_state` | `INIT`, `READY`, `ARCHIVED`, `IMPORTING`, `RESETTING` | +| `status.default` | `true` if this is the project's default branch | +| `status.is_protected` | Effective protection status | +| `status.logical_size_bytes` | Branch logical storage size | +| `status.source_branch` | Lineage parent branch path | + +--- + +## Resource: `databricks_postgres_endpoint` + +A virtualized Postgres service that fronts a branch. Clients connect to the endpoint's hostname to read/write data. + +### Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `endpoint_id` | string | Yes | 1–63 chars, lowercase letters/numbers/hyphens, starts with a letter | +| `parent` | string | Yes | Branch path — use `databricks_postgres_branch.this.name` | +| `spec` | object | No | Endpoint configuration (see below) | +| `provider_config` | object | No | `{ workspace_id = string }` | + +#### `spec` Fields + +| Field | Type | Description | +|-------|------|-------------| +| `endpoint_type` | string | **Required**: `ENDPOINT_TYPE_READ_WRITE` or `ENDPOINT_TYPE_READ_ONLY`. One branch = one READ_WRITE max. 
| +| `autoscaling_limit_min_cu` | number | Minimum compute units (≥ 0.5) | +| `autoscaling_limit_max_cu` | number | Maximum compute units (≥ 0.5) | +| `suspend_timeout_duration` | string | Inactivity before auto-suspend: `"60s"`–`"604800s"` | +| `no_suspension` | bool | `true` disables auto-suspension | +| `disabled` | bool | `true` restricts all connections and schedules suspension | +| `settings` | object | `{ pg_settings = { key = value, ... } }` — raw Postgres settings | + +### Exported Attributes + +| Attribute | Description | +|-----------|-------------| +| `name` | Full path: `projects/{p}/branches/{b}/endpoints/{endpoint_id}` | +| `uid` | System-generated unique identifier | +| `create_time` / `update_time` | RFC3339 timestamps | +| `status.current_state` | `ACTIVE`, `IDLE`, `INIT` | +| `status.hosts.host` | **Connection hostname** — use this to connect clients | +| `status.pending_state` | Transitional state during operations | + +--- + +## Autoscaling Pattern 1: Single Project with Main Branch + +Minimal setup — one project, one permanent branch, one read-write endpoint. 
+ +```hcl +# Project — top-level container +resource "databricks_postgres_project" "app" { + project_id = "${var.prefix}-app" + + spec = { + pg_version = 17 + display_name = "Application Database" + history_retention_duration = "604800s" # 7-day PITR + + default_endpoint_settings = { + autoscaling_limit_min_cu = 0.5 + autoscaling_limit_max_cu = 4.0 + suspend_timeout_duration = "300s" # suspend after 5 min idle + } + + custom_tags = [ + { key = "team", value = var.team_name }, + { key = "managed-by", value = "terraform" }, + ] + } +} + +# Main branch — permanent, protected +resource "databricks_postgres_branch" "main" { + branch_id = "main" + parent = databricks_postgres_project.app.name + + spec = { + is_protected = true + no_expiry = true + } +} + +# Primary read-write endpoint +resource "databricks_postgres_endpoint" "primary" { + endpoint_id = "primary" + parent = databricks_postgres_branch.main.name + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_WRITE" + autoscaling_limit_min_cu = 0.5 + autoscaling_limit_max_cu = 4.0 + suspend_timeout_duration = "300s" + } +} + +output "connection_host" { + value = databricks_postgres_endpoint.primary.status.hosts.host + description = "Postgres connection hostname for the primary endpoint" +} +``` + +--- + +## Autoscaling Pattern 2: Multi-Environment Branches + +One project, separate branches per environment — dev/staging branches share storage with main via copy-on-write. 
+ +```hcl +resource "databricks_postgres_project" "platform" { + project_id = "${var.prefix}-platform" + + spec = { + pg_version = 17 + display_name = "Platform Database" + history_retention_duration = "1209600s" # 14-day PITR + + default_endpoint_settings = { + autoscaling_limit_min_cu = 0.5 + autoscaling_limit_max_cu = 8.0 + } + } +} + +# Production branch — protected, no expiry +resource "databricks_postgres_branch" "production" { + branch_id = "production" + parent = databricks_postgres_project.platform.name + + spec = { + is_protected = true + no_expiry = true + } +} + +# Staging branch — derived from production, no expiry +resource "databricks_postgres_branch" "staging" { + branch_id = "staging" + parent = databricks_postgres_project.platform.name + + spec = { + no_expiry = true + source_branch = databricks_postgres_branch.production.name + } +} + +# Dev branch — derived from staging, 30-day TTL +resource "databricks_postgres_branch" "dev" { + branch_id = "dev" + parent = databricks_postgres_project.platform.name + + spec = { + ttl = "2592000s" # 30 days + source_branch = databricks_postgres_branch.staging.name + } +} + +# Endpoints — one per branch +resource "databricks_postgres_endpoint" "prod_primary" { + endpoint_id = "primary" + parent = databricks_postgres_branch.production.name + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_WRITE" + autoscaling_limit_min_cu = 2.0 + autoscaling_limit_max_cu = 8.0 + no_suspension = true # production never suspends + } +} + +resource "databricks_postgres_endpoint" "staging_primary" { + endpoint_id = "primary" + parent = databricks_postgres_branch.staging.name + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_WRITE" + autoscaling_limit_min_cu = 0.5 + autoscaling_limit_max_cu = 4.0 + suspend_timeout_duration = "600s" + } +} + +resource "databricks_postgres_endpoint" "dev_primary" { + endpoint_id = "primary" + parent = databricks_postgres_branch.dev.name + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_WRITE" + 
autoscaling_limit_min_cu = 0.5 + autoscaling_limit_max_cu = 2.0 + suspend_timeout_duration = "120s" # suspend after 2 min idle in dev + } +} + +output "endpoints" { + value = { + production = databricks_postgres_endpoint.prod_primary.status.hosts.host + staging = databricks_postgres_endpoint.staging_primary.status.hosts.host + dev = databricks_postgres_endpoint.dev_primary.status.hosts.host + } +} +``` + +--- + +## Autoscaling Pattern 3: Ephemeral Feature Branch (TTL) + +Spin up a short-lived branch for a feature/PR, auto-expires after the TTL. + +```hcl +variable "feature_name" { + type = string + description = "Feature branch identifier, e.g. 'feature-user-auth'" +} + +resource "databricks_postgres_branch" "feature" { + branch_id = var.feature_name + parent = databricks_postgres_project.platform.name + + spec = { + ttl = "604800s" # 7-day TTL — auto-deleted + source_branch = databricks_postgres_branch.dev.name + } +} + +resource "databricks_postgres_endpoint" "feature_primary" { + endpoint_id = "primary" + parent = databricks_postgres_branch.feature.name + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_WRITE" + autoscaling_limit_min_cu = 0.5 + autoscaling_limit_max_cu = 1.0 + suspend_timeout_duration = "60s" # aggressive suspension for cost + } +} + +output "feature_host" { + value = databricks_postgres_endpoint.feature_primary.status.hosts.host +} +``` + +Clean up manually before TTL if needed: + +```bash +terraform destroy -target=databricks_postgres_endpoint.feature_primary +terraform destroy -target=databricks_postgres_branch.feature +``` + +--- + +## Autoscaling Pattern 4: Point-in-Time Recovery Branch + +Restore from a specific timestamp on the production branch. + +```hcl +variable "restore_time" { + type = string + description = "RFC3339 timestamp to restore from, e.g. 
'2025-01-15T10:30:00Z'" +} + +resource "databricks_postgres_branch" "pitr_restore" { + branch_id = "pitr-restore-${formatdate("YYYYMMDD", var.restore_time)}" + parent = databricks_postgres_project.platform.name + + spec = { + source_branch = databricks_postgres_branch.production.name + source_branch_time = var.restore_time # RFC3339 timestamp + no_expiry = true # keep until explicitly deleted + } +} + +resource "databricks_postgres_endpoint" "pitr_endpoint" { + endpoint_id = "restore-primary" + parent = databricks_postgres_branch.pitr_restore.name + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_WRITE" + autoscaling_limit_min_cu = 0.5 + autoscaling_limit_max_cu = 4.0 + suspend_timeout_duration = "300s" + } +} + +output "pitr_host" { + value = databricks_postgres_endpoint.pitr_endpoint.status.hosts.host + description = "Connect to this endpoint to inspect the restored database" +} +``` + +--- + +## Autoscaling Pattern 5: Read Scaling with Read-Only Endpoint + +Add a read-only endpoint on the same branch to horizontally scale read traffic (analytics, reporting). 
+ +```hcl +# Primary read-write endpoint (already exists from Pattern 1) +resource "databricks_postgres_endpoint" "primary" { + endpoint_id = "primary" + parent = databricks_postgres_branch.main.name + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_WRITE" + autoscaling_limit_min_cu = 1.0 + autoscaling_limit_max_cu = 8.0 + no_suspension = true + } +} + +# Read-only endpoint for analytics workloads +resource "databricks_postgres_endpoint" "analytics" { + endpoint_id = "analytics" + parent = databricks_postgres_branch.main.name # same branch, different endpoint + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_ONLY" + autoscaling_limit_min_cu = 0.5 + autoscaling_limit_max_cu = 4.0 + suspend_timeout_duration = "600s" + } +} + +output "read_write_host" { + value = databricks_postgres_endpoint.primary.status.hosts.host +} + +output "read_only_host" { + value = databricks_postgres_endpoint.analytics.status.hosts.host + description = "Use this for reporting and analytics queries" +} +``` + +--- + +## Autoscaling Pattern 6: Disabled Endpoint (Maintenance Mode) + +Temporarily block all connections without destroying the endpoint. 
+ +```hcl +variable "maintenance_mode" { + type = bool + default = false +} + +resource "databricks_postgres_endpoint" "primary" { + endpoint_id = "primary" + parent = databricks_postgres_branch.main.name + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_WRITE" + autoscaling_limit_min_cu = 1.0 + autoscaling_limit_max_cu = 8.0 + disabled = var.maintenance_mode + } +} +``` + +```bash +terraform apply -var="maintenance_mode=true" # block connections +terraform apply -var="maintenance_mode=false" # restore access +``` + +--- + +## Full Autoscaling Production Example + +```hcl +terraform { + required_providers { + databricks = { + source = "databricks/databricks" + version = "~> 1.67.0" + } + } +} + +provider "databricks" { + host = var.databricks_host + token = var.databricks_token +} + +# ── Project ────────────────────────────────────────────────────────────────── + +resource "databricks_postgres_project" "production" { + project_id = "${var.prefix}-prod" + + spec = { + pg_version = 17 + display_name = "Production Application Database" + history_retention_duration = "1209600s" # 14-day PITR + + default_endpoint_settings = { + autoscaling_limit_min_cu = 1.0 + autoscaling_limit_max_cu = 16.0 + } + + custom_tags = [ + { key = "environment", value = "production" }, + { key = "team", value = var.team_name }, + { key = "managed-by", value = "terraform" }, + ] + } +} + +# ── Branches ───────────────────────────────────────────────────────────────── + +# Production branch — protected, permanent +resource "databricks_postgres_branch" "main" { + branch_id = "main" + parent = databricks_postgres_project.production.name + + spec = { + is_protected = true + no_expiry = true + } +} + +# Staging branch — copy of main, permanent +resource "databricks_postgres_branch" "staging" { + branch_id = "staging" + parent = databricks_postgres_project.production.name + + spec = { + no_expiry = true + source_branch = databricks_postgres_branch.main.name + } +} + +# ── Endpoints 
──────────────────────────────────────────────────────────────── + +# Production primary (read-write, never suspends) +resource "databricks_postgres_endpoint" "prod_primary" { + endpoint_id = "primary" + parent = databricks_postgres_branch.main.name + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_WRITE" + autoscaling_limit_min_cu = 2.0 + autoscaling_limit_max_cu = 16.0 + no_suspension = true + } +} + +# Production analytics (read-only, auto-suspends) +resource "databricks_postgres_endpoint" "prod_analytics" { + endpoint_id = "analytics" + parent = databricks_postgres_branch.main.name + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_ONLY" + autoscaling_limit_min_cu = 0.5 + autoscaling_limit_max_cu = 8.0 + suspend_timeout_duration = "600s" + } +} + +# Staging primary (read-write, aggressive suspension) +resource "databricks_postgres_endpoint" "staging_primary" { + endpoint_id = "primary" + parent = databricks_postgres_branch.staging.name + + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_WRITE" + autoscaling_limit_min_cu = 0.5 + autoscaling_limit_max_cu = 4.0 + suspend_timeout_duration = "300s" + } +} + +# ── Outputs ────────────────────────────────────────────────────────────────── + +output "prod_rw_host" { + value = databricks_postgres_endpoint.prod_primary.status.hosts.host + description = "Production read-write connection hostname" +} + +output "prod_ro_host" { + value = databricks_postgres_endpoint.prod_analytics.status.hosts.host + description = "Production read-only analytics hostname" +} + +output "staging_host" { + value = databricks_postgres_endpoint.staging_primary.status.hosts.host + description = "Staging read-write connection hostname" +} +``` + +--- + +## Import (Autoscaling Resources) + +Import uses the full resource `name` path: + +```hcl +# Terraform 1.5+ block syntax +import { + id = "projects/my-app" + to = databricks_postgres_project.app +} + +import { + id = "projects/my-app/branches/main" + to = databricks_postgres_branch.main +} + 
+import { + id = "projects/my-app/branches/main/endpoints/primary" + to = databricks_postgres_endpoint.primary +} +``` + +```bash +# Legacy CLI import +terraform import databricks_postgres_project.app "projects/my-app" +terraform import databricks_postgres_branch.main "projects/my-app/branches/main" +terraform import databricks_postgres_endpoint.primary "projects/my-app/branches/main/endpoints/primary" +``` + +--- + +## Common Issues + +### Classic (`databricks_database_instance`) + +| Issue | Solution | +|-------|----------| +| **`databricks_database_instance` not found** | Upgrade provider to `~> 1.60.0` or later | +| **`node_count > 1` requires higher capacity** | Multi-node HA requires at least `CU_2` | +| **`enable_readable_secondaries` has no effect** | Requires `node_count >= 2` | +| **PITR child requires retention on parent** | Parent must have `retention_window_in_days > 0` (default: 7) | +| **`stopped = true` doesn't free capacity immediately** | Shutdown takes minutes; check `effective_stopped` | +| **Destroy fails on instance with child instances** | Delete child instances first, then parent | + +### Autoscaling (`databricks_postgres_project/branch/endpoint`) + +| Issue | Solution | +|-------|----------| +| **Resources not found in provider** | Upgrade to `~> 1.65.0` or later | +| **Drift not detected after manual changes** | These resources have no drift detection — run `terraform refresh` before `plan` | +| **`ENDPOINT_TYPE_READ_WRITE` conflict** | Each branch supports only one read-write endpoint; add `ENDPOINT_TYPE_READ_ONLY` for additional endpoints | +| **Branch stuck in `INIT` state** | Wait for branch to reach `READY` before creating endpoints; use `depends_on` | +| **`source_branch_time` out of retention window** | PITR time must be within `history_retention_duration` of the source branch | +| **Protected branch cannot be deleted** | Set `is_protected = false` and apply before `terraform destroy` | +| **`autoscaling_limit_min_cu` must be ≥ 
0.5** | Minimum value is 0.5 CU — do not set to 0 | +| **`suspend_timeout_duration` out of range** | Must be between `"60s"` (1 min) and `"604800s"` (7 days) | +| **Endpoint `IDLE` when expecting `ACTIVE`** | Endpoint auto-suspended — the first connection wakes it; set `no_suspension = true` for always-on | + +--- + +## Unity Catalog Integration + +Expose any Lakebase instance or autoscaling endpoint as a UC Connection for querying from notebooks, SQL warehouses, and Genie. + +```hcl +# Works for both Classic (database_instance.name) and Autoscaling (endpoint hostname) +resource "databricks_connection" "lakebase" { + name = "${var.prefix}-lakebase-connection" + connection_type = "POSTGRESQL" + comment = "Lakebase production connection" + + options = { + # For Classic: use databricks_database_instance.production.name + # For Autoscaling: use databricks_postgres_endpoint.prod_primary.status.hosts.host + host = databricks_postgres_endpoint.prod_primary.status.hosts.host + port = "5432" + database = "postgres" + } +} + +resource "databricks_grants" "lakebase_connection" { + connection = databricks_connection.lakebase.name + + grant { + principal = "data_engineers" + privileges = ["ALL_PRIVILEGES"] + } + + grant { + principal = "data_analysts" + privileges = ["USE_CONNECTION"] + } +} +``` + +--- + +## Variables Reference + +```hcl +variable "prefix" { + type = string + description = "Naming prefix for all Lakebase resources" +} + +variable "team_name" { + type = string + description = "Team name for tagging" +} + +variable "cost_center" { + type = string + description = "Cost center for billing tags" + default = "" +} + +variable "databricks_host" { + type = string + description = "Databricks workspace URL" +} + +variable "databricks_token" { + type = string + sensitive = true +} + +variable "dev_instance_stopped" { + type = bool + default = false +} +``` + +--- + +## Related Resources + +- [databricks_database_instance — Terraform Provider 
Docs](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/database_instance) +- [databricks_postgres_project — Terraform Provider Docs](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/postgres_project) +- [databricks_postgres_branch — Terraform Provider Docs](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/postgres_branch) +- [databricks_postgres_endpoint — Terraform Provider Docs](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/postgres_endpoint) +- [Databricks Connection — Terraform Provider Docs](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/connection) +- [Lakebase Overview — Databricks Docs](https://docs.databricks.com/en/database-instances/index.html) diff --git a/databricks-skills/databricks-terraform-skill/SKILL.md b/databricks-skills/databricks-terraform-skill/SKILL.md new file mode 100644 index 00000000..551ac9bc --- /dev/null +++ b/databricks-skills/databricks-terraform-skill/SKILL.md @@ -0,0 +1,235 @@ +--- +name: databricks-terraform-skill +description: "Terraform automation for Databricks: workspace deployment on AWS/Azure/GCP (with or without PrivateLink), Unity Catalog setup, Databricks resource management (clusters, jobs, warehouses, grants), and Lakebase managed Postgres (Classic database_instance and Autoscaling postgres_project/branch/endpoint). Use when writing or reviewing Terraform for Databricks infrastructure." +--- + +# Databricks Terraform Skill + +End-to-end Terraform automation for Databricks infrastructure — from workspace provisioning to Unity Catalog and resource management across all three major clouds. 
+ +## When to Use This Skill + +Use this skill when: +- **Deploying Databricks workspaces** on AWS, Azure, or GCP (basic or PrivateLink) +- **Setting up Unity Catalog** (metastore, storage credentials, external locations, catalogs, schemas, grants) +- **Managing Databricks resources** via Terraform (clusters, jobs, SQL warehouses, notebooks, secrets, policies, Databricks Apps, Mosaic AI Vector Search) +- **Configuring IAM/access control** (users, groups, service principals, permissions) +- **Provisioning Lakebase Classic** (`databricks_database_instance`) — fixed-tier HA managed Postgres +- **Provisioning Lakebase Autoscaling** (`databricks_postgres_project` / `databricks_postgres_branch` / `databricks_postgres_endpoint`) — true autoscaling, copy-on-write branching, suspend-on-idle +- **Reviewing or troubleshooting** existing Databricks Terraform configurations +- **Migrating** manual Databricks setups to Infrastructure-as-Code + +## Reference Files + +| Topic | File | Description | +|-------|------|-------------| +| Provider & Auth | [1-provider-authentication.md](1-provider-authentication.md) | Provider setup, authentication for all clouds, multi-provider patterns | +| AWS Deployment | [2-aws-workspace-deployment.md](2-aws-workspace-deployment.md) | AWS basic workspace and PrivateLink deployment | +| Azure Deployment | [3-azure-workspace-deployment.md](3-azure-workspace-deployment.md) | Azure basic workspace and Private Link standard deployment | +| GCP Deployment | [4-gcp-workspace-deployment.md](4-gcp-workspace-deployment.md) | GCP managed VPC and BYOVPC workspace deployment | +| Unity Catalog | [5-unity-catalog.md](5-unity-catalog.md) | Metastore, storage credentials, external locations, catalogs, schemas, grants | +| Databricks Resources | [6-databricks-resources.md](6-databricks-resources.md) | Clusters, jobs, SQL warehouses, notebooks, secrets, cluster policies, Databricks Apps, Mosaic AI Vector Search | +| IAM & Permissions | 
[7-iam-permissions.md](7-iam-permissions.md) | Users, groups, service principals, workspace permissions, grants | +| Lakebase (Postgres) | [8-lakebase.md](8-lakebase.md) | Classic (`database_instance`) and Autoscaling (`postgres_project/branch/endpoint`): HA, PITR, copy-on-write branching, autoscaling, suspend-on-idle | + +## Quick Start + +### 1. Configure the Provider + +```hcl +terraform { + required_providers { + databricks = { + source = "databricks/databricks" + version = "~> 1.38.0" + } + } +} + +# Workspace-level provider (uses PAT or OAuth) +provider "databricks" { + host = var.databricks_host + token = var.databricks_token +} +``` + +### 2. Deploy a Workspace (AWS Quick Start) + +Follow the inline resource pattern in [2-aws-workspace-deployment.md](2-aws-workspace-deployment.md) — the examples use direct resources rather than published registry modules. + +```hcl +# See 2-aws-workspace-deployment.md for the full VPC → IAM → MWS workspace pattern +resource "databricks_mws_workspaces" "this" { + provider = databricks.mws + account_id = var.databricks_account_id + workspace_name = var.workspace_name + aws_region = var.region + # ... see 2-aws-workspace-deployment.md for complete configuration +} +``` + +### 3. Set Up Unity Catalog (Quick Start) + +```hcl +resource "databricks_metastore" "this" { + name = "my-metastore" + storage_root = "s3://my-uc-bucket/metastore" + region = "us-east-1" + force_destroy = true +} + +resource "databricks_metastore_assignment" "this" { + metastore_id = databricks_metastore.this.id + workspace_id = var.workspace_id +} +``` + +### 4. 
Provision Lakebase (Quick Start) + +**Classic** (fixed capacity, HA replicas): + +```hcl +# See 8-lakebase.md for HA, PITR, stop/start, and UC integration patterns +resource "databricks_database_instance" "production" { + name = "prod-lakebase" + capacity = "CU_4" + node_count = 2 + enable_readable_secondaries = true + enable_pg_native_login = true + retention_window_in_days = 14 +} +``` + +**Autoscaling** (copy-on-write branches, suspend-on-idle — recommended for new deployments): + +```hcl +# See 8-lakebase.md for multi-env branching, PITR branch, read-only endpoint patterns +resource "databricks_postgres_project" "app" { + project_id = "my-app" + spec = { + pg_version = 17 + display_name = "Application Database" + history_retention_duration = "604800s" + } +} + +resource "databricks_postgres_branch" "main" { + branch_id = "main" + parent = databricks_postgres_project.app.name + spec = { is_protected = true, no_expiry = true } +} + +resource "databricks_postgres_endpoint" "primary" { + endpoint_id = "primary" + parent = databricks_postgres_branch.main.name + spec = { + endpoint_type = "ENDPOINT_TYPE_READ_WRITE" + autoscaling_limit_min_cu = 0.5 + autoscaling_limit_max_cu = 4.0 + suspend_timeout_duration = "300s" + } +} +``` + +### 5. Run Terraform + +```bash +terraform init +terraform plan -out=tfplan +terraform apply tfplan +``` + +## Key Design Patterns + +### Multi-Provider Pattern (Account + Workspace) + +Most Unity Catalog operations require **two providers**: one for account-level operations and one per workspace. + +```hcl +# Account-level (for UC, users, groups) +provider "databricks" { + alias = "mws" + host = "https://accounts.cloud.databricks.com" + account_id = var.databricks_account_id + client_id = var.client_id + client_secret = var.client_secret +} + +# Workspace-level (for clusters, jobs, etc.) 
+provider "databricks" { + alias = "workspace" + host = var.workspace_url + client_id = var.client_id + client_secret = var.client_secret +} +``` + +### Modular Structure (Recommended) + +``` +project/ +├── main.tf # Root module — calls sub-modules +├── variables.tf # Input variables +├── outputs.tf # Output values +├── providers.tf # Provider declarations +├── terraform.tfvars # Variable values (gitignored) +├── modules/ +│ ├── networking/ # VPC, subnets, security groups +│ ├── workspace/ # Databricks workspace +│ └── unity-catalog/ # UC metastore, catalogs, grants +└── backend.tf # Remote state (S3/Azure Blob/GCS) +``` + +### Remote State (Required for Production) + +```hcl +# backend.tf — AWS S3 example +terraform { + backend "s3" { + bucket = "my-terraform-state" + key = "databricks/workspace/terraform.tfstate" + region = "us-east-1" + encrypt = true + dynamodb_table = "terraform-state-lock" + } +} +``` + +## Common Issues + +| Issue | Solution | +|-------|----------| +| **`account_id` required for UC** | Use account-level provider with `host = "https://accounts.cloud.databricks.com"` | +| **Provider version mismatch** | Pin to `~> 1.38.0` and run `terraform init -upgrade` | +| **`grants` overwrites existing permissions** | `databricks_grants` is authoritative — include ALL grants for a securable | +| **Workspace not ready for UC** | Use `depends_on` to ensure workspace is created before metastore assignment | +| **Cross-account AWS role trust issues** | Verify that the Databricks account ID is in the trust policy of the IAM role | +| **Azure SP permissions** | SP needs `Contributor` on resource group + `User Access Administrator` for ADLS | +| **State drift on manual changes** | Run `terraform refresh` to sync state, then `terraform plan` before applying | +| **Sensitive values in state** | Use `sensitive = true` for variables; consider Vault or cloud KMS for secrets | + +## Cloud Decision Matrix + +| Requirement | AWS | Azure | GCP | 
+|-------------|-----|-------|-----| +| **PrivateLink/PSC** | `aws-databricks-modular-privatelink` | `adb-with-private-link-standard` | `gcp-with-psc-exfiltration-protection` | +| **Custom VPC/VNet** | `aws_vpc` with SG + subnet module | `azurerm_virtual_network` | `gcp-byovpc` | +| **Customer-managed keys** | AWS KMS + `databricks_mws_customer_managed_keys` | Azure Key Vault | Cloud KMS | +| **Identity** | IAM role/instance profile | Azure AD / Entra ID | GCP Service Account | +| **UC storage** | S3 + IAM role | ADLS Gen2 + managed identity | GCS + service account | + +## Related Skills + +- **[databricks-unity-catalog](../databricks-unity-catalog/SKILL.md)** — query UC system tables and manage volumes at the SQL level +- **[databricks-asset-bundles](../databricks-asset-bundles/SKILL.md)** — deploy Databricks resources via DABs (YAML-based CI/CD) +- **[databricks-jobs](../databricks-jobs/SKILL.md)** — job patterns and examples for resources managed via Terraform +- **[databricks-config](../databricks-config/SKILL.md)** — CLI authentication and profile configuration + +## Resources + +- [Databricks Terraform Provider Docs](https://registry.terraform.io/providers/databricks/databricks/latest/docs) +- [terraform-databricks-examples GitHub](https://github.com/databricks/terraform-databricks-examples) +- [Databricks Terraform Provider GitHub](https://github.com/databricks/terraform-provider-databricks) +- [AWS Deployment Examples](https://github.com/databricks/terraform-databricks-examples/tree/main/examples) +- [Azure Deployment Examples](https://github.com/databricks/terraform-databricks-examples/tree/main/examples) +- [GCP Deployment Examples](https://github.com/databricks/terraform-databricks-examples/tree/main/examples)