# Terraform module for creating Databricks Premium Runtime resources
The main idea behind this module is to deploy resources for a Databricks Workspace with the Premium SKU only.

Here we provide some examples of how to provision it with different options:
- Clusters (e.g., for Unity Catalog and Shared Autoscaling)
- Workspace IP Access list creation
- ADLS Gen2 Mount
- Create Secret Scope and assign permissions to custom groups
- SQL Endpoint creation and configuration
- Create Cluster policy
- Create an Azure Key Vault-backed secret scope
```hcl
# Prerequisite resources
variable "databricks_account_id" {}
# Databricks Workspace with Premium SKU
data "azurerm_databricks_workspace" "example" {
  name                = "example-workspace"
  resource_group_name = "example-rg"
}
# Databricks Provider configuration
provider "databricks" {
  alias                       = "main"
  host                        = data.azurerm_databricks_workspace.example.workspace_url
  azure_workspace_resource_id = data.azurerm_databricks_workspace.example.id
}
# Databricks Account-Level Provider configuration
provider "databricks" {
  alias      = "account"
  host       = "https://accounts.azuredatabricks.net"
  account_id = var.databricks_account_id
}
# Key Vault where Service Principal's secrets are stored. Used for mounting Storage Container
data "azurerm_key_vault" "example" {
  name                = "example-key-vault"
  resource_group_name = "example-rg"
}
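# Storage Account used for the ADLS Gen2 mount (referenced by the runtime
# module below; the name and resource group here are placeholders)
data "azurerm_storage_account" "example" {
  name                = "examplestorageaccount"
  resource_group_name = "example-rg"
}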
locals {
  databricks_iam_account_groups = [{
    group_name  = "example-gn"
    permissions = ["ADMIN"]
    entitlements = [
      "allow_instance_pool_create",
      "allow_cluster_create",
      "databricks_sql_access"
    ]
  }]  
}
# Assigns Databricks Account groups to the Workspace. A Unity Catalog Metastore must be assigned to the Workspace before Account groups can be assigned
module "databricks_account_groups" {
  count   = length(local.databricks_iam_account_groups) != 0 ? 1 : 0
  source  = "data-platform-hq/databricks-account-groups/databricks"
  version = "1.0.1"
  
  workspace_id               = data.azurerm_databricks_workspace.example.workspace_id
  workspace_group_assignment = local.databricks_iam_account_groups
  providers = {
    databricks = databricks.account
  }
}
# Example usage of module for Runtime Premium resources.
module "databricks_runtime_premium" {  
  source  = "data-platform-hq/runtime/databricks"
  version = "~>1.0" 
  project  = "datahq"
  env      = "example"
  location = "eastus"
  # Cloud provider
  cloud_name = "azure"
  # Example configuration for Workspace Groups
  iam_workspace_groups = {
    dev = {
      user = [
        "user1@example.com",
        "user2@example.com"
      ]
      service_principal = []
      entitlements = ["allow_instance_pool_create","allow_cluster_create","databricks_sql_access"]
    }
  }
  # Example configuration for Account Groups
  iam_account_groups = local.databricks_iam_account_groups
  # 1. Databricks clusters configuration and permission assignment to custom groups
  clusters = [{
    cluster_name       = "Unity Catalog"
    data_security_mode = "USER_ISOLATION"
    availability       = "ON_DEMAND_AZURE"
    spot_bid_max_price = 1
    permissions        = [{ group_name = "DEVELOPERS", permission_level = "CAN_RESTART" }]
  },
  {
    cluster_name       = "shared autoscaling"
    data_security_mode = "NONE"
    availability       = "SPOT_AZURE"
    spot_bid_max_price = -1
    permissions        = [{group_name = "DEVELOPERS", permission_level = "CAN_MANAGE"}]
  }]
  # 2. Workspace can be accessed only from these IP addresses:
  ip_addresses = {
    "ip_range_1" = "10.128.0.0/16",
    "ip_range_2" = "10.33.0.0/16",
  }
  
  # 3. ADLS Gen2 Mount
  mount_enabled = true # required for the mountpoints below to be created
  mountpoints = {
    example_mount = {
      storage_account_name = data.azurerm_storage_account.example.name
      container_name       = "example_container"
    }
  }
  # Parameters of Service principal used for ADLS mount
  # Imports App ID and Secret of Service Principal from target Key Vault  
  sp_client_id_secret_name = "sp-client-id" # secret's name that stores Service Principal App ID
  sp_key_secret_name       = "sp-key" # secret's name that stores Service Principal Secret Key
  tenant_id_secret_name    = "infra-arm-tenant-id" # secret's name that stores tenant id value 
  # 4. Create Secret Scope and assign permissions to custom groups 
  secret_scope = [{
    scope_name = "extra-scope"
    acl        = [{ principal = "DEVELOPERS", permission = "READ" }] # Only custom workspace group names are allowed. If left empty, only Workspace admins can access these keys
    secrets    = [{ key = "secret-name", string_value = "secret-value"}]
  }]
  # 5. SQL Warehouse Endpoint
  sql_endpoint = [{
    name        = "default"  
    enable_serverless_compute = true  
    permissions = [{ group_name = "DEVELOPERS", permission_level = "CAN_USE" },]
  }]
  # 6. Databricks cluster policies
  custom_cluster_policies = [{
    name     = "custom_policy_1",
    can_use  = ["DEVELOPERS"], # list of custom workspace group names allowed to use this policy
    definition = {
      "autoscale.max_workers": {
        "type": "range",
        "maxValue": 3,
        "defaultValue": 2
      },
    }
  }]
  # 7. Azure Key Vault-backed secret scope
  key_vault_secret_scope = [{
    name         = "external"
    key_vault_id = data.azurerm_key_vault.example.id
    dns_name     = data.azurerm_key_vault.example.vault_uri
  }]  
    
  providers = {
    databricks = databricks.main
  }
}
```
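The module exposes several outputs (see the Outputs table below). As a sketch, the admin PAT created by the example above could be surfaced to other configurations like this (the output name is arbitrary):

```hcl
# Expose the Databricks PAT created by the runtime module; marked
# sensitive so it is redacted in CLI output
output "databricks_token" {
  value     = module.databricks_runtime_premium.token
  sensitive = true
}
```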
The following example provisions similar resources on AWS:

- Clusters (e.g., for Unity Catalog and Shared Autoscaling)
- Workspace IP Access list creation
- Create Secret Scope and assign permissions to custom groups
- SQL Endpoint creation and configuration
- Create Cluster policy
```hcl
# Prerequisite resources
variable "databricks_account_id" {}
variable "region" {}
# Lists Databricks Workspaces available in the account
data "databricks_mws_workspaces" "example" {
  provider = databricks.mws
}
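# A sketch of consuming the data source above: its "ids" attribute maps
# workspace names to numeric workspace IDs
output "databricks_workspace_ids" {
  value = data.databricks_mws_workspaces.example.ids
}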
# Provider configuration for SSM
provider "aws" {
  alias  = "ssm"
  region = var.region
}
# Databricks Account-Level Provider configuration
provider "databricks" {
  alias         = "mws"
  host          = "https://accounts.cloud.databricks.com"
  account_id    = data.aws_ssm_parameter.this["databricks_account_id"].value
  client_id     = data.aws_ssm_parameter.this["databricks_admin_sp_id"].value
  client_secret = data.aws_ssm_parameter.this["databricks_admin_sp_secret"].value
}
# Databricks Provider configuration
provider "databricks" {
  alias         = "workspace"
  host          = module.databricks_workspace.workspace_url
  client_id     = data.aws_ssm_parameter.this["databricks_admin_sp_id"].value
  client_secret = data.aws_ssm_parameter.this["databricks_admin_sp_secret"].value
}
locals {
  ssm_parameters = [
    "databricks_account_id",
    "databricks_admin_sp_id",
    "databricks_admin_sp_secret",
    "github_pat_token"
  ]
  ssm_parameters_prefix = "/example-prefix/" # Prefix for parameters stored in AWS SSM
  dbx_runtime = {
    iam_account_groups_assignment = [
      { group_name = "example gm1", permissions = ["USER"] },
      { group_name = "example gm2", permissions = ["USER"] }
    ]
    sql_endpoints = [{
      name = "example_test"
      permissions = [
        { group_name = "example gm1", permission_level = "CAN_MANAGE" },
      ]
    }]
    clusters = [{
      cluster_name = "example1"
      permissions = [
        { group_name = "example gm2", permission_level = "CAN_RESTART" },
      ]
      }, {
      cluster_name = "example2"
      permissions = [
        { group_name = "example gm2", permission_level = "CAN_RESTART" },
        { group_name = "example gm1", permission_level = "CAN_MANAGE" },
      ]
    }]
  }
  databricks_custom_cluster_policies = [{
    name       = null
    can_use    = null
    definition = null
  }]
  dbx_inputs = {
    vpc_id             = "vpc-example"
    subnet_ids         = ["subnet-example1", "subnet-example2"]
    security_group_ids = ["sg-example"]
  }
  iam_default_permission_boundary_policy_arn = "arn:aws:iam::{ AWS Account ID }:policy/eo_role_boundary"
}
# SSM Parameters
data "aws_ssm_parameter" "this" {
  for_each = toset(local.ssm_parameters) # for_each requires a set, hence toset()
  name     = "${local.ssm_parameters_prefix}${each.key}"
  provider = aws.ssm
}
# Label configuration
module "label" {
  source  = "cloudposse/label/null"
  version = "0.25.0"
  namespace   = "example-namespace" 
  environment = "example-environment"
  stage       = "example-stage"
}
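# With the default "-" delimiter and label order, module.label.id renders as
# "example-namespace-example-environment-example-stage"; it names the workspace below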
# Databricks Workspace configuration
module "databricks_workspace" {
  source  = "data-platform-hq/aws-workspace/databricks"
  version = "1.0.1"
  label              = module.label.id
  vpc_id             = local.dbx_inputs.vpc_id
  subnet_ids         = local.dbx_inputs.subnet_ids
  security_group_ids = local.dbx_inputs.security_group_ids
  region             = var.region
  account_id         = data.aws_ssm_parameter.this["databricks_account_id"].value
  iam_cross_account_workspace_role_config = {
    permission_boundary_arn = local.iam_default_permission_boundary_policy_arn    
  }
  providers = {
    databricks = databricks.mws
  }
}
# Account level group assignment to the Workspace
module "databricks_account_groups" {
  source  = "data-platform-hq/databricks-account-groups/databricks"
  version = "1.0.1"
  workspace_id               = module.databricks_workspace.workspace_id
  workspace_group_assignment = local.dbx_runtime.iam_account_groups_assignment
  providers = {
    databricks = databricks.mws
  }
}
# Databricks Runtime resources configuration (clusters, sql, secrets, etc.)
module "databricks_runtime" {  
  source  = "data-platform-hq/runtime/databricks"
  version = "1.0.0"
  clusters                      = local.dbx_runtime.clusters
  sql_endpoint                  = local.dbx_runtime.sql_endpoints
  secret_scope                  = [] # optionally, a list of secret scopes to create
  workspace_admin_token_enabled = true
  system_schemas_enabled        = true
  iam_account_groups            = local.dbx_runtime.iam_account_groups_assignment
  cloud_name                    = "aws"
  custom_cluster_policies       = local.databricks_custom_cluster_policies
  providers = {
    databricks = databricks.workspace
  }
  depends_on = [module.databricks_workspace, module.databricks_account_groups]
}
```
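Variable values for the AWS example can be supplied via a `terraform.tfvars` file; a minimal sketch with placeholder values:

```hcl
# terraform.tfvars (placeholder values)
databricks_account_id = "00000000-0000-0000-0000-000000000000"
region                = "us-east-1"
```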
## Requirements

| Name | Version |
|---|---|
| terraform | >=1.3 | 
| databricks | >=1.85.0 | 
## Providers

| Name | Version |
|---|---|
| databricks | >=1.85.0 | 
## Modules

No modules.
## Resources

| Name | Type |
|---|---|
| databricks_cluster.this | resource | 
| databricks_cluster_policy.overrides | resource | 
| databricks_cluster_policy.this | resource | 
| databricks_database_instance.this | resource | 
| databricks_disable_legacy_dbfs_setting.this | resource | 
| databricks_entitlements.this | resource | 
| databricks_group.this | resource | 
| databricks_ip_access_list.allowed_list | resource | 
| databricks_mount.adls | resource | 
| databricks_permissions.clusters | resource | 
| databricks_permissions.policy | resource | 
| databricks_permissions.sql_endpoint | resource | 
| databricks_secret.main | resource | 
| databricks_secret.this | resource | 
| databricks_secret_acl.this | resource | 
| databricks_secret_scope.main | resource | 
| databricks_secret_scope.this | resource | 
| databricks_sql_endpoint.this | resource | 
| databricks_token.pat | resource | 
| databricks_workspace_conf.this | resource | 
| databricks_current_metastore.this | data source | 
| databricks_group.account_groups | data source | 
| databricks_sql_warehouses.all | data source | 
## Inputs

| Name | Description | Type | Default | Required |
|---|---|---|---|---|
| cloud_name | Cloud Name | string | n/a | yes | 
| clusters | Set of objects with parameters to configure Databricks clusters and assign permissions to them for certain custom groups | set(object({ | [] | no | 
| custom_cluster_policies | Provides an ability to create a custom cluster policy, assign it to clusters, and grant CAN_USE permission on it to certain custom groups. name - name of the custom cluster policy to create; can_use - list of strings, where values are custom group names (these groups have to be created with Terraform); definition - JSON document expressed in Databricks Policy Definition Language (no need to call 'jsonencode()' on it when providing a value) | list(object({ | [ | no | 
| custom_config | Map of Azure Databricks workspace custom config | map(string) | { | no | 
| default_cluster_policies_override | Provides an ability to override a default cluster policy. name - name of the cluster policy to override; family_id - family ID of the corresponding policy; definition - JSON document expressed in Databricks Policy Definition Language (no need to call 'jsonencode()' on it when providing a value) | list(object({ | [ | no | 
| disable_legacy_dbfs | Disables access to DBFS root and mounts in your existing Databricks workspace. When set to true: - Access to DBFS root and mounted paths is blocked. - Manual restart of all-purpose compute clusters and SQL warehouses is required after enabling this setting. - Note: This setting only takes effect when disabling access. Re-enabling must be done manually via the Databricks UI. | bool | false | no | 
| iam_account_groups | List of objects with group name and entitlements for this group | list(object({ | [] | no | 
| iam_workspace_groups | Used to create workspace group. Map of group name and its parameters, such as users and service principals added to the group. Also possible to configure group entitlements. | map(object({ | {} | no | 
| ip_addresses | A map of IP address ranges | map(string) | { | no | 
| key_vault_secret_scope | Object with Azure Key Vault parameters required for creation of Azure-backed Databricks Secret scope | list(object({ | [] | no | 
| lakebase_instance | Map of objects with parameters to configure and deploy OLTP database instances in Databricks. To deploy and use an OLTP database instance in Databricks: - You must be a Databricks workspace owner. - A Databricks workspace must already be deployed in your cloud environment (e.g., AWS or Azure). - The workspace must be on the Premium plan or above. - You must enable the "Lakebase: Managed Postgres OLTP Database" feature in the Preview features section. - Database instances can only be deleted manually through the Databricks UI or using the Databricks CLI with the --purge option. | map(object({ | {} | no | 
| mount_configuration | Configuration for mounting storage, including only service principal details | object({ | { | no | 
| mount_enabled | Boolean flag that determines whether mount point for storage account filesystem is created | bool | false | no | 
| mountpoints | Mountpoints for databricks | map(object({ | {} | no | 
| pat_token_lifetime_seconds | The lifetime of the token, in seconds. If no lifetime is specified, the token remains valid indefinitely | number | 315569520 | no | 
| secret_scope | Provides an ability to create a custom Secret Scope, store secrets in it, and assign ACLs for access management. scope_name - name of the Secret Scope to create; acl - list of objects, where 'principal' is a custom group name (created in the 'Premium' module) and 'permission' is one of "READ", "WRITE", "MANAGE"; secrets - list of objects, where 'key' is the created key name and 'string_value' is its value | list(object({ | [] | no | 
| sql_endpoint | Set of objects with parameters to configure SQL Endpoints and assign permissions to them for certain custom groups | set(object({ | [] | no | 
| suffix | Optional suffix that will be appended to resource names | string | "" | no | 
| workspace_admin_token_enabled | Boolean flag to specify whether to create Workspace Admin Token | bool | n/a | yes | 
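Most inputs are optional with defaults. For instance, to block legacy DBFS access only the `disable_legacy_dbfs` flag has to be set; a minimal sketch (required inputs shown with placeholder values, all other arguments omitted):

```hcl
module "databricks_runtime" {
  source  = "data-platform-hq/runtime/databricks"
  version = "~>1.0"

  cloud_name                    = "azure"
  workspace_admin_token_enabled = false

  # Blocks access to DBFS root and mounts; affected all-purpose clusters
  # and SQL warehouses must be restarted manually after this is applied
  disable_legacy_dbfs = true
}
```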
## Outputs

| Name | Description |
|---|---|
| clusters | Provides name and unique identifier for the clusters | 
| metastore_id | The ID of the current metastore in the Databricks workspace. | 
| sql_endpoint_data_source_id | ID of the data source for this endpoint | 
| sql_endpoint_jdbc_url | JDBC connection string of SQL Endpoint | 
| sql_warehouses_list | List of IDs of all SQL warehouses in the Databricks workspace. | 
| token | Databricks Personal Authorization Token | 
## License

Apache 2 Licensed. For more information, please see [LICENSE](LICENSE).