Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 101 additions & 18 deletions src/databricks_ai_bridge/lakebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import asyncio
import logging
import re
import time
import uuid
from enum import Enum
Expand Down Expand Up @@ -97,29 +98,38 @@ def __init__(
token_cache_duration_seconds: int = DEFAULT_TOKEN_CACHE_DURATION_SECONDS,
) -> None:
self.workspace_client: WorkspaceClient = workspace_client or WorkspaceClient()
self.instance_name: str = instance_name
self.token_cache_duration_seconds: int = token_cache_duration_seconds

# Resolve host from the Lakebase name
try:
instance = self.workspace_client.database.get_database_instance(instance_name)
except Exception as exc:
raise ValueError(
f"Unable to resolve Lakebase instance '{instance_name}'. "
"Ensure the instance name is correct."
) from exc

resolved_host = getattr(instance, "read_write_dns", None) or getattr(
instance, "read_only_dns", None
)
# If input is hostname (e.g., from Databricks Apps valueFrom resolution)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it make sense to have the helper inside of this method?

i think if the method here is accepting a param "instance name", then this might be a helpful standalone helper? something that will resolve to a validated instance name, whether given a hostname or an instance name

# resolve to lakebase name
if _is_hostname(instance_name):
# Input is a hostname - resolve to instance name
self.instance_name, self.host = _resolve_instance_name_from_hostname(
self.workspace_client, instance_name
)
else:
# Input is an instance name
self.instance_name = instance_name
try:
instance = self.workspace_client.database.get_database_instance(instance_name)
except Exception as exc:
raise ValueError(
f"Unable to resolve Lakebase instance '{instance_name}'. "
"Ensure the instance name is correct."
) from exc

if not resolved_host:
raise ValueError(
f"Lakebase host not found for instance '{instance_name}'. "
"Ensure the instance is running and in AVAILABLE state."
resolved_host = getattr(instance, "read_write_dns", None) or getattr(
instance, "read_only_dns", None
)

self.host: str = resolved_host
if not resolved_host:
raise ValueError(
f"Lakebase host not found for instance '{instance_name}'. "
"Ensure the instance is running and in AVAILABLE state."
)

self.host = resolved_host

self.username: str = self._infer_username()

self._cached_token: str | None = None
Expand Down Expand Up @@ -869,3 +879,76 @@ def grant_all_sequences_in_schema(
schema,
grantee,
)


# =============================================================================
# Hostname Resolution Helpers
# =============================================================================

# Lakebase hostnames have the shape: *.database.<region>.*.databricks.com
# - \S / [^\s.] keep whitespace out of every label, so padded or multi-token
#   input is never mistaken for a hostname.
# - \Z (instead of $) refuses a trailing newline.
# - IGNORECASE because DNS names compare case-insensitively.
_LAKEBASE_HOSTNAME_PATTERN = re.compile(
    r"^\S+\.database\.[^\s.]+\.\S+\.databricks\.com\Z",
    re.IGNORECASE,
)


def _is_hostname(value: str) -> bool:
    """
    Check if the value looks like a Lakebase hostname rather than an instance name.

    Hostname examples:
        - instance-uuid-.database.region.cloud.databricks.com

    Instance name examples (NOT hostnames):
        - lakebase
        - my-database-instance

    Args:
        value: The string to check (either an instance name or hostname)

    Returns:
        True if the whole value has the Lakebase hostname shape, False otherwise
        (including when the value is not a string at all).
    """
    # A non-string can never be a hostname; answer False instead of letting the
    # regex engine raise TypeError.
    if not isinstance(value, str):
        return False
    # fullmatch: the entire value must be the hostname, nothing before or after.
    return _LAKEBASE_HOSTNAME_PATTERN.fullmatch(value) is not None


def _resolve_instance_name_from_hostname(
    workspace_client: WorkspaceClient, hostname: str
) -> tuple[str, str]:
    """
    Resolve instance name from a hostname by listing database instances.

    This is useful when a hostname is provided (e.g., from Databricks Apps valueFrom
    resolution) instead of an instance name.

    The hostname is compared against each instance's ``read_write_dns`` and
    ``read_only_dns``. The comparison ignores letter case, surrounding whitespace
    and a trailing root dot, so equivalent spellings of the same DNS name resolve
    to the same instance.

    Hostname examples:
        - instance-uuid-.database.region.cloud.databricks.com

    Args:
        workspace_client: The WorkspaceClient to use for API calls
        hostname: The database hostname (e.g., from Databricks Apps valueFrom: "database")

    Returns:
        Tuple of (instance_name, host), where ``host`` is the matching DNS name
        exactly as reported for the instance.

    Raises:
        ValueError: If no matching instance is found or unable to list instances
    """
    try:
        # Note: This lists all database instances the user has access to. For workspaces
        # with many instances, this may have performance implications but there is no way
        # to retrieve the instance name from the lakebase hostname.
        # The listing is materialized inside the ``try`` on purpose, so that a failure
        # raised while iterating the results is reported with the message below too.
        instances = list(workspace_client.database.list_database_instances())
    except Exception as exc:
        raise ValueError(
            f"Unable to list database instances to resolve hostname '{hostname}'. "
            "Ensure you have permission to list database instances."
        ) from exc

    # DNS names are case-insensitive and may carry a trailing root dot.
    wanted = hostname.strip().rstrip(".").lower()

    # Find the instance that exposes this hostname on either endpoint.
    for instance in instances:
        endpoints = (
            getattr(instance, "read_write_dns", None),
            getattr(instance, "read_only_dns", None),
        )
        for dns in endpoints:
            # Endpoints may be unset (None); only real strings can match.
            if isinstance(dns, str) and dns.rstrip(".").lower() == wanted:
                return instance.name, dns

    raise ValueError(
        f"Unable to find database instance matching hostname '{hostname}'. "
        "Ensure the hostname is correct, the instance exists, and you have the proper "
        "permissions on the Lakebase instance."
    )
156 changes: 156 additions & 0 deletions tests/databricks_ai_bridge/test_lakebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -1056,3 +1056,159 @@
error_msg = str(exc_info.value)
assert "Insufficient privileges" in error_msg
assert "CAN MANAGE" in error_msg
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add an integration test where we use real values from dogfood to verify this logic?



# =============================================================================
# Hostname Resolution Tests
# =============================================================================


def test_is_hostname_detects_database_hostname():
    """``_is_hostname`` accepts Lakebase DNS names and rejects plain instance names."""
    from databricks_ai_bridge.lakebase import _is_hostname

    # Values of the shape *.database.<region>.*.databricks.com are hostnames.
    lakebase_hosts = (
        "instance-f757b615-f2fd-4614-87cc-9ba35f2eeb61.database.staging.cloud.databricks.com",
        "instance-abc123.database.prod.cloud.databricks.com",
        "my-instance.database.us-west-2.cloud.databricks.com",
    )
    for candidate in lakebase_hosts:
        assert _is_hostname(candidate)

    # Ordinary instance names, plus a look-alike on a non-databricks domain,
    # must not be classified as hostnames.
    not_hosts = (
        "lakebase",
        "my-database-instance",
        "production_db",
        "my-db.database.example.net",
    )
    for candidate in not_hosts:
        assert not _is_hostname(candidate)


def test_lakebase_pool_accepts_hostname(monkeypatch):
    """A hostname passed as ``instance_name`` is resolved to the real instance name."""
    monkeypatch.setattr(
        "databricks_ai_bridge.lakebase.ConnectionPool", _make_connection_pool_class()
    )

    dns_name = "instance-abc123.database.staging.cloud.databricks.com"

    # The single listed instance advertises the hostname as its read/write endpoint.
    listed = MagicMock()
    listed.name = "my-lakebase-instance"
    listed.read_write_dns = dns_name
    listed.read_only_dns = None

    workspace = _make_workspace()
    workspace.database.list_database_instances.return_value = [listed]

    # The hostname goes in where an instance name is normally expected.
    pool = LakebasePool(instance_name=dns_name, workspace_client=workspace)

    assert pool.instance_name == "my-lakebase-instance"
    assert pool.host == dns_name

    # Resolution went through the listing API, never the by-name lookup.
    workspace.database.get_database_instance.assert_not_called()


def test_lakebase_pool_hostname_not_found_raises_error(monkeypatch):
    """Constructing a pool from a hostname that matches no instance raises ValueError."""
    monkeypatch.setattr(
        "databricks_ai_bridge.lakebase.ConnectionPool", _make_connection_pool_class()
    )

    # The workspace lists exactly one instance, and its endpoint is a different host.
    unrelated = MagicMock()
    unrelated.name = "other-instance"
    unrelated.read_write_dns = "other-host.database.staging.cloud.databricks.com"
    unrelated.read_only_dns = None

    workspace = _make_workspace()
    workspace.database.list_database_instances.return_value = [unrelated]

    unknown_host = "instance-not-found.database.staging.cloud.databricks.com"

    with pytest.raises(ValueError, match="Unable to find database instance matching hostname"):
        LakebasePool(instance_name=unknown_host, workspace_client=workspace)


@pytest.mark.asyncio
async def test_async_lakebase_pool_accepts_hostname(monkeypatch):
    """``AsyncLakebasePool`` resolves a hostname argument to the real instance name."""
    monkeypatch.setattr(
        "databricks_ai_bridge.lakebase.AsyncConnectionPool",
        _make_async_connection_pool_class(),
    )

    dns_name = "instance-xyz789.database.prod.cloud.databricks.com"

    # The single listed instance advertises the hostname as its read/write endpoint.
    listed = MagicMock()
    listed.name = "prod-lakebase"
    listed.read_write_dns = dns_name
    listed.read_only_dns = None

    workspace = _make_workspace()
    workspace.database.list_database_instances.return_value = [listed]

    # The hostname goes in where an instance name is normally expected.
    pool = AsyncLakebasePool(instance_name=dns_name, workspace_client=workspace)

    assert pool.instance_name == "prod-lakebase"
    assert pool.host == dns_name


# =============================================================================
# Integration Tests for Hostname Resolution
# =============================================================================


@pytest.mark.integration
def test_lakebase_pool_hostname_resolution_integration():
    """
    Integration test: Verify hostname resolution works with real Databricks infrastructure.

    This test requires:
    - DATABRICKS_HOST and authentication configured
    - Access to a Lakebase instance in the workspace

    Run with: pytest -m integration tests/databricks_ai_bridge/test_lakebase.py
    """
    from databricks.sdk import WorkspaceClient

    from databricks_ai_bridge.lakebase import _is_hostname, _resolve_instance_name_from_hostname

    workspace_client = WorkspaceClient()

    # List all database instances visible to the caller.
    instances = list(workspace_client.database.list_database_instances())
    if not instances:
        pytest.skip("No Lakebase instances available in the workspace")

    # Keep only instances that expose a read/write endpoint. Binding the endpoint
    # through the walrus gives the type checker a plain ``str`` (not ``str | None``),
    # and pairing it with the name avoids carrying an Optional instance around.
    candidates = [
        (dns, instance.name) for instance in instances if (dns := instance.read_write_dns)
    ]
    if not candidates:
        pytest.skip("No Lakebase instance with read_write_dns found")

    # Use the first usable instance.
    hostname, expected_name = candidates[0]

    # Verify hostname detection
    assert _is_hostname(hostname), f"Expected '{hostname}' to be detected as hostname"

    # Verify resolution
    resolved_name, resolved_host = _resolve_instance_name_from_hostname(
        workspace_client, hostname
    )
    assert resolved_name == expected_name, (
        f"Expected instance name '{expected_name}', got '{resolved_name}'"
    )
    assert resolved_host == hostname
Loading