From d49ea5b1a7f49adb614ff785bac601a6f639a993 Mon Sep 17 00:00:00 2001 From: Jenny Date: Fri, 30 Jan 2026 11:50:15 -0800 Subject: [PATCH 1/2] lakebase resolve instance name --- src/databricks_ai_bridge/lakebase.py | 90 +++++++++++++---- tests/databricks_ai_bridge/test_lakebase.py | 101 ++++++++++++++++++++ 2 files changed, 173 insertions(+), 18 deletions(-) diff --git a/src/databricks_ai_bridge/lakebase.py b/src/databricks_ai_bridge/lakebase.py index 1cb0e83f..c2649b87 100644 --- a/src/databricks_ai_bridge/lakebase.py +++ b/src/databricks_ai_bridge/lakebase.py @@ -97,34 +97,88 @@ def __init__( token_cache_duration_seconds: int = DEFAULT_TOKEN_CACHE_DURATION_SECONDS, ) -> None: self.workspace_client: WorkspaceClient = workspace_client or WorkspaceClient() - self.instance_name: str = instance_name self.token_cache_duration_seconds: int = token_cache_duration_seconds - # Resolve host from the Lakebase name - try: - instance = self.workspace_client.database.get_database_instance(instance_name) - except Exception as exc: - raise ValueError( - f"Unable to resolve Lakebase instance '{instance_name}'. " - "Ensure the instance name is correct." - ) from exc - - resolved_host = getattr(instance, "read_write_dns", None) or getattr( - instance, "read_only_dns", None - ) + # If input is hostname (e.g., from Databricks Apps valueFrom resolution) + # resolve to lakebase name + if self._is_hostname(instance_name): + # Input is a hostname - resolve to instance name + self.instance_name, self.host = self._resolve_from_hostname(instance_name) + else: + # Input is an instance name + self.instance_name = instance_name + try: + instance = self.workspace_client.database.get_database_instance(instance_name) + except Exception as exc: + raise ValueError( + f"Unable to resolve Lakebase instance '{instance_name}'. " + "Ensure the instance name is correct." + ) from exc - if not resolved_host: - raise ValueError( - f"Lakebase host not found for instance '{instance_name}'. 
" - "Ensure the instance is running and in AVAILABLE state." + resolved_host = getattr(instance, "read_write_dns", None) or getattr( + instance, "read_only_dns", None ) - self.host: str = resolved_host + if not resolved_host: + raise ValueError( + f"Lakebase host not found for instance '{instance_name}'. " + "Ensure the instance is running and in AVAILABLE state." + ) + + self.host = resolved_host + self.username: str = self._infer_username() self._cached_token: str | None = None self._cache_ts: float | None = None + @staticmethod + def _is_hostname(value: str) -> bool: + """Check if the value looks like a Lakebase hostname rather than an instance name.""" + # Hostname pattern: instance-{uuid}.database.{env}.cloud.databricks.com + # or similar patterns containing ".database." and ending with a domain + return ".database." in value and (value.endswith(".com") or value.endswith(".net")) + + def _resolve_from_hostname(self, hostname: str) -> tuple[str, str]: + """ + Resolve instance name from a hostname by listing database instances. + + Args: + hostname: The database hostname (e.g., from Databricks Apps valueFrom: "database") + + Returns: + Tuple of (instance_name, host) + + Raises: + ValueError: If no matching instance is found + """ + try: + instances = list(self.workspace_client.database.list_database_instances()) + except Exception as exc: + raise ValueError( + f"Unable to list database instances to resolve hostname '{hostname}'. " + "Ensure you have access to database instances." + ) from exc + + # Find the instance that matches this hostname + for instance in instances: + rw_dns = getattr(instance, "read_write_dns", None) + ro_dns = getattr(instance, "read_only_dns", None) + + if hostname in (rw_dns, ro_dns): + instance_name = getattr(instance, "name", None) + if not instance_name: + raise ValueError( + f"Found matching instance for hostname '{hostname}' " + "but instance name is not available." 
+ ) + return instance_name, hostname + + raise ValueError( + f"Unable to find database instance matching hostname '{hostname}'. " + "Ensure the hostname is correct and the instance exists." + ) + def _get_cached_token(self) -> str | None: """Check if the cached token is still valid.""" if not self._cached_token or not self._cache_ts: diff --git a/tests/databricks_ai_bridge/test_lakebase.py b/tests/databricks_ai_bridge/test_lakebase.py index b914f721..a831f8f9 100644 --- a/tests/databricks_ai_bridge/test_lakebase.py +++ b/tests/databricks_ai_bridge/test_lakebase.py @@ -1056,3 +1056,104 @@ def test_execute_grant_handles_insufficient_privilege(self): error_msg = str(exc_info.value) assert "Insufficient privileges" in error_msg assert "CAN MANAGE" in error_msg + + +# ============================================================================= +# Hostname Resolution Tests +# ============================================================================= + + +def test_is_hostname_detects_database_hostname(): + """Test that _is_hostname correctly identifies database hostnames.""" + from databricks_ai_bridge.lakebase import _LakebasePoolBase + + # Should be detected as hostnames + assert _LakebasePoolBase._is_hostname( + "instance-f757b615-f2fd-4614-87cc-9ba35f2eeb61.database.staging.cloud.databricks.com" + ) + assert _LakebasePoolBase._is_hostname("instance-abc123.database.prod.cloud.databricks.com") + assert _LakebasePoolBase._is_hostname("my-db.database.example.net") + + # Should NOT be detected as hostnames (regular instance names) + assert not _LakebasePoolBase._is_hostname("lakebase") + assert not _LakebasePoolBase._is_hostname("my-database-instance") + assert not _LakebasePoolBase._is_hostname("production_db") + + +def test_lakebase_pool_accepts_hostname(monkeypatch): + """Test that LakebasePool accepts hostname and resolves instance name.""" + TestConnectionPool = _make_connection_pool_class() + monkeypatch.setattr("databricks_ai_bridge.lakebase.ConnectionPool", 
TestConnectionPool) + + workspace = _make_workspace() + + # Mock list_database_instances to return an instance matching the hostname + hostname = "instance-abc123.database.staging.cloud.databricks.com" + mock_instance = MagicMock() + mock_instance.name = "my-lakebase-instance" + mock_instance.read_write_dns = hostname + mock_instance.read_only_dns = None + workspace.database.list_database_instances.return_value = [mock_instance] + + pool = LakebasePool( + instance_name=hostname, # Pass hostname instead of instance name + workspace_client=workspace, + ) + + # Should have resolved to the instance name + assert pool.instance_name == "my-lakebase-instance" + assert pool.host == hostname + + # get_database_instance should NOT have been called (we used list instead) + workspace.database.get_database_instance.assert_not_called() + + +def test_lakebase_pool_hostname_not_found_raises_error(monkeypatch): + """Test that LakebasePool raises error when hostname doesn't match any instance.""" + TestConnectionPool = _make_connection_pool_class() + monkeypatch.setattr("databricks_ai_bridge.lakebase.ConnectionPool", TestConnectionPool) + + workspace = _make_workspace() + + # Mock list_database_instances to return instances that don't match + other_instance = MagicMock() + other_instance.name = "other-instance" + other_instance.read_write_dns = "other-host.database.staging.cloud.databricks.com" + other_instance.read_only_dns = None + workspace.database.list_database_instances.return_value = [other_instance] + + hostname = "instance-not-found.database.staging.cloud.databricks.com" + + with pytest.raises(ValueError, match="Unable to find database instance matching hostname"): + LakebasePool( + instance_name=hostname, + workspace_client=workspace, + ) + + +@pytest.mark.asyncio +async def test_async_lakebase_pool_accepts_hostname(monkeypatch): + """Test that AsyncLakebasePool accepts hostname and resolves instance name.""" + TestAsyncConnectionPool = _make_async_connection_pool_class() 
+ monkeypatch.setattr( + "databricks_ai_bridge.lakebase.AsyncConnectionPool", TestAsyncConnectionPool + ) + + workspace = _make_workspace() + + # Mock list_database_instances to return an instance matching the hostname + hostname = "instance-xyz789.database.prod.cloud.databricks.com" + mock_instance = MagicMock() + mock_instance.name = "prod-lakebase" + mock_instance.read_write_dns = hostname + mock_instance.read_only_dns = None + workspace.database.list_database_instances.return_value = [mock_instance] + + pool = AsyncLakebasePool( + instance_name=hostname, # Pass hostname instead of instance name + workspace_client=workspace, + ) + + # Should have resolved to the instance name + assert pool.instance_name == "prod-lakebase" + assert pool.host == hostname From 97289a2622049088f5653d8011d3ff23efc6c6bb Mon Sep 17 00:00:00 2001 From: Jenny Date: Tue, 3 Feb 2026 15:50:38 -0800 Subject: [PATCH 2/2] pr review updates --- src/databricks_ai_bridge/lakebase.py | 127 ++++++++++++-------- tests/databricks_ai_bridge/test_lakebase.py | 71 +++++++++-- 2 files changed, 141 insertions(+), 57 deletions(-) diff --git a/src/databricks_ai_bridge/lakebase.py b/src/databricks_ai_bridge/lakebase.py index c2649b87..b9145b77 100644 --- a/src/databricks_ai_bridge/lakebase.py +++ b/src/databricks_ai_bridge/lakebase.py @@ -2,6 +2,7 @@ import asyncio import logging +import re import time import uuid from enum import Enum @@ -101,9 +102,11 @@ def __init__( # If input is hostname (e.g., from Databricks Apps valueFrom resolution) # resolve to lakebase name - if self._is_hostname(instance_name): + if _is_hostname(instance_name): # Input is a hostname - resolve to instance name - self.instance_name, self.host = self._resolve_from_hostname(instance_name) + self.instance_name, self.host = _resolve_instance_name_from_hostname( + self.workspace_client, instance_name + ) else: # Input is an instance name self.instance_name = instance_name @@ -132,53 +135,6 @@ def __init__( self._cached_token: str | None 
= None self._cache_ts: float | None = None - @staticmethod - def _is_hostname(value: str) -> bool: - """Check if the value looks like a Lakebase hostname rather than an instance name.""" - # Hostname pattern: instance-{uuid}.database.{env}.cloud.databricks.com - # or similar patterns containing ".database." and ending with a domain - return ".database." in value and (value.endswith(".com") or value.endswith(".net")) - - def _resolve_from_hostname(self, hostname: str) -> tuple[str, str]: - """ - Resolve instance name from a hostname by listing database instances. - - Args: - hostname: The database hostname (e.g., from Databricks Apps valueFrom: "database") - - Returns: - Tuple of (instance_name, host) - - Raises: - ValueError: If no matching instance is found - """ - try: - instances = list(self.workspace_client.database.list_database_instances()) - except Exception as exc: - raise ValueError( - f"Unable to list database instances to resolve hostname '{hostname}'. " - "Ensure you have access to database instances." - ) from exc - - # Find the instance that matches this hostname - for instance in instances: - rw_dns = getattr(instance, "read_write_dns", None) - ro_dns = getattr(instance, "read_only_dns", None) - - if hostname in (rw_dns, ro_dns): - instance_name = getattr(instance, "name", None) - if not instance_name: - raise ValueError( - f"Found matching instance for hostname '{hostname}' " - "but instance name is not available." - ) - return instance_name, hostname - - raise ValueError( - f"Unable to find database instance matching hostname '{hostname}'. " - "Ensure the hostname is correct and the instance exists." 
- ) def _get_cached_token(self) -> str | None: """Check if the cached token is still valid.""" if not self._cached_token or not self._cache_ts: @@ -923,3 +879,76 @@ def grant_all_sequences_in_schema( schema, grantee, ) + + +# ============================================================================= +# Hostname Resolution Helpers +# ============================================================================= + +# Regex pattern for Lakebase hostnames: *.database.{env}.*.databricks.com +_LAKEBASE_HOSTNAME_PATTERN = re.compile(r"^.+\.database\.[^.]+\..+\.databricks\.com$") + + +def _is_hostname(value: str) -> bool: + """ + Check if the value looks like a Lakebase hostname rather than an instance name. + + Hostname examples: + - instance-uuid-.database.region.cloud.databricks.com + + Instance name examples (NOT hostnames): + - lakebase + - my-database-instance + + Args: + value: The string to check (either an instance name or hostname) + + Returns: + True if the value appears to be a hostname, False if it's an instance name + """ + return bool(_LAKEBASE_HOSTNAME_PATTERN.match(value)) + + +def _resolve_instance_name_from_hostname( + workspace_client: WorkspaceClient, hostname: str +) -> tuple[str, str]: + """ + Resolve instance name from a hostname by listing database instances. + + This is useful when a hostname is provided (e.g., from Databricks Apps valueFrom + resolution) instead of an instance name. + + Hostname examples: + - instance-uuid-.database.region.cloud.databricks.com + + Args: + workspace_client: The WorkspaceClient to use for API calls + hostname: The database hostname (e.g., from Databricks Apps valueFrom: "database") + + Returns: + Tuple of (instance_name, host) + + Raises: + ValueError: If no matching instance is found or unable to list instances + """ + try: + # Note: This lists all database instances the user has access to. 
For workspaces + # with many instances, this may have performance implications but there is no way + # to retrieve the instance name from the lakebase hostname + instances = list(workspace_client.database.list_database_instances()) + except Exception as exc: + raise ValueError( + f"Unable to list database instances to resolve hostname '{hostname}'. " + "Ensure you have permission to list database instances." + ) from exc + + # Find the instance that matches this hostname + for instance in instances: + if instance.read_write_dns == hostname: + return instance.name, hostname + + raise ValueError( + f"Unable to find database instance matching hostname '{hostname}'. " + "Ensure the hostname is correct, the instance exists, and you have the proper " + "permissions on the Lakebase instance." + ) diff --git a/tests/databricks_ai_bridge/test_lakebase.py b/tests/databricks_ai_bridge/test_lakebase.py index a831f8f9..ec38d8f1 100644 --- a/tests/databricks_ai_bridge/test_lakebase.py +++ b/tests/databricks_ai_bridge/test_lakebase.py @@ -1065,19 +1065,21 @@ def test_execute_grant_handles_insufficient_privilege(self): def test_is_hostname_detects_database_hostname(): """Test that _is_hostname correctly identifies database hostnames.""" - from databricks_ai_bridge.lakebase import _LakebasePoolBase + from databricks_ai_bridge.lakebase import _is_hostname - # Should be detected as hostnames - assert _LakebasePoolBase._is_hostname( + # Should be detected as hostnames (pattern: *.database.{env}.*.databricks.com) + assert _is_hostname( "instance-f757b615-f2fd-4614-87cc-9ba35f2eeb61.database.staging.cloud.databricks.com" ) - assert _LakebasePoolBase._is_hostname("instance-abc123.database.prod.cloud.databricks.com") - assert _LakebasePoolBase._is_hostname("my-db.database.example.net") + assert _is_hostname("instance-abc123.database.prod.cloud.databricks.com") + assert _is_hostname("my-instance.database.us-west-2.cloud.databricks.com") # Should NOT be detected as hostnames (regular instance 
names) - assert not _LakebasePoolBase._is_hostname("lakebase") - assert not _LakebasePoolBase._is_hostname("my-database-instance") - assert not _LakebasePoolBase._is_hostname("production_db") + assert not _is_hostname("lakebase") + assert not _is_hostname("my-database-instance") + assert not _is_hostname("production_db") + # Should not match non-databricks domains + assert not _is_hostname("my-db.database.example.net") def test_lakebase_pool_accepts_hostname(monkeypatch): @@ -1157,3 +1159,56 @@ async def test_async_lakebase_pool_accepts_hostname(monkeypatch): # Should have resolved to the instance name assert pool.instance_name == "prod-lakebase" assert pool.host == hostname + + +# ============================================================================= +# Integration Tests for Hostname Resolution +# ============================================================================= + + +@pytest.mark.integration +def test_lakebase_pool_hostname_resolution_integration(): + """ + Integration test: Verify hostname resolution works with real Databricks infrastructure. 
+ + This test requires: + - DATABRICKS_HOST and authentication configured + - Access to a Lakebase instance in the workspace + + Run with: pytest -m integration tests/databricks_ai_bridge/test_lakebase.py + """ + from databricks.sdk import WorkspaceClient + + from databricks_ai_bridge.lakebase import _is_hostname, _resolve_instance_name_from_hostname + + workspace_client = WorkspaceClient() + + # List all database instances and pick the first one + instances = list(workspace_client.database.list_database_instances()) + if not instances: + pytest.skip("No Lakebase instances available in the workspace") + + # Get the first instance with a read_write_dns + test_instance = None + for instance in instances: + if getattr(instance, "read_write_dns", None): + test_instance = instance + break + + if not test_instance: + pytest.skip("No Lakebase instance with read_write_dns found") + + hostname = test_instance.read_write_dns + expected_name = test_instance.name + + # Verify hostname detection + assert _is_hostname(hostname), f"Expected '{hostname}' to be detected as hostname" + + # Verify resolution + resolved_name, resolved_host = _resolve_instance_name_from_hostname( + workspace_client, hostname + ) + assert resolved_name == expected_name, ( + f"Expected instance name '{expected_name}', got '{resolved_name}'" + ) + assert resolved_host == hostname