-
Notifications
You must be signed in to change notification settings - Fork 47
db-ai-bridge: Add hostname resolution support for Lakebase Pool #310
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1056,3 +1056,159 @@ | |
| error_msg = str(exc_info.value) | ||
| assert "Insufficient privileges" in error_msg | ||
| assert "CAN MANAGE" in error_msg | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we add an integration test where we use real values from dogfood to verify this logic? |
||
|
|
||
|
|
||
| # ============================================================================= | ||
| # Hostname Resolution Tests | ||
| # ============================================================================= | ||
|
|
||
|
|
||
| def test_is_hostname_detects_database_hostname(): | ||
| """Test that _is_hostname correctly identifies database hostnames.""" | ||
| from databricks_ai_bridge.lakebase import _is_hostname | ||
|
|
||
| # Should be detected as hostnames (pattern: *.database.<region>.*.databricks.com) | ||
| assert _is_hostname( | ||
| "instance-f757b615-f2fd-4614-87cc-9ba35f2eeb61.database.staging.cloud.databricks.com" | ||
| ) | ||
| assert _is_hostname("instance-abc123.database.prod.cloud.databricks.com") | ||
| assert _is_hostname("my-instance.database.us-west-2.cloud.databricks.com") | ||
|
|
||
| # Should NOT be detected as hostnames (regular instance names) | ||
| assert not _is_hostname("lakebase") | ||
| assert not _is_hostname("my-database-instance") | ||
| assert not _is_hostname("production_db") | ||
| # Should not match non-databricks domains | ||
| assert not _is_hostname("my-db.database.example.net") | ||
|
|
||
|
|
||
| def test_lakebase_pool_accepts_hostname(monkeypatch): | ||
| """Test that LakebasePool accepts hostname and resolves instance name.""" | ||
| TestConnectionPool = _make_connection_pool_class() | ||
| monkeypatch.setattr("databricks_ai_bridge.lakebase.ConnectionPool", TestConnectionPool) | ||
|
|
||
| workspace = _make_workspace() | ||
|
|
||
| # Mock list_database_instances to return an instance matching the hostname | ||
| hostname = "instance-abc123.database.staging.cloud.databricks.com" | ||
| mock_instance = MagicMock() | ||
| mock_instance.name = "my-lakebase-instance" | ||
| mock_instance.read_write_dns = hostname | ||
| mock_instance.read_only_dns = None | ||
| workspace.database.list_database_instances.return_value = [mock_instance] | ||
|
|
||
| pool = LakebasePool( | ||
| instance_name=hostname, # Pass hostname instead of instance name | ||
| workspace_client=workspace, | ||
| ) | ||
|
|
||
| # Should have resolved to the instance name | ||
| assert pool.instance_name == "my-lakebase-instance" | ||
| assert pool.host == hostname | ||
|
|
||
| # get_database_instance should NOT have been called (we used list instead) | ||
| workspace.database.get_database_instance.assert_not_called() | ||
|
|
||
|
|
||
| def test_lakebase_pool_hostname_not_found_raises_error(monkeypatch): | ||
| """Test that LakebasePool raises error when hostname doesn't match any instance.""" | ||
| TestConnectionPool = _make_connection_pool_class() | ||
| monkeypatch.setattr("databricks_ai_bridge.lakebase.ConnectionPool", TestConnectionPool) | ||
|
|
||
| workspace = _make_workspace() | ||
|
|
||
| # Mock list_database_instances to return instances that don't match | ||
| other_instance = MagicMock() | ||
| other_instance.name = "other-instance" | ||
| other_instance.read_write_dns = "other-host.database.staging.cloud.databricks.com" | ||
| other_instance.read_only_dns = None | ||
| workspace.database.list_database_instances.return_value = [other_instance] | ||
|
|
||
| hostname = "instance-not-found.database.staging.cloud.databricks.com" | ||
|
|
||
| with pytest.raises(ValueError, match="Unable to find database instance matching hostname"): | ||
| LakebasePool( | ||
| instance_name=hostname, | ||
| workspace_client=workspace, | ||
| ) | ||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_async_lakebase_pool_accepts_hostname(monkeypatch): | ||
| """Test that AsyncLakebasePool accepts hostname and resolves instance name.""" | ||
| TestAsyncConnectionPool = _make_async_connection_pool_class() | ||
| monkeypatch.setattr( | ||
| "databricks_ai_bridge.lakebase.AsyncConnectionPool", TestAsyncConnectionPool | ||
| ) | ||
|
|
||
| workspace = _make_workspace() | ||
|
|
||
| # Mock list_database_instances to return an instance matching the hostname | ||
| hostname = "instance-xyz789.database.prod.cloud.databricks.com" | ||
| mock_instance = MagicMock() | ||
| mock_instance.name = "prod-lakebase" | ||
| mock_instance.read_write_dns = hostname | ||
| mock_instance.read_only_dns = None | ||
| workspace.database.list_database_instances.return_value = [mock_instance] | ||
|
|
||
| pool = AsyncLakebasePool( | ||
| instance_name=hostname, # Pass hostname instead of instance name | ||
| workspace_client=workspace, | ||
| ) | ||
|
|
||
| # Should have resolved to the instance name | ||
| assert pool.instance_name == "prod-lakebase" | ||
| assert pool.host == hostname | ||
|
|
||
|
|
||
| # ============================================================================= | ||
| # Integration Tests for Hostname Resolution | ||
| # ============================================================================= | ||
|
|
||
|
|
||
| @pytest.mark.integration | ||
| def test_lakebase_pool_hostname_resolution_integration(): | ||
| """ | ||
| Integration test: Verify hostname resolution works with real Databricks infrastructure. | ||
|
|
||
| This test requires: | ||
| - DATABRICKS_HOST and authentication configured | ||
| - Access to a Lakebase instance in the workspace | ||
|
|
||
| Run with: pytest -m integration tests/databricks_ai_bridge/test_lakebase.py | ||
| """ | ||
| from databricks.sdk import WorkspaceClient | ||
|
|
||
| from databricks_ai_bridge.lakebase import _is_hostname, _resolve_instance_name_from_hostname | ||
|
|
||
| workspace_client = WorkspaceClient() | ||
|
|
||
| # List all database instances and pick the first one | ||
| instances = list(workspace_client.database.list_database_instances()) | ||
| if not instances: | ||
| pytest.skip("No Lakebase instances available in the workspace") | ||
|
|
||
| # Get the first instance with a read_write_dns | ||
| test_instance = None | ||
| for instance in instances: | ||
| if getattr(instance, "read_write_dns", None): | ||
| test_instance = instance | ||
| break | ||
|
|
||
| if not test_instance: | ||
| pytest.skip("No Lakebase instance with read_write_dns found") | ||
|
|
||
| hostname = test_instance.read_write_dns | ||
|
Check warning on line 1201 in tests/databricks_ai_bridge/test_lakebase.py
|
||
| expected_name = test_instance.name | ||
|
Check warning on line 1202 in tests/databricks_ai_bridge/test_lakebase.py
|
||
|
|
||
| # Verify hostname detection | ||
| assert _is_hostname(hostname), f"Expected '{hostname}' to be detected as hostname" | ||
|
Check failure on line 1205 in tests/databricks_ai_bridge/test_lakebase.py
|
||
|
|
||
| # Verify resolution | ||
| resolved_name, resolved_host = _resolve_instance_name_from_hostname( | ||
| workspace_client, hostname | ||
|
Check failure on line 1209 in tests/databricks_ai_bridge/test_lakebase.py
|
||
| ) | ||
| assert resolved_name == expected_name, ( | ||
| f"Expected instance name '{expected_name}', got '{resolved_name}'" | ||
| ) | ||
| assert resolved_host == hostname | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
does it make sense to have the helper inside of this method?
i think if the method here is accepting a param "instance name", then this might be a helpful standalone helper? something that will resolve to a validated instance name, whether given a hostname or an instance name