From 7a09c16659007fd3ffccef2c90b7b53383aa8f99 Mon Sep 17 00:00:00 2001 From: Liran Bareket Date: Thu, 18 Sep 2025 12:26:14 -0400 Subject: [PATCH 1/8] Switched cluster type from user_isolation to main for wasbs migration --- src/databricks/labs/ucx/hive_metastore/workflows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/hive_metastore/workflows.py b/src/databricks/labs/ucx/hive_metastore/workflows.py index 640602a4d8..c15df4a2ec 100644 --- a/src/databricks/labs/ucx/hive_metastore/workflows.py +++ b/src/databricks/labs/ucx/hive_metastore/workflows.py @@ -342,7 +342,7 @@ class ConvertWASBSToADLSGen2(Workflow): def __init__(self): super().__init__('convert-wasbs-to-adls-gen2-experimental') - @job_task(job_cluster="user_isolation", depends_on=[Assessment.crawl_tables]) + @job_task(job_cluster="main", depends_on=[Assessment.crawl_tables]) def convert_wasbs_to_adls_gen2(self, ctx: RuntimeContext): """This workflow task converts WASBS paths to ADLS Gen2 paths in the Hive Metastore.""" ctx.tables_migrator.convert_wasbs_to_adls_gen2() From 1b7d79e984c9750c347fad600365f11da0232d93 Mon Sep 17 00:00:00 2001 From: Liran Bareket Date: Thu, 18 Sep 2025 13:52:07 -0400 Subject: [PATCH 2/8] Added collation as a parameter to CatalogTable --- src/databricks/labs/ucx/hive_metastore/table_migrate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/hive_metastore/table_migrate.py b/src/databricks/labs/ucx/hive_metastore/table_migrate.py index 740de3901b..34ab3ba7cb 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migrate.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migrate.py @@ -381,6 +381,7 @@ def _convert_hms_table_to_external(self, src_table: Table) -> bool: def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool: """ Converts a Hive metastore azure wasbs table to abfss using alter table command. + This command and workflow require a single-user cluster with DBR 16 or newer.
""" logger.info(f"Changing HMS managed table {src_table.name} to External Table type.") inventory_table = self._tables_crawler.full_name @@ -420,6 +421,7 @@ def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool: old_table.stats(), old_table.viewText(), old_table.comment(), + old_table.collation(), old_table.unsupportedFeatures(), old_table.tracksPartitionsInCatalog(), old_table.schemaPreservesCase(), @@ -428,7 +430,7 @@ def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool: # From DBR 16, there's a new constructor argument: entityStorageLocations (Seq[EntityStorageLocation]) # (We can't detect whether the argument is needed by the constructor, but assume that if the accessor # is present on the source table then the argument is needed.) - *([entity_storage_locations] if entity_storage_locations is not None else []), + # *([entity_storage_locations] if entity_storage_locations is not None else []), ) self._catalog.alterTable(new_table) except Exception as e: # pylint: disable=broad-exception-caught From 08f40d14fdf91068766034985a3d68f149204072 Mon Sep 17 00:00:00 2001 From: Liran Bareket Date: Thu, 18 Sep 2025 13:58:31 -0400 Subject: [PATCH 3/8] Added collation as a parameter to CatalogTable --- .../labs/ucx/hive_metastore/table_migrate.py | 76 ++++++++++++------- 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/table_migrate.py b/src/databricks/labs/ucx/hive_metastore/table_migrate.py index 34ab3ba7cb..3f9319b6a9 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migrate.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migrate.py @@ -393,7 +393,7 @@ def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool: return False try: old_table = self._catalog.getTableMetadata(table_identifier) - entity_storage_locations = self._get_entity_storage_locations(old_table) + collation = old_table.collation() if 'collation' in dir(old_table) else None table_location = 
old_table.storage() new_location = self._catalog_storage( self._spark._jvm.scala.Some( # pylint: disable=protected-access @@ -405,33 +405,53 @@ def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool: table_location.compressed(), table_location.properties(), ) - new_table = self._catalog_table( - old_table.identifier(), - old_table.tableType(), - new_location, - old_table.schema(), - old_table.provider(), - old_table.partitionColumnNames(), - old_table.bucketSpec(), - old_table.owner(), - old_table.createTime(), - old_table.lastAccessTime(), - old_table.createVersion(), - old_table.properties(), - old_table.stats(), - old_table.viewText(), - old_table.comment(), - old_table.collation(), - old_table.unsupportedFeatures(), - old_table.tracksPartitionsInCatalog(), - old_table.schemaPreservesCase(), - old_table.ignoredProperties(), - old_table.viewOriginalText(), - # From DBR 16, there's a new constructor argument: entityStorageLocations (Seq[EntityStorageLocation]) - # (We can't detect whether the argument is needed by the constructor, but assume that if the accessor - # is present on the source table then the argument is needed.) 
- # *([entity_storage_locations] if entity_storage_locations is not None else []), - ) + if collation: + new_table = self._catalog_table( + old_table.identifier(), + old_table.tableType(), + new_location, + old_table.schema(), + old_table.provider(), + old_table.partitionColumnNames(), + old_table.bucketSpec(), + old_table.owner(), + old_table.createTime(), + old_table.lastAccessTime(), + old_table.createVersion(), + old_table.properties(), + old_table.stats(), + old_table.viewText(), + old_table.comment(), + old_table.collation(), + old_table.unsupportedFeatures(), + old_table.tracksPartitionsInCatalog(), + old_table.schemaPreservesCase(), + old_table.ignoredProperties(), + old_table.viewOriginalText(), + ) + else: + new_table = self._catalog_table( + old_table.identifier(), + old_table.tableType(), + new_location, + old_table.schema(), + old_table.provider(), + old_table.partitionColumnNames(), + old_table.bucketSpec(), + old_table.owner(), + old_table.createTime(), + old_table.lastAccessTime(), + old_table.createVersion(), + old_table.properties(), + old_table.stats(), + old_table.viewText(), + old_table.comment(), + old_table.unsupportedFeatures(), + old_table.tracksPartitionsInCatalog(), + old_table.schemaPreservesCase(), + old_table.ignoredProperties(), + old_table.viewOriginalText(), + ) self._catalog.alterTable(new_table) except Exception as e: # pylint: disable=broad-exception-caught logger.warning(f"Error converting HMS table {src_table.name} to abfss: {e}", exc_info=True) From 27761cb4b5bdcbeb379cd38ee02f6015dd8b801b Mon Sep 17 00:00:00 2001 From: Liran Bareket Date: Thu, 18 Sep 2025 14:20:37 -0400 Subject: [PATCH 4/8] Fixed issue with empty collation --- src/databricks/labs/ucx/hive_metastore/table_migrate.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/table_migrate.py b/src/databricks/labs/ucx/hive_metastore/table_migrate.py index 3f9319b6a9..a5d876163d 100644 --- 
a/src/databricks/labs/ucx/hive_metastore/table_migrate.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migrate.py @@ -393,7 +393,6 @@ def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool: return False try: old_table = self._catalog.getTableMetadata(table_identifier) - collation = old_table.collation() if 'collation' in dir(old_table) else None table_location = old_table.storage() new_location = self._catalog_storage( self._spark._jvm.scala.Some( # pylint: disable=protected-access @@ -405,7 +404,7 @@ def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool: table_location.compressed(), table_location.properties(), ) - if collation: + if 'collation' in dir(old_table): new_table = self._catalog_table( old_table.identifier(), old_table.tableType(), From d9aa8fc2ce949f0b1c88a17baba0a6847dc5d77e Mon Sep 17 00:00:00 2001 From: Liran Bareket Date: Thu, 18 Sep 2025 15:09:58 -0400 Subject: [PATCH 5/8] Fixed issue with missing constructor parameters --- .../labs/ucx/hive_metastore/table_migrate.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/databricks/labs/ucx/hive_metastore/table_migrate.py b/src/databricks/labs/ucx/hive_metastore/table_migrate.py index a5d876163d..74fbcb3dca 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migrate.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migrate.py @@ -427,6 +427,36 @@ def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool: old_table.schemaPreservesCase(), old_table.ignoredProperties(), old_table.viewOriginalText(), + old_table.viewDependencyList(), + old_table.tableConstraints(), + old_table.deltaRuntimeProperties(), + old_table.pipelineUuid(), + old_table.rowFilter(), + old_table.columnMasks(), + old_table.enableAutoMaintenance(), + old_table.effectiveAutoMaintenanceFlag(), + old_table.baseTableId(), + old_table.baseTableLocation(), + old_table.accessPoint(), + old_table.deltaUniformIceberg(), + old_table.shallowClones(), + 
old_table.encryptionDetails(), + old_table.deltaSharingKind(), + old_table.reconciliationDefinition(), + old_table.parentTable(), + old_table.partitionLogLocation(), + old_table.capabilities(), + old_table.auxiliaryManagedLocation(), + old_table.provisioningInfo(), + old_table.useRemoteFiltering(), + old_table.deltaCoordinatedCommitsInfo(), + old_table.rowFiltersImplicitFromABAC(), + old_table.columnMasksImplicitFromABAC(), + old_table.entityStorageLocations(), + old_table.parentTableUuid(), + old_table.isArclightTableCreation(), + old_table.resourceName(), + old_table.governanceMetadata() ) else: new_table = self._catalog_table( From dc247d2718a44cf8a7f93a6dcc5259cee289ee84 Mon Sep 17 00:00:00 2001 From: Liran Bareket Date: Thu, 18 Sep 2025 15:19:45 -0400 Subject: [PATCH 6/8] Added logging message --- src/databricks/labs/ucx/hive_metastore/table_migrate.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/databricks/labs/ucx/hive_metastore/table_migrate.py b/src/databricks/labs/ucx/hive_metastore/table_migrate.py index 74fbcb3dca..9aa5c3250d 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migrate.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migrate.py @@ -404,6 +404,7 @@ def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool: table_location.compressed(), table_location.properties(), ) + logger.info(f"Updating table {src_table.name} location from {src_table.location} to {new_table_location}") if 'collation' in dir(old_table): new_table = self._catalog_table( old_table.identifier(), From d979493a05f5ab7f9967c36db5768c3c318f665e Mon Sep 17 00:00:00 2001 From: Liran Bareket Date: Fri, 19 Sep 2025 10:04:25 -0400 Subject: [PATCH 7/8] Addressed FMT issue. 
--- src/databricks/labs/ucx/hive_metastore/table_migrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/hive_metastore/table_migrate.py b/src/databricks/labs/ucx/hive_metastore/table_migrate.py index 9aa5c3250d..c4599fe942 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migrate.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migrate.py @@ -457,7 +457,7 @@ def _convert_wasbs_table_to_abfss(self, src_table: Table) -> bool: old_table.parentTableUuid(), old_table.isArclightTableCreation(), old_table.resourceName(), - old_table.governanceMetadata() + old_table.governanceMetadata(), ) else: new_table = self._catalog_table( From 7929f7d7ccf2c9d4652775774d1d55a63a473860 Mon Sep 17 00:00:00 2001 From: Liran Bareket Date: Wed, 8 Oct 2025 10:37:51 -0400 Subject: [PATCH 8/8] Addressed FMT issue. --- src/databricks/labs/ucx/workspace_access/groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index b16d3d726f..cd7799511a 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -929,7 +929,7 @@ def pick_owner_group(self, prompt: Prompts) -> str | None: return None if len(groups) == 1: return groups[0].display_name - group_names = [group.display_name for group in groups] + group_names = [group.display_name for group in groups if group and group.display_name] return prompt.choice("Select the group to be used as the owner group", group_names, max_attempts=3) def user_in_group(self, group_name: str, user: User) -> bool: