-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Allow time travel reads to Hudi Tables #27140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -15,6 +15,7 @@ | |||||
|
|
||||||
| import com.google.common.collect.ImmutableList; | ||||||
| import com.google.common.collect.ImmutableMap; | ||||||
| import io.airlift.slice.Slice; | ||||||
| import io.trino.filesystem.Location; | ||||||
| import io.trino.filesystem.TrinoFileSystemFactory; | ||||||
| import io.trino.metastore.Column; | ||||||
|
|
@@ -33,6 +34,7 @@ | |||||
| import io.trino.spi.connector.ConnectorTableVersion; | ||||||
| import io.trino.spi.connector.Constraint; | ||||||
| import io.trino.spi.connector.ConstraintApplicationResult; | ||||||
| import io.trino.spi.connector.PointerType; | ||||||
| import io.trino.spi.connector.RelationColumnsMetadata; | ||||||
| import io.trino.spi.connector.SchemaTableName; | ||||||
| import io.trino.spi.connector.SchemaTablePrefix; | ||||||
|
|
@@ -104,17 +106,14 @@ public List<String> listSchemaNames(ConnectorSession session) | |||||
| @Override | ||||||
| public HudiTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName, Optional<ConnectorTableVersion> startVersion, Optional<ConnectorTableVersion> endVersion) | ||||||
| { | ||||||
| if (startVersion.isPresent() || endVersion.isPresent()) { | ||||||
| throw new TrinoException(NOT_SUPPORTED, "This connector does not support versioned tables"); | ||||||
| } | ||||||
|
|
||||||
| if (isHiveSystemSchema(tableName.getSchemaName())) { | ||||||
| return null; | ||||||
| } | ||||||
| Optional<Table> table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()); | ||||||
| if (table.isEmpty()) { | ||||||
| return null; | ||||||
| } | ||||||
| Optional<String> readVersion = getReadVersion(startVersion, endVersion); | ||||||
| if (!isHudiTable(table.get())) { | ||||||
| throw new TrinoException(UNSUPPORTED_TABLE_TYPE, format("Not a Hudi table: %s", tableName)); | ||||||
| } | ||||||
|
|
@@ -130,7 +129,30 @@ public HudiTableHandle getTableHandle(ConnectorSession session, SchemaTableName | |||||
| COPY_ON_WRITE, | ||||||
| getPartitionKeyColumnHandles(table.get(), typeManager), | ||||||
| TupleDomain.all(), | ||||||
| TupleDomain.all()); | ||||||
| TupleDomain.all(), | ||||||
| readVersion); | ||||||
| } | ||||||
|
|
||||||
| private static Optional<String> getReadVersion(Optional<ConnectorTableVersion> startVersion, Optional<ConnectorTableVersion> endVersion) | ||||||
| { | ||||||
| if (startVersion.isPresent()) { | ||||||
| throw new TrinoException(NOT_SUPPORTED, "Read table with start version is not supported"); | ||||||
| } | ||||||
|
|
||||||
| if (endVersion.isEmpty()) { | ||||||
| return Optional.empty(); | ||||||
| } | ||||||
|
|
||||||
| ConnectorTableVersion version = endVersion.get(); | ||||||
| if (version.getPointerType() == PointerType.TEMPORAL) { | ||||||
| throw new TrinoException(NOT_SUPPORTED, "Cannot read 'TIMESTAMP' of Hudi table, use 'VERSION' instead"); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, is this limitation(reading with temporal) coming from Hudi, or is it something that could be supported in the future? |
||||||
| } | ||||||
|
|
||||||
| if (version.getVersion() instanceof Slice slice) { | ||||||
| return Optional.of(slice.toStringUtf8()); | ||||||
| } | ||||||
|
|
||||||
| throw new TrinoException(NOT_SUPPORTED, "Provided read version must be a string"); | ||||||
| } | ||||||
|
|
||||||
| @Override | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,13 +25,15 @@ | |
| public class TestHudiConnectorTest | ||
| extends BaseConnectorTest | ||
| { | ||
| private static final long COMMIT_TIMESTAMP = 20251027183851494L; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the naming aligned with the Hudi glossary? using "commit timestamp" sounds more like a temporal read, it actually more like a "commit version" |
||
|
|
||
| @Override | ||
| protected QueryRunner createQueryRunner() | ||
| throws Exception | ||
| { | ||
| return HudiQueryRunner.builder() | ||
| .addConnectorProperty("hudi.columns-to-hide", COLUMNS_TO_HIDE) | ||
| .setDataLoader(new TpchHudiTablesInitializer(REQUIRED_TPCH_TABLES)) | ||
| .setDataLoader(new TpchHudiTablesInitializer(REQUIRED_TPCH_TABLES, String.valueOf(COMMIT_TIMESTAMP))) | ||
| .build(); | ||
| } | ||
|
|
||
|
|
@@ -88,4 +90,24 @@ public void testHideHiveSysSchema() | |
| assertThat(computeActual("SHOW SCHEMAS").getOnlyColumnAsSet()).doesNotContain("sys"); | ||
| assertQueryFails("SHOW TABLES IN hudi.sys", ".*Schema 'sys' does not exist"); | ||
| } | ||
|
|
||
| @Override | ||
| protected void verifyVersionedQueryFailurePermissible(Exception e) | ||
| { | ||
| assertThat(e) | ||
| .hasMessageMatching("Read table with start version is not supported|" + | ||
| "Cannot read 'TIMESTAMP' of Hudi table, use 'VERSION' instead|" + | ||
| "Provided read version must be a string"); | ||
| } | ||
|
|
||
| @Test | ||
| public void testSelectTableUsingTargetIdVersion() | ||
| { | ||
| String expectedValues = "VALUES 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24"; | ||
| assertThat(query("SELECT CAST(nationkey AS INT) FROM hudi.tests.nation")).matches(expectedValues); | ||
| assertThat(query("SELECT CAST(nationkey AS INT) FROM hudi.tests.nation FOR VERSION AS OF '" + COMMIT_TIMESTAMP + "'")).matches(expectedValues); | ||
| assertThat(query("SELECT CAST(nationkey AS INT) FROM hudi.tests.nation FOR VERSION AS OF '" + (COMMIT_TIMESTAMP - 1) + "'")).returnsEmptyResult(); | ||
| assertQueryFails("SELECT CAST(nationkey AS INT) FROM hudi.tests.nation FOR VERSION AS OF 0", "Provided read version must be a string"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you also add a case that is negative number/string? |
||
| assertQueryFails("SELECT CAST(nationkey AS INT) FROM hudi.tests.nation FOR TIMESTAMP AS OF TIMESTAMP '2025-10-29 15:00:00'", "Cannot read 'TIMESTAMP' of Hudi table, use 'VERSION' instead"); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.