Skip to content

Commit 9027663

Browse files
Fix failure when reading deep or shallow cloned Delta Lake tables.
1 parent 03eedce commit 9027663

18 files changed

+183
-1
lines changed

plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/CheckpointMetadataEntry.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public record CheckpointMetadataEntry(long version, Optional<Map<String, String>
3333

3434
public CheckpointMetadataEntry
3535
{
36-
checkArgument(version > 0, "version is not positive: %s", version);
36+
checkArgument(version >= 0, "version is negative: %s", version);
3737
requireNonNull(tags, "tags is null");
3838
tags = tags.map(ImmutableMap::copyOf);
3939
}

plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@
113113
import static io.trino.testing.TestingNames.randomNameSuffix;
114114
import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER;
115115
import static java.lang.String.format;
116+
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
116117
import static java.time.ZoneOffset.UTC;
117118
import static org.assertj.core.api.Assertions.assertThat;
118119
import static org.assertj.core.api.Assertions.assertThatThrownBy;
@@ -2766,6 +2767,46 @@ private static MetadataEntry loadMetadataEntry(long entryNumber, Path tableLocat
27662767
return transactionLog.getMetaData();
27672768
}
27682769

2770+
/**
 * Registers a table copied from the {@code deep_cloned_table} resource fixture and
 * verifies that its three rows can be read back, then drops the table.
 *
 * NOTE(review): per the method and resource names, the fixture carries a checkpoint
 * written at table version 0 - confirm against the fixture files.
 */
@Test
2771+
public void testDeepClonedTableWithCheckpointVersionZero()
2772+
throws Exception
2773+
{
2774+
String resource = "databricks154/clone_checkpoint_version_zero/checkpoint_v2/deep_cloned_table";
2775+
// A random suffix keeps the table name and its directory unique per run.
String tableName = "test_deep_cloned_table" + randomNameSuffix();
2776+
Path tableLocation = catalogDir.resolve(tableName);
2777+
// Copy the fixture directory into the new table location before registering it.
copyDirectoryContents(new File(Resources.getResource(resource).toURI()).toPath(), tableLocation);
2778+
assertUpdate("CALL system.register_table(CURRENT_SCHEMA, '%s', '%s')".formatted(tableName, tableLocation.toUri()));
2779+
2780+
// ORDER BY id pins the row order that is compared against the expected VALUES list.
assertThat(query("SELECT * FROM " + tableName + " ORDER BY id"))
2781+
.matches("VALUES " +
2782+
"(1, VARCHAR 'Alice', 25), " +
2783+
"(2, VARCHAR 'Bob', 30), " +
2784+
"(3, VARCHAR 'Charlie', 28)");
2785+
assertUpdate("DROP TABLE " + tableName);
2786+
}
2787+
2788+
/**
 * Registers a table built from the {@code shallow_cloned_table} resource fixture plus
 * one Parquet data file taken from the {@code clone_source} fixture, verifies that its
 * three rows can be read back, then drops the table.
 *
 * NOTE(review): per the method and resource names, the fixture carries a checkpoint
 * written at table version 0 - confirm against the fixture files.
 */
@Test
2789+
public void testShallowClonedTableWithCheckpointVersionZero()
2790+
throws Exception
2791+
{
2792+
String resource = "databricks154/clone_checkpoint_version_zero/checkpoint_v2/shallow_cloned_table";
2793+
String dataFileResource = "databricks154/clone_checkpoint_version_zero/checkpoint_v2/clone_source/part-00000-d47cc824-9a87-40c6-9e6b-528d933a30f9-c000.snappy.parquet";
2794+
// A random suffix keeps the table name and its directory unique per run.
String tableName = "test_shallow_cloned_table" + randomNameSuffix();
2795+
Path tableLocation = catalogDir.resolve(tableName);
2796+
Path dataFilePath = new File(Resources.getResource(dataFileResource).toURI()).toPath();
2797+
Path targetFilePath = tableLocation.resolve("part-00000-d47cc824-9a87-40c6-9e6b-528d933a30f9-c000.snappy.parquet");
2798+
copyDirectoryContents(new File(Resources.getResource(resource).toURI()).toPath(), tableLocation);
2799+
// The data file is stored under the clone_source fixture, so it is placed into the
// table directory under the same file name after the fixture directory is copied.
// NOTE(review): presumably the shallow clone log refers to this file - confirm against the fixture.
Files.copy(dataFilePath, targetFilePath, REPLACE_EXISTING);
2800+
assertUpdate("CALL system.register_table(CURRENT_SCHEMA, '%s', '%s')".formatted(tableName, tableLocation.toUri()));
2801+
2802+
// ORDER BY id pins the row order that is compared against the expected VALUES list.
assertThat(query("SELECT * FROM " + tableName + " ORDER BY id"))
2803+
.matches("VALUES " +
2804+
"(1, VARCHAR 'Alice', 25), " +
2805+
"(2, VARCHAR 'Bob', 30), " +
2806+
"(3, VARCHAR 'Charlie', 28)");
2807+
assertUpdate("DROP TABLE " + tableName);
2808+
}
2809+
27692810
private static ProtocolEntry loadProtocolEntry(long entryNumber, Path tableLocation)
27702811
throws IOException
27712812
{
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.plugin.deltalake.transactionlog;
15+
16+
import com.google.common.collect.ImmutableMap;
17+
import io.airlift.json.JsonCodec;
18+
import org.intellij.lang.annotations.Language;
19+
import org.junit.jupiter.api.Test;
20+
21+
import java.util.Optional;
22+
23+
import static org.assertj.core.api.Assertions.assertThat;
24+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
25+
26+
/**
 * JSON round-trip tests for {@link CheckpointMetadataEntry}: parsing entries with
 * positive and zero versions, rejecting a negative version, and serializing an entry.
 */
final class TestCheckpointMetadataEntry
27+
{
28+
// Codec under test; shared by all test methods in this class.
private final JsonCodec<CheckpointMetadataEntry> codec = JsonCodec.jsonCodec(CheckpointMetadataEntry.class);
29+
30+
/**
 * Parses two payloads that differ only in version (5 and 0) and checks that each
 * equals the entry constructed directly with the same version and tags.
 */
@Test
31+
void testCheckpointMetadataEntry()
32+
{
33+
@Language("JSON")
34+
String json = "{\"version\":5,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
35+
assertThat(codec.fromJson(json)).isEqualTo(new CheckpointMetadataEntry(
36+
5,
37+
Optional.of(ImmutableMap.of(
38+
"sidecarNumActions", "1",
39+
"sidecarSizeInBytes", "20965",
40+
"numOfAddFiles", "1",
41+
"sidecarFileSchema", ""))));
42+
43+
// Version 0 must be accepted as well; same tags as above.
@Language("JSON")
44+
String jsonWithVersionZero = "{\"version\":0,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
45+
assertThat(codec.fromJson(jsonWithVersionZero)).isEqualTo(new CheckpointMetadataEntry(
46+
0,
47+
Optional.of(ImmutableMap.of(
48+
"sidecarNumActions", "1",
49+
"sidecarSizeInBytes", "20965",
50+
"numOfAddFiles", "1",
51+
"sidecarFileSchema", ""))));
52+
}
53+
54+
/**
 * Checks that parsing fails with an {@link IllegalArgumentException} whose message
 * contains the codec's invalid-JSON prefix for two payloads carrying version -1.
 */
@Test
55+
void testInvalidCheckpointMetadataEntry()
56+
{
57+
@Language("JSON")
58+
String jsonWithNegativeVersion = "{\"version\":-1,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
59+
assertThatThrownBy(() -> codec.fromJson(jsonWithNegativeVersion))
60+
.isInstanceOf(IllegalArgumentException.class)
61+
.hasMessageContaining("Invalid JSON string for");
62+
63+
// NOTE(review): this payload omits tags but also has version -1, so the expected
// failure may come from the version alone; it does not isolate the missing-tags
// case - confirm whether a non-negative version was intended here.
@Language("JSON")
64+
String jsonWithoutTags = "{\"version\":-1}";
65+
assertThatThrownBy(() -> codec.fromJson(jsonWithoutTags))
66+
.isInstanceOf(IllegalArgumentException.class)
67+
.hasMessageContaining("Invalid JSON string for");
68+
}
69+
70+
/**
 * Serializes an entry with version 100 and four tags and compares the output with
 * the expected multi-line JSON text, including tag order.
 */
@Test
71+
void testCheckpointMetadataEntryToJson()
72+
{
73+
assertThat(codec.toJson(new CheckpointMetadataEntry(
74+
100,
75+
Optional.of(ImmutableMap.of(
76+
"sidecarNumActions", "1",
77+
"sidecarSizeInBytes", "20965",
78+
"numOfAddFiles", "1",
79+
"sidecarFileSchema", "")))))
80+
.isEqualTo("{\n" +
81+
" \"version\" : 100,\n" +
82+
" \"tags\" : {\n" +
83+
" \"sidecarNumActions\" : \"1\",\n" +
84+
" \"sidecarSizeInBytes\" : \"20965\",\n" +
85+
" \"numOfAddFiles\" : \"1\",\n" +
86+
" \"sidecarFileSchema\" : \"\"\n" +
87+
" }\n" +
88+
"}");
89+
}
90+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
Data generated using Databricks 15.4:
2+
3+
```sql
4+
CREATE TABLE source_table (
5+
id INT,
6+
name STRING,
7+
age INT
8+
)
9+
USING DELTA
10+
TBLPROPERTIES (
11+
'delta.checkpointPolicy' = 'v2'
12+
);
13+
14+
INSERT INTO source_table VALUES
15+
(1, 'Alice', 25),
16+
(2, 'Bob', 30),
17+
(3, 'Charlie', 28);
18+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"commitInfo":{"timestamp":1761436420945,"userId":"user1","userName":"user1","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"true","properties":"{\"delta.checkpointPolicy\":\"v2\",\"delta.checkpointInterval\":\"2\",\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"70e732d3-dd95-4615-b187-d3862aa1c181"}}
2+
{"metaData":{"id":"b916c720-895c-4ccd-8b74-5a52754d3e26","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"2","delta.enableDeletionVectors":"true"},"createdTime":1761436420654}}
3+
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","v2Checkpoint"],"writerFeatures":["deletionVectors","v2Checkpoint"]}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"commitInfo":{"timestamp":1761436433491,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"3","numOutputBytes":"1112"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"482c7b06-a45c-4d4e-be3b-cf4314f4afc5"}}
2+
{"add":{"path":"part-00000-d47cc824-9a87-40c6-9e6b-528d933a30f9-c000.snappy.parquet","partitionValues":{},"size":1112,"modificationTime":1761436433000,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{\"id\":1,\"name\":\"Alice\",\"age\":25},\"maxValues\":{\"id\":3,\"name\":\"Charlie\",\"age\":30},\"nullCount\":{\"id\":0,\"name\":0,\"age\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1761436433000000","MIN_INSERTION_TIME":"1761436433000000","MAX_INSERTION_TIME":"1761436433000000","OPTIMIZE_TARGET_SIZE":"67108864"}}}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Data generated using Databricks 15.4:
2+
3+
```sql
4+
CREATE TABLE deep_cloned_table DEEP CLONE source_table;
5+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{"checkpointMetadata":{"version":0,"tags":{"sidecarNumActions":"1","sidecarSizeInBytes":"13505","numOfAddFiles":"1","sidecarFileSchema":"{\"type\":\"struct\",\"fields\":[{\"name\":\"add\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modificationTime\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"tags\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clusteringProvider\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats_parsed\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"numRecords\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"minValues\",\"type\":{\"type\":\"struct\",\"fields\":[
{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"maxValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"nullCount\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"tightBounds\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"remove\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionTimestamp\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"extendedFileMetadata\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}
,{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}"}}}
2+
{"sidecar":{"path":"00000000000000000000.checkpoint.0000000001.0000000001.7be61843-e74d-45d2-8db5-4b4e56714412.parquet","sizeInBytes":13505,"modificationTime":1761436515000}}
3+
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","v2Checkpoint"],"writerFeatures":["deletionVectors","v2Checkpoint"]}}
4+
{"metaData":{"id":"45c721d6-56cd-47b6-bcb6-ccdeef56ce80","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"2","delta.enableDeletionVectors":"true"},"createdTime":1761436420654}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{"commitInfo":{"timestamp":1761436510659,"userId":"user1","userName":"user1","operation":"CLONE","operationParameters":{"source":"source_table","sourceVersion":1,"isShallow":false},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":-1,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"removedFilesSize":"0","numRemovedFiles":"0","sourceTableSize":"1112","numCopiedFiles":"1","copiedFilesSize":"1112","sourceNumOfFiles":"1"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"a073c2c4-75c9-4ff8-bb73-fbf49115cf88"}}
2+
{"metaData":{"id":"45c721d6-56cd-47b6-bcb6-ccdeef56ce80","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"2","delta.enableDeletionVectors":"true"},"createdTime":1761436420654}}
3+
{"add":{"path":"part-00000-d47cc824-9a87-40c6-9e6b-528d933a30f9-c000.snappy.parquet","partitionValues":{},"size":1112,"modificationTime":1761436433000,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{\"id\":1,\"name\":\"Alice\",\"age\":25},\"maxValues\":{\"id\":3,\"name\":\"Charlie\",\"age\":30},\"nullCount\":{\"id\":0,\"name\":0,\"age\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1761436433000000","MIN_INSERTION_TIME":"1761436433000000","MAX_INSERTION_TIME":"1761436433000000","OPTIMIZE_TARGET_SIZE":"67108864"}}}
4+
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","v2Checkpoint"],"writerFeatures":["deletionVectors","v2Checkpoint"]}}

0 commit comments

Comments
 (0)