Skip to content

Commit 3be6244

Browse files
Fix failure when reading deep or shallow cloned Delta Lake tables.
1 parent 03eedce commit 3be6244

File tree

13 files changed

+150
-1
lines changed

13 files changed

+150
-1
lines changed

plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/CheckpointMetadataEntry.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public record CheckpointMetadataEntry(long version, Optional<Map<String, String>
3333

3434
public CheckpointMetadataEntry
3535
{
36-
checkArgument(version > 0, "version is not positive: %s", version);
36+
checkArgument(version >= 0, "version is negative: %s", version);
3737
requireNonNull(tags, "tags is null");
3838
tags = tags.map(ImmutableMap::copyOf);
3939
}

plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2766,6 +2766,28 @@ private static MetadataEntry loadMetadataEntry(long entryNumber, Path tableLocat
27662766
return transactionLog.getMetaData();
27672767
}
27682768

2769+
@Test
2770+
public void testClonedTableWithCheckpointVersionZero()
2771+
throws Exception
2772+
{
2773+
testClonedTableWithCheckpointVersionZero("databricks154/clone_checkpoint_version_zero/checkpoint_v2/cloned_table");
2774+
}
2775+
2776+
private void testClonedTableWithCheckpointVersionZero(String resourceName)
2777+
throws Exception
2778+
{
2779+
String tableName = "test_cloned_table" + randomNameSuffix();
2780+
Path tableLocation = catalogDir.resolve(tableName);
2781+
copyDirectoryContents(new File(Resources.getResource(resourceName).toURI()).toPath(), tableLocation);
2782+
assertUpdate("CALL system.register_table(CURRENT_SCHEMA, '%s', '%s')".formatted(tableName, tableLocation.toUri()));
2783+
2784+
assertThat(query("SELECT * FROM " + tableName + " ORDER BY id")).matches("VALUES " +
2785+
"(1, VARCHAR 'Alice', 25), " +
2786+
"(2, VARCHAR 'Bob', 30), " +
2787+
"(3, VARCHAR 'Charlie', 28)");
2788+
assertUpdate("DROP TABLE " + tableName);
2789+
}
2790+
27692791
private static ProtocolEntry loadProtocolEntry(long entryNumber, Path tableLocation)
27702792
throws IOException
27712793
{
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.plugin.deltalake.transactionlog;
15+
16+
import com.google.common.collect.ImmutableMap;
17+
import io.airlift.json.JsonCodec;
18+
import org.intellij.lang.annotations.Language;
19+
import org.junit.jupiter.api.Test;
20+
21+
import java.util.Optional;
22+
23+
import static org.assertj.core.api.Assertions.assertThat;
24+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
25+
26+
class TestCheckpointMetadataEntry
27+
{
28+
private final JsonCodec<CheckpointMetadataEntry> codec = JsonCodec.jsonCodec(CheckpointMetadataEntry.class);
29+
30+
@Test
31+
void testCheckpointMetadataEntry()
32+
{
33+
@Language("JSON")
34+
String json = "{\"version\":5,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
35+
assertThat(codec.fromJson(json)).isEqualTo(new CheckpointMetadataEntry(
36+
5,
37+
Optional.of(ImmutableMap.of(
38+
"sidecarNumActions", "1",
39+
"sidecarSizeInBytes", "20965",
40+
"numOfAddFiles", "1",
41+
"sidecarFileSchema", ""))));
42+
43+
@Language("JSON")
44+
String jsonWithVersionZero = "{\"version\":0,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
45+
assertThat(codec.fromJson(jsonWithVersionZero)).isEqualTo(new CheckpointMetadataEntry(
46+
0,
47+
Optional.of(ImmutableMap.of(
48+
"sidecarNumActions", "1",
49+
"sidecarSizeInBytes", "20965",
50+
"numOfAddFiles", "1",
51+
"sidecarFileSchema", ""))));
52+
}
53+
54+
@Test
55+
void testInvalidCheckpointMetadataEntry()
56+
{
57+
@Language("JSON")
58+
String jsonWithNegativeVersion = "{\"version\":-1,\"tags\":{\"sidecarNumActions\":\"1\",\"sidecarSizeInBytes\":\"20965\",\"numOfAddFiles\":\"1\",\"sidecarFileSchema\":\"\"}}";
59+
assertThatThrownBy(() -> codec.fromJson(jsonWithNegativeVersion))
60+
.isInstanceOf(IllegalArgumentException.class)
61+
.hasMessageContaining("Invalid JSON string for");
62+
63+
@Language("JSON")
64+
String jsonWithoutTags = "{\"version\":-1}";
65+
assertThatThrownBy(() -> codec.fromJson(jsonWithoutTags))
66+
.isInstanceOf(IllegalArgumentException.class)
67+
.hasMessageContaining("Invalid JSON string for");
68+
}
69+
70+
@Test
71+
void testCheckpointMetadataEntryToJson()
72+
{
73+
assertThat(codec.toJson(new CheckpointMetadataEntry(
74+
100,
75+
Optional.of(ImmutableMap.of(
76+
"sidecarNumActions", "1",
77+
"sidecarSizeInBytes", "20965",
78+
"numOfAddFiles", "1",
79+
"sidecarFileSchema", "")))))
80+
.isEqualTo("{\n" +
81+
" \"version\" : 100,\n" +
82+
" \"tags\" : {\n" +
83+
" \"sidecarNumActions\" : \"1\",\n" +
84+
" \"sidecarSizeInBytes\" : \"20965\",\n" +
85+
" \"numOfAddFiles\" : \"1\",\n" +
86+
" \"sidecarFileSchema\" : \"\"\n" +
87+
" }\n" +
88+
"}");
89+
}
90+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
Data generated using Databricks 15.4:
2+
3+
```sql
4+
CREATE TABLE source_table (
5+
id INT,
6+
name STRING,
7+
age INT
8+
)
9+
USING DELTA
10+
TBLPROPERTIES (
11+
'delta.checkpointPolicy' = 'v2'
12+
);
13+
14+
INSERT INTO source_table VALUES
15+
(1, 'Alice', 25),
16+
(2, 'Bob', 30),
17+
(3, 'Charlie', 28);
18+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"commitInfo":{"timestamp":1761436420945,"userId":"user1","userName":"user1","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"true","properties":"{\"delta.checkpointPolicy\":\"v2\",\"delta.checkpointInterval\":\"2\",\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"70e732d3-dd95-4615-b187-d3862aa1c181"}}
2+
{"metaData":{"id":"b916c720-895c-4ccd-8b74-5a52754d3e26","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"2","delta.enableDeletionVectors":"true"},"createdTime":1761436420654}}
3+
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","v2Checkpoint"],"writerFeatures":["deletionVectors","v2Checkpoint"]}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"commitInfo":{"timestamp":1761436433491,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"3","numOutputBytes":"1112"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"482c7b06-a45c-4d4e-be3b-cf4314f4afc5"}}
2+
{"add":{"path":"part-00000-d47cc824-9a87-40c6-9e6b-528d933a30f9-c000.snappy.parquet","partitionValues":{},"size":1112,"modificationTime":1761436433000,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{\"id\":1,\"name\":\"Alice\",\"age\":25},\"maxValues\":{\"id\":3,\"name\":\"Charlie\",\"age\":30},\"nullCount\":{\"id\":0,\"name\":0,\"age\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1761436433000000","MIN_INSERTION_TIME":"1761436433000000","MAX_INSERTION_TIME":"1761436433000000","OPTIMIZE_TARGET_SIZE":"67108864"}}}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Data generated using Databricks 15.4:
2+
3+
```sql
4+
CREATE TABLE cloned_table DEEP CLONE source_table;
5+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{"checkpointMetadata":{"version":0,"tags":{"sidecarNumActions":"1","sidecarSizeInBytes":"13505","numOfAddFiles":"1","sidecarFileSchema":"{\"type\":\"struct\",\"fields\":[{\"name\":\"add\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modificationTime\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"tags\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clusteringProvider\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats_parsed\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"numRecords\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"minValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"maxValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"nullCount\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"tightBounds\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"remove\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionTimestamp\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"extendedFileMetadata\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}"}}}
2+
{"sidecar":{"path":"00000000000000000000.checkpoint.0000000001.0000000001.7be61843-e74d-45d2-8db5-4b4e56714412.parquet","sizeInBytes":13505,"modificationTime":1761436515000}}
3+
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","v2Checkpoint"],"writerFeatures":["deletionVectors","v2Checkpoint"]}}
4+
{"metaData":{"id":"45c721d6-56cd-47b6-bcb6-ccdeef56ce80","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"2","delta.enableDeletionVectors":"true"},"createdTime":1761436420654}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{"commitInfo":{"timestamp":1761436510659,"userId":"user1","userName":"user1","operation":"CLONE","operationParameters":{"source":"source_table","sourceVersion":1,"isShallow":false},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":-1,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"removedFilesSize":"0","numRemovedFiles":"0","sourceTableSize":"1112","numCopiedFiles":"1","copiedFilesSize":"1112","sourceNumOfFiles":"1"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"a073c2c4-75c9-4ff8-bb73-fbf49115cf88"}}
2+
{"metaData":{"id":"45c721d6-56cd-47b6-bcb6-ccdeef56ce80","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"2","delta.enableDeletionVectors":"true"},"createdTime":1761436420654}}
3+
{"add":{"path":"part-00000-d47cc824-9a87-40c6-9e6b-528d933a30f9-c000.snappy.parquet","partitionValues":{},"size":1112,"modificationTime":1761436433000,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{\"id\":1,\"name\":\"Alice\",\"age\":25},\"maxValues\":{\"id\":3,\"name\":\"Charlie\",\"age\":30},\"nullCount\":{\"id\":0,\"name\":0,\"age\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1761436433000000","MIN_INSERTION_TIME":"1761436433000000","MAX_INSERTION_TIME":"1761436433000000","OPTIMIZE_TARGET_SIZE":"67108864"}}}
4+
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","v2Checkpoint"],"writerFeatures":["deletionVectors","v2Checkpoint"]}}

0 commit comments

Comments
 (0)