From 9f14975f0fca1a6342fb0a36ce81bfb133a79e4d Mon Sep 17 00:00:00 2001
From: Maksim Yegorov <997437+myegorov@users.noreply.github.com>
Date: Thu, 7 Nov 2024 20:15:39 -0500
Subject: [PATCH 1/2] GH-44626: [Java] fix SplitAndTransfer throws for empty
MapVector (#44627)
Empty MapVector.splitAndTransfer throws `java.lang.IndexOutOfBoundsException`. Details in https://github.com/apache/arrow/issues/44626
Fixed for MapVector as for other vector types in #41066
Added unit test mimicking the scenario we've observed where MapVector's offset buffer capacity is 0.
* GitHub Issue: #44626
Authored-by: Maksim Yegorov <59841139+maksimyego-db@users.noreply.github.com>
Signed-off-by: David Li
---
.../arrow/vector/complex/MapVector.java | 32 +++++-----
.../arrow/vector/TestSplitAndTransfer.java | 63 +++++++++++++++++++
2 files changed, 80 insertions(+), 15 deletions(-)
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
index 5c6b9b22556..23cda8401b0 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
@@ -211,23 +211,25 @@ public void splitAndTransfer(int startIndex, int length) {
startIndex,
length,
valueCount);
- final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
- final int sliceLength =
- offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
to.clear();
- to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
- /* splitAndTransfer offset buffer */
- for (int i = 0; i < length + 1; i++) {
- final int relativeOffset =
- offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
- to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
+ if (length > 0) {
+ final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
+ final int sliceLength =
+ offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
+ to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
+ /* splitAndTransfer offset buffer */
+ for (int i = 0; i < length + 1; i++) {
+ final int relativeOffset =
+ offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
+ to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
+ }
+ /* splitAndTransfer validity buffer */
+ splitAndTransferValidityBuffer(startIndex, length, to);
+ /* splitAndTransfer data buffer */
+ dataTransferPair.splitAndTransfer(startPoint, sliceLength);
+ to.lastSet = length - 1;
+ to.setValueCount(length);
}
- /* splitAndTransfer validity buffer */
- splitAndTransferValidityBuffer(startIndex, length, to);
- /* splitAndTransfer data buffer */
- dataTransferPair.splitAndTransfer(startPoint, sliceLength);
- to.lastSet = length - 1;
- to.setValueCount(length);
}
/*
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
index a3f25bc5207..adf4eba10cb 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
@@ -16,6 +16,7 @@
*/
package org.apache.arrow.vector;
+import static java.util.Arrays.asList;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -23,7 +24,9 @@
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
@@ -36,6 +39,7 @@
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.jupiter.api.AfterEach;
@@ -198,6 +202,65 @@ public void testWithEmptyVector() {
toDUV.clear();
}
+ @Test
+ public void testWithNullVector() {
+ int valueCount = 123;
+ int startIndex = 10;
+ NullVector fromNullVector = new NullVector("nullVector");
+ fromNullVector.setValueCount(valueCount);
+ TransferPair transferPair = fromNullVector.getTransferPair(fromNullVector.getAllocator());
+ transferPair.splitAndTransfer(startIndex, valueCount - startIndex);
+ NullVector toNullVector = (NullVector) transferPair.getTo();
+
+ assertEquals(valueCount - startIndex, toNullVector.getValueCount());
+ // no allocations to clear for NullVector
+ }
+
+ @Test
+ public void testWithZeroVector() {
+ ZeroVector fromZeroVector = new ZeroVector("zeroVector");
+ TransferPair transferPair = fromZeroVector.getTransferPair(fromZeroVector.getAllocator());
+ transferPair.splitAndTransfer(0, 0);
+ ZeroVector toZeroVector = (ZeroVector) transferPair.getTo();
+
+ assertEquals(0, toZeroVector.getValueCount());
+ // no allocations to clear for ZeroVector
+ }
+
+ @Test
+ public void testListVectorWithEmptyMapVector() {
+ // List not null>>
+ int valueCount = 1;
+ List children = new ArrayList<>();
+ children.add(new Field("key", FieldType.notNullable(new ArrowType.Utf8()), null));
+ children.add(new Field("value", FieldType.nullable(new ArrowType.Utf8()), null));
+ Field structField =
+ new Field("entries", FieldType.notNullable(ArrowType.Struct.INSTANCE), children);
+
+ Field mapField =
+ new Field("element", FieldType.notNullable(new ArrowType.Map(false)), asList(structField));
+
+ Field listField = new Field("list", FieldType.nullable(new ArrowType.List()), asList(mapField));
+
+ ListVector fromListVector = (ListVector) listField.createVector(allocator);
+ fromListVector.allocateNew();
+ fromListVector.setValueCount(valueCount);
+
+ // child vector is empty
+ MapVector dataVector = (MapVector) fromListVector.getDataVector();
+ dataVector.allocateNew();
+ // unset capacity to mimic observed failure mode
+ dataVector.getOffsetBuffer().capacity(0);
+
+ TransferPair transferPair = fromListVector.getTransferPair(fromListVector.getAllocator());
+ transferPair.splitAndTransfer(0, valueCount);
+ ListVector toListVector = (ListVector) transferPair.getTo();
+
+ assertEquals(valueCount, toListVector.getValueCount());
+ fromListVector.clear();
+ toListVector.clear();
+ }
+
@Test /* VarCharVector */
public void test() throws Exception {
try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
From f6dd9af549117a3b5dbf41a5720f8ab2d07f0015 Mon Sep 17 00:00:00 2001
From: Maksim Yegorov <997437+myegorov@users.noreply.github.com>
Date: Tue, 5 Nov 2024 19:07:06 -0500
Subject: [PATCH 2/2] GH-44344: [Java] fix VectorSchemaRoot.getTransferPair for
NullVector (#44631)
Do not throw [UnsupportedOperationException("Tried to get allocator from NullVector")](https://github.com/apache/arrow/blob/release-18.0.0-rc0/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java#L160) from [VectorSchemaRoot.slice()](https://github.com/apache/arrow/blob/release-18.0.0-rc0/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java#L341) when slicing a VSR containing a NullVector or ZeroVector. Details in https://github.com/apache/arrow/issues/44344
Added unit test that would trigger an UnsupportedOperationException on the legacy path.
* GitHub Issue: #44344
Authored-by: Maksim Yegorov <59841139+maksimyego-db@users.noreply.github.com>
Signed-off-by: David Li
---
.../main/java/org/apache/arrow/vector/NullVector.java | 9 ++++++++-
.../main/java/org/apache/arrow/vector/ValueVector.java | 6 ++++++
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java
index 227ca716f63..6bfe540d232 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java
@@ -155,9 +155,16 @@ public boolean allocateNewSafe() {
@Override
public void reAlloc() {}
+ /*
+ * IMPORTANT NOTE
+ * It's essential that NullVector (and ZeroVector) do not require BufferAllocator for any data storage.
+ * However, some methods of the parent interface may require passing in a BufferAllocator, even if null.
+ *
+ * @return null
+ */
@Override
public BufferAllocator getAllocator() {
- throw new UnsupportedOperationException("Tried to get allocator from NullVector");
+ return null;
}
@Override
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
index 724941aa2a1..0a45409eb98 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
@@ -80,6 +80,12 @@ public interface ValueVector extends Closeable, Iterable {
*/
void reAlloc();
+ /**
+ * Get the allocator associated with the vector. CAVEAT: Some ValueVector subclasses (e.g.
+ * NullVector) do not require an allocator for data storage and may return null.
+ *
+ * @return Returns nullable allocator.
+ */
BufferAllocator getAllocator();
/**