From 9f14975f0fca1a6342fb0a36ce81bfb133a79e4d Mon Sep 17 00:00:00 2001 From: Maksim Yegorov <997437+myegorov@users.noreply.github.com> Date: Thu, 7 Nov 2024 20:15:39 -0500 Subject: [PATCH 1/2] GH-44626: [Java] fix SplitAndTransfer throws for empty MapVector (#44627) Empty MapVector.splitAndTransfer throws `java.lang.IndexOutOfBoundsException`. Details in https://github.com/apache/arrow/issues/44626 Fixed for MapVector as for other vector types in #41066 Added unit test mimicking the scenario we've observed where MapVector's offset buffer capacity is 0. * GitHub Issue: #44626 Authored-by: Maksim Yegorov <59841139+maksimyego-db@users.noreply.github.com> Signed-off-by: David Li --- .../arrow/vector/complex/MapVector.java | 32 +++++----- .../arrow/vector/TestSplitAndTransfer.java | 63 +++++++++++++++++++ 2 files changed, 80 insertions(+), 15 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 5c6b9b22556..23cda8401b0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -211,23 +211,25 @@ public void splitAndTransfer(int startIndex, int length) { startIndex, length, valueCount); - final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH); - final int sliceLength = - offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint; to.clear(); - to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); - /* splitAndTransfer offset buffer */ - for (int i = 0; i < length + 1; i++) { - final int relativeOffset = - offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint; - to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset); + if (length > 0) { + final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH); + final int sliceLength = + offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint; + to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); + /* splitAndTransfer offset buffer */ + for (int i = 0; i < length + 1; i++) { + final int relativeOffset = + offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint; + to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset); + } + /* splitAndTransfer validity buffer */ + splitAndTransferValidityBuffer(startIndex, length, to); + /* splitAndTransfer data buffer */ + dataTransferPair.splitAndTransfer(startPoint, sliceLength); + to.lastSet = length - 1; + to.setValueCount(length); } - /* splitAndTransfer validity buffer */ - splitAndTransferValidityBuffer(startIndex, length, to); - /* splitAndTransfer data buffer */ - dataTransferPair.splitAndTransfer(startPoint, sliceLength); - to.lastSet = length - 1; - to.setValueCount(length); } /* diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index a3f25bc5207..adf4eba10cb 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -16,6 +16,7 @@ */ package org.apache.arrow.vector; +import static java.util.Arrays.asList; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -23,7 +24,9 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -36,6 +39,7 @@ import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; @@ -198,6 +202,65 @@ public void testWithEmptyVector() { toDUV.clear(); } + @Test + public void testWithNullVector() { + int valueCount = 123; + int startIndex = 10; + NullVector fromNullVector = new NullVector("nullVector"); + fromNullVector.setValueCount(valueCount); + TransferPair transferPair = fromNullVector.getTransferPair(fromNullVector.getAllocator()); + transferPair.splitAndTransfer(startIndex, valueCount - startIndex); + NullVector toNullVector = (NullVector) transferPair.getTo(); + + assertEquals(valueCount - startIndex, toNullVector.getValueCount()); + // no allocations to clear for NullVector + } + + @Test + public void testWithZeroVector() { + ZeroVector fromZeroVector = new ZeroVector("zeroVector"); + TransferPair transferPair = fromZeroVector.getTransferPair(fromZeroVector.getAllocator()); + transferPair.splitAndTransfer(0, 0); + ZeroVector toZeroVector = (ZeroVector) transferPair.getTo(); + + assertEquals(0, toZeroVector.getValueCount()); + // no allocations to clear for ZeroVector + } + + @Test + public void testListVectorWithEmptyMapVector() { + // List not null>> + int valueCount = 1; + List children = new ArrayList<>(); + children.add(new Field("key", FieldType.notNullable(new ArrowType.Utf8()), null)); + children.add(new Field("value", FieldType.nullable(new ArrowType.Utf8()), null)); + Field structField = + new Field("entries", FieldType.notNullable(ArrowType.Struct.INSTANCE), children); + + Field mapField = + new Field("element", FieldType.notNullable(new ArrowType.Map(false)), asList(structField)); + + Field listField = new Field("list", FieldType.nullable(new ArrowType.List()), asList(mapField)); + + ListVector fromListVector = (ListVector) listField.createVector(allocator); + fromListVector.allocateNew(); + fromListVector.setValueCount(valueCount); + + // child vector is empty + MapVector dataVector = (MapVector) fromListVector.getDataVector(); + dataVector.allocateNew(); + // unset capacity to mimic observed failure mode + dataVector.getOffsetBuffer().capacity(0); + + TransferPair transferPair = fromListVector.getTransferPair(fromListVector.getAllocator()); + transferPair.splitAndTransfer(0, valueCount); + ListVector toListVector = (ListVector) transferPair.getTo(); + + assertEquals(valueCount, toListVector.getValueCount()); + fromListVector.clear(); + toListVector.clear(); + } + @Test /* VarCharVector */ public void test() throws Exception { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { From f6dd9af549117a3b5dbf41a5720f8ab2d07f0015 Mon Sep 17 00:00:00 2001 From: Maksim Yegorov <997437+myegorov@users.noreply.github.com> Date: Tue, 5 Nov 2024 19:07:06 -0500 Subject: [PATCH 2/2] GH-44344: [Java] fix VectorSchemaRoot.getTransferPair for NullVector (#44631) Do not throw [UnsupportedOperationException("Tried to get allocator from NullVector")](https://github.com/apache/arrow/blob/release-18.0.0-rc0/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java#L160) from [VectorSchemaRoot.slice()](https://github.com/apache/arrow/blob/release-18.0.0-rc0/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java#L341) when slicing a VSR containing a NullVector or ZeroVector. Details in https://github.com/apache/arrow/issues/44344 Added unit test that would trigger an UnsupportedOperationException on the legacy path. * GitHub Issue: #44344 Authored-by: Maksim Yegorov <59841139+maksimyego-db@users.noreply.github.com> Signed-off-by: David Li --- .../main/java/org/apache/arrow/vector/NullVector.java | 9 ++++++++- .../main/java/org/apache/arrow/vector/ValueVector.java | 6 ++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java index 227ca716f63..6bfe540d232 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java @@ -155,9 +155,16 @@ public boolean allocateNewSafe() { @Override public void reAlloc() {} + /* + * IMPORTANT NOTE + * It's essential that NullVector (and ZeroVector) do not require BufferAllocator for any data storage. + * However, some methods of the parent interface may require passing in a BufferAllocator, even if null. + * + * @return null + */ @Override public BufferAllocator getAllocator() { - throw new UnsupportedOperationException("Tried to get allocator from NullVector"); + return null; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index 724941aa2a1..0a45409eb98 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -80,6 +80,12 @@ public interface ValueVector extends Closeable, Iterable { */ void reAlloc(); + /** + * Get the allocator associated with the vector. CAVEAT: Some ValueVector subclasses (e.g. + * NullVector) do not require an allocator for data storage and may return null. + * + * @return Returns nullable allocator. + */ BufferAllocator getAllocator(); /**