diff --git a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java index e2bbe35480b..23f5d65fbb5 100644 --- a/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java +++ b/java/memory/src/main/java/io/netty/buffer/ArrowBuf.java @@ -23,6 +23,7 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.FileChannel; import java.nio.channels.GatheringByteChannel; import java.nio.channels.ScatteringByteChannel; import java.nio.charset.Charset; @@ -493,6 +494,16 @@ public ArrowBuf retain() { return retain(1); } + @Override + public ByteBuf touch() { + return this; + } + + @Override + public ByteBuf touch(Object hint) { + return this; + } + @Override public long getLong(int index) { chk(index, 8); @@ -505,6 +516,17 @@ public float getFloat(int index) { return Float.intBitsToFloat(getInt(index)); } + /** + * Gets a 64-bit long integer at the specified absolute {@code index} in + * this buffer in Big Endian Byte Order. + */ + @Override + public long getLongLE(int index) { + chk(index, 8); + final long v = PlatformDependent.getLong(addr(index)); + return Long.reverseBytes(v); + } + @Override public double getDouble(int index) { return Double.longBitsToDouble(getLong(index)); @@ -527,6 +549,17 @@ public int getInt(int index) { return v; } + /** + * Gets a 32-bit integer at the specified absolute {@code index} in + * this buffer in Big Endian Byte Order. + */ + @Override + public int getIntLE(int index) { + chk(index, 4); + final int v = PlatformDependent.getInt(addr(index)); + return Integer.reverseBytes(v); + } + @Override public int getUnsignedShort(int index) { return getShort(index) & 0xFFFF; @@ -535,10 +568,44 @@ public int getUnsignedShort(int index) { @Override public short getShort(int index) { chk(index, 2); - short v = PlatformDependent.getShort(addr(index)); + final short v = PlatformDependent.getShort(addr(index)); return v; } + /** + * Gets a 16-bit short integer at the specified absolute {@code index} in + * this buffer in Big Endian Byte Order. + */ + @Override + public short getShortLE(int index) { + final short v = PlatformDependent.getShort(addr(index)); + return Short.reverseBytes(v); + } + + /** + * Gets an unsigned 24-bit medium integer at the specified absolute + * {@code index} in this buffer. + */ + @Override + public int getUnsignedMedium(int index) { + chk(index, 3); + final long addr = addr(index); + return (PlatformDependent.getByte(addr) & 0xff) << 16 | + (PlatformDependent.getShort(addr + 1) & 0xffff); + } + + /** + * Gets an unsigned 24-bit medium integer at the specified absolute {@code index} in + * this buffer in Big Endian Byte Order. + */ + @Override + public int getUnsignedMediumLE(int index) { + chk(index, 3); + final long addr = addr(index); + return (PlatformDependent.getByte(addr) & 0xff) | + (Short.reverseBytes(PlatformDependent.getShort(addr + 1)) & 0xffff) << 8; + } + @Override public ArrowBuf setShort(int index, int value) { chk(index, 2); @@ -546,6 +613,44 @@ public ArrowBuf setShort(int index, int value) { return this; } + /** + * Sets the specified 16-bit short integer at the specified absolute {@code index} + * in this buffer with Big Endian byte order. + */ + @Override + public ByteBuf setShortLE(int index, int value) { + chk(index, 2); + PlatformDependent.putShort(addr(index), Short.reverseBytes((short) value)); + return this; + } + + /** + * Sets the specified 24-bit medium integer at the specified absolute + * {@code index} in this buffer. + */ + @Override + public ByteBuf setMedium(int index, int value) { + chk(index, 3); + final long addr = addr(index); + PlatformDependent.putByte(addr, (byte) (value >>> 16)); + PlatformDependent.putShort(addr + 1, (short) value); + return this; + } + + + /** + * Sets the specified 24-bit medium integer at the specified absolute {@code index} + * in this buffer with Big Endian byte order. + */ + @Override + public ByteBuf setMediumLE(int index, int value) { + chk(index, 3); + final long addr = addr(index); + PlatformDependent.putByte(addr, (byte) value); + PlatformDependent.putShort(addr + 1, Short.reverseBytes((short) (value >>> 8))); + return this; + } + @Override public ArrowBuf setInt(int index, int value) { chk(index, 4); @@ -553,6 +658,17 @@ public ArrowBuf setInt(int index, int value) { return this; } + /** + * Sets the specified 32-bit integer at the specified absolute {@code index} + * in this buffer with Big Endian byte order. + */ + @Override + public ByteBuf setIntLE(int index, int value) { + chk(index, 4); + PlatformDependent.putInt(addr(index), Integer.reverseBytes(value)); + return this; + } + @Override public ArrowBuf setLong(int index, long value) { chk(index, 8); @@ -560,6 +676,17 @@ public ArrowBuf setLong(int index, long value) { return this; } + /** + * Sets the specified 64-bit long integer at the specified absolute {@code index} + * in this buffer with Big Endian byte order. + */ + @Override + public ByteBuf setLongLE(int index, long value) { + chk(index, 8); + PlatformDependent.putLong(addr(index), Long.reverseBytes(value)); + return this; + } + @Override public ArrowBuf setChar(int index, int value) { chk(index, 2); @@ -668,16 +795,46 @@ protected short _getShort(int index) { return getShort(index); } + /** @see {@link #getShortLE(int)} */ + @Override + protected short _getShortLE(int index) { + return getShortLE(index); + } + @Override protected int _getInt(int index) { return getInt(index); } + /** @see {@link #getIntLE(int)} */ + @Override + protected int _getIntLE(int index) { + return getIntLE(index); + } + + /** @see {@link #getUnsignedMedium(int)} */ + @Override + protected int _getUnsignedMedium(int index) { + return getUnsignedMedium(index); + } + + /** @see {@link #getUnsignedMediumLE(int)} */ + @Override + protected int _getUnsignedMediumLE(int index) { + return getUnsignedMediumLE(index); + } + @Override protected long _getLong(int index) { return getLong(index); } + /** @see {@link #getLongLE(int)} */ + @Override + protected long _getLongLE(int index) { + return getLongLE(index); + } + @Override protected void _setByte(int index, int value) { setByte(index, value); @@ -688,21 +845,45 @@ protected void _setShort(int index, int value) { setShort(index, value); } + /** @see {@link #setShortLE(int, int)} */ + @Override + protected void _setShortLE(int index, int value) { + setShortLE(index, value); + } + @Override protected void _setMedium(int index, int value) { setMedium(index, value); } + /** @see {@link #setMediumLE(int, int)} */ + @Override + protected void _setMediumLE(int index, int value) { + setMediumLE(index, value); + } + @Override protected void _setInt(int index, int value) { setInt(index, value); } + /** @see {@link #setIntLE(int, int)} */ + @Override + protected void _setIntLE(int index, int value) { + setIntLE(index, value); + } + @Override protected void _setLong(int index, long value) { setLong(index, value); } + /** @see {@link #setLongLE(int, long)} */ + @Override + public void _setLongLE(int index, long value) { + setLongLE(index, value); + } + @Override public ArrowBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) { udle.getBytes(index + offset, dst, dstIndex, length); @@ -716,16 +897,13 @@ public ArrowBuf getBytes(int index, OutputStream out, int length) throws IOExcep } @Override - protected int _getUnsignedMedium(int index) { - final long addr = addr(index); - return (PlatformDependent.getByte(addr) & 0xff) << 16 | - (PlatformDependent.getByte(addr + 1) & 0xff) << 8 | - PlatformDependent.getByte(addr + 2) & 0xff; + public int getBytes(int index, GatheringByteChannel out, int length) throws IOException { + return udle.getBytes(index + offset, out, length); } @Override - public int getBytes(int index, GatheringByteChannel out, int length) throws IOException { - return udle.getBytes(index + offset, out, length); + public int getBytes(int index, FileChannel out, long position, int length) throws IOException { + return udle.getBytes(index + offset, out, position, length); } @Override @@ -776,6 +954,11 @@ public int setBytes(int index, ScatteringByteChannel in, int length) throws IOEx return udle.setBytes(index + offset, in, length); } + @Override + public int setBytes(int index, FileChannel in, long position, int length) throws IOException { + return udle.setBytes(index + offset, in, position, length); + } + @Override public byte getByte(int index) { chk(index, 1); diff --git a/java/memory/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java b/java/memory/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java index a5683adccbc..f0bc84cdc2d 100644 --- a/java/memory/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java +++ b/java/memory/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java @@ -23,9 +23,12 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.FileChannel; import java.nio.channels.GatheringByteChannel; import java.nio.channels.ScatteringByteChannel; +import io.netty.util.ByteProcessor; + /** * This is basically a complete copy of DuplicatedByteBuf. We copy because we want to override * some behaviors and make @@ -128,6 +131,16 @@ protected short _getShort(int index) { return buffer.getShort(index); } + @Override + public short getShortLE(int index) { + return buffer.getShortLE(index); + } + + @Override + protected short _getShortLE(int index) { + return buffer.getShortLE(index); + } + @Override public int getUnsignedMedium(int index) { return _getUnsignedMedium(index); @@ -138,6 +151,16 @@ protected int _getUnsignedMedium(int index) { return buffer.getUnsignedMedium(index); } + @Override + public int getUnsignedMediumLE(int index) { + return buffer.getUnsignedMediumLE(index); + } + + @Override + protected int _getUnsignedMediumLE(int index) { + return buffer.getUnsignedMediumLE(index); + } + @Override public int getInt(int index) { return _getInt(index); @@ -148,6 +171,16 @@ protected int _getInt(int index) { return buffer.getInt(index); } + @Override + public int getIntLE(int index) { + return buffer.getIntLE(index); + } + + @Override + protected int _getIntLE(int index) { + return buffer.getIntLE(index); + } + @Override public long getLong(int index) { return _getLong(index); @@ -158,6 +191,16 @@ protected long _getLong(int index) { return buffer.getLong(index); } + @Override + public long getLongLE(int index) { + return buffer.getLongLE(index); + } + + @Override + protected long _getLongLE(int index) { + return buffer.getLongLE(index); + } + @Override public abstract ByteBuf copy(int index, int length); @@ -206,6 +249,17 @@ protected void _setShort(int index, int value) { buffer.setShort(index, value); } + @Override + public ByteBuf setShortLE(int index, int value) { + buffer.setShortLE(index, value); + return this; + } + + @Override + protected void _setShortLE(int index, int value) { + buffer.setShortLE(index, value); + } + @Override public ByteBuf setMedium(int index, int value) { _setMedium(index, value); @@ -217,6 +271,17 @@ protected void _setMedium(int index, int value) { buffer.setMedium(index, value); } + @Override + public ByteBuf setMediumLE(int index, int value) { + buffer.setMediumLE(index, value); + return this; + } + + @Override + protected void _setMediumLE(int index, int value) { + buffer.setMediumLE(index, value); + } + @Override public ByteBuf setInt(int index, int value) { _setInt(index, value); @@ -228,6 +293,17 @@ protected void _setInt(int index, int value) { buffer.setInt(index, value); } + @Override + public ByteBuf setIntLE(int index, int value) { + buffer.setIntLE(index, value); + return this; + } + + @Override + protected void _setIntLE(int index, int value) { + buffer.setIntLE(index, value); + } + @Override public ByteBuf setLong(int index, long value) { _setLong(index, value); @@ -239,6 +315,17 @@ protected void _setLong(int index, long value) { buffer.setLong(index, value); } + @Override + public ByteBuf setLongLE(int index, long value) { + buffer.setLongLE(index, value); + return this; + } + + @Override + protected void _setLongLE(int index, long value) { + buffer.setLongLE(index, value); + } + @Override public ByteBuf setBytes(int index, byte[] src, int srcIndex, int length) { buffer.setBytes(index, src, srcIndex, length); @@ -257,6 +344,12 @@ public ByteBuf setBytes(int index, ByteBuffer src) { return this; } + @Override + public int setBytes(int index, FileChannel in, long position, int length) + throws IOException { + return buffer.setBytes(index, in, position, length); + } + @Override public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException { @@ -282,6 +375,13 @@ public int setBytes(int index, ScatteringByteChannel in, int length) return buffer.setBytes(index, in, length); } + + @Override + public int getBytes(int index, FileChannel out, long position, int length) + throws IOException { + return buffer.getBytes(index, out, position, length); + } + @Override public int nioBufferCount() { return buffer.nioBufferCount(); @@ -298,12 +398,12 @@ public ByteBuffer internalNioBuffer(int index, int length) { } @Override - public int forEachByte(int index, int length, ByteBufProcessor processor) { + public int forEachByte(int index, int length, ByteProcessor processor) { return buffer.forEachByte(index, length, processor); } @Override - public int forEachByteDesc(int index, int length, ByteBufProcessor processor) { + public int forEachByteDesc(int index, int length, ByteProcessor processor) { return buffer.forEachByteDesc(index, length, processor); } @@ -312,6 +412,18 @@ public final int refCnt() { return unwrap().refCnt(); } + @Override + public final ByteBuf touch() { + unwrap().touch(); + return this; + } + + @Override + public final ByteBuf touch(Object hint) { + unwrap().touch(hint); + return this; + } + @Override public final ByteBuf retain() { unwrap().retain(); diff --git a/java/memory/src/main/java/org/apache/arrow/memory/Accountant.java b/java/memory/src/main/java/org/apache/arrow/memory/Accountant.java index 5bd6b9fe379..ed2184aadab 100644 --- a/java/memory/src/main/java/org/apache/arrow/memory/Accountant.java +++ b/java/memory/src/main/java/org/apache/arrow/memory/Accountant.java @@ -219,6 +219,15 @@ public long getLimit() { return allocationLimit.get(); } + /** + * Return the initial reservation. + * + * @return reservation in bytes. + */ + public long getInitReservation() { + return reservation; + } + /** * Set the maximum amount of memory that can be allocated in the this Accountant before failing * an allocation. diff --git a/java/memory/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java b/java/memory/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java index b8b5283423c..94102992139 100644 --- a/java/memory/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java +++ b/java/memory/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java @@ -18,8 +18,8 @@ package org.apache.arrow.memory; +import io.netty.buffer.AbstractByteBufAllocator; import io.netty.buffer.ByteBuf; -import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.CompositeByteBuf; import io.netty.buffer.ExpandableByteBuf; @@ -32,7 +32,7 @@ * otherwise non-expandable * ArrowBufs to be expandable. */ -public class ArrowByteBufAllocator implements ByteBufAllocator { +public class ArrowByteBufAllocator extends AbstractByteBufAllocator { private static final int DEFAULT_BUFFER_SIZE = 4096; private static final int DEFAULT_MAX_COMPOSITE_COMPONENTS = 16; @@ -142,8 +142,17 @@ public CompositeByteBuf compositeHeapBuffer(int maxNumComponents) { throw fail(); } + @Override + protected ByteBuf newHeapBuffer(int initialCapacity, int maxCapacity) { + throw fail(); + } + + @Override + protected ByteBuf newDirectBuffer(int initialCapacity, int maxCapacity) { + return buffer(initialCapacity, maxCapacity); + } + private RuntimeException fail() { throw new UnsupportedOperationException("Allocator doesn't support heap-based memory."); } - } diff --git a/java/memory/src/main/java/org/apache/arrow/memory/BufferAllocator.java b/java/memory/src/main/java/org/apache/arrow/memory/BufferAllocator.java index b23a6e4bd85..a5da50e6278 100644 --- a/java/memory/src/main/java/org/apache/arrow/memory/BufferAllocator.java +++ b/java/memory/src/main/java/org/apache/arrow/memory/BufferAllocator.java @@ -91,6 +91,13 @@ public interface BufferAllocator extends AutoCloseable { */ public long getLimit(); + /** + * Return the initial reservation. + * + * @return reservation in bytes. + */ + public long getInitReservation(); + /** * Set the maximum amount of memory this allocator is allowed to allocate. * diff --git a/java/pom.xml b/java/pom.xml index 0a0f2e0ce8f..384ef56882f 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -32,7 +32,7 @@ 4.11 1.7.25 18.0 - 4.0.49.Final + 4.1.17.Final 2.7.9 2.7.1 1.2.0-3f79e055 diff --git a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java index 853f67fd0dd..fce6876025a 100644 --- a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -67,6 +67,10 @@ public void write(${name}Holder holder) { public void write${minor.class}(${friendlyType} value) { fail("${name}"); } + + public void writeBigEndianBytesToDecimal(byte[] value) { + fail("${name}"); + } diff --git a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java index 228c2c531f9..7f4a13d4f06 100644 --- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java @@ -16,6 +16,8 @@ * limitations under the License. */ +import io.netty.buffer.ArrowBuf; +import org.apache.arrow.vector.types.Types; import org.apache.drill.common.types.TypeProtos.MinorType; <@pp.dropOutputFile /> @@ -82,6 +84,12 @@ public void write(${name}Holder holder) { getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, ); } + <#if minor.class == "Decimal"> + public void writeBigEndianBytesToDecimal(byte[] value) { + getWriter(Types.MinorType.DECIMAL).writeBigEndianBytesToDecimal(value); + } + + public void writeNull() { } diff --git a/java/vector/src/main/codegen/templates/ComplexWriters.java b/java/vector/src/main/codegen/templates/ComplexWriters.java index 406bbb39c7f..8cad12ac318 100644 --- a/java/vector/src/main/codegen/templates/ComplexWriters.java +++ b/java/vector/src/main/codegen/templates/ComplexWriters.java @@ -120,6 +120,11 @@ public void write(Nullable${minor.class}Holder h) { vector.setSafe(idx(), value); vector.setValueCount(idx()+1); } + + public void writeBigEndianBytesToDecimal(byte[] value) { + vector.setBigEndianSafe(idx(), value); + vector.setValueCount(idx()+1); + } <#if mode == "Nullable"> @@ -148,6 +153,8 @@ public interface ${eName}Writer extends BaseWriter { <#if minor.class == "Decimal"> public void write${minor.class}(${friendlyType} value); + + public void writeBigEndianBytesToDecimal(byte[] value); } diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index e44edbd47b6..354afd26acc 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -283,6 +283,7 @@ private void reallocTypeBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); diff --git a/java/vector/src/main/codegen/templates/VariableLengthVectors.java b/java/vector/src/main/codegen/templates/VariableLengthVectors.java index 3934e74f11b..4d0342b8db5 100644 --- a/java/vector/src/main/codegen/templates/VariableLengthVectors.java +++ b/java/vector/src/main/codegen/templates/VariableLengthVectors.java @@ -18,6 +18,7 @@ import java.lang.Override; +import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.drill.exec.exception.OutOfMemoryException; import org.apache.drill.exec.vector.BaseDataValueVector; import org.apache.drill.exec.vector.BaseValueVector; @@ -301,7 +302,18 @@ public void setInitialCapacity(final int valueCount) { allocationSizeInBytes = (int)size; offsetVector.setInitialCapacity(valueCount + 1); } + + @Override + public void setInitialCapacity(int valueCount, double density) { + long size = (long) (valueCount * density); + if (size > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed allocation size"); + } + allocationSizeInBytes = (int)size; + offsetVector.setInitialCapacity(valueCount + 1); + } + @Override public void allocateNew() { if(!allocateNewSafe()){ diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableFixedWidthVector.java index 209758e4ece..7bfdff52719 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableFixedWidthVector.java @@ -455,6 +455,7 @@ private ArrowBuf reallocBufferHelper(ArrowBuf buffer, final boolean dataBuffer) long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java index edf4987de57..db717c48bc1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java @@ -24,7 +24,6 @@ import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; @@ -181,6 +180,44 @@ public void setInitialCapacity(int valueCount) { offsetAllocationSizeInBytes = (valueCount + 1) * OFFSET_WIDTH; } + /** + * Sets the desired value capacity for the vector. This function doesn't + * allocate any memory for the vector. + * @param valueCount desired number of elements in the vector + * @param density average number of bytes per variable width element + */ + @Override + public void setInitialCapacity(int valueCount, double density) { + long size = (long) (valueCount * density); + if (size > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); + } + + if(size == 0) { + size = 1; + } + valueAllocationSizeInBytes = (int) size; + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(valueCount); + /* to track the end offset of last data element in vector, we need + * an additional slot in offset buffer. + */ + offsetAllocationSizeInBytes = (valueCount + 1) * OFFSET_WIDTH; + } + + /** + * Get the density of this ListVector + * @return density + */ + public double getDensity() { + if (valueCount == 0) { + return 0.0D; + } + final int startOffset = offsetBuffer.getInt(0); + final int endOffset = offsetBuffer.getInt(valueCount * OFFSET_WIDTH); + final double totalListSize = endOffset - startOffset; + return totalListSize/valueCount; + } + /** * Get the current value capacity for the vector * @return number of elements that vector can hold. @@ -464,6 +501,7 @@ public void reallocDataBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); @@ -516,6 +554,7 @@ private ArrowBuf reallocBufferHelper(ArrowBuf buffer, final boolean offsetBuffer long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); @@ -658,7 +697,6 @@ public TransferPair getTransferPair(BufferAllocator allocator) { * @param target destination vector for transfer */ public void transferTo(BaseNullableVariableWidthVector target) { - compareTypes(target, "transferTo"); target.clear(); target.validityBuffer = validityBuffer.transferOwnership(target.allocator).buffer; target.valueBuffer = valueBuffer.transferOwnership(target.allocator).buffer; @@ -684,9 +722,7 @@ public void splitAndTransferTo(int startIndex, int length, splitAndTransferValidityBuffer(startIndex, length, target); splitAndTransferOffsetBuffer(startIndex, length, target); target.setLastSet(length - 1); - if (this.valueCount > 0) { - target.setValueCount(this.valueCount); - } + target.setValueCount(length); } /* diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java new file mode 100644 index 00000000000..9544b23cefd --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +/** + * Vector that support density aware initial capacity settings. + */ +public interface DensityAwareVector { + /** + * Set value with density + * @param valueCount + * @param density + */ + void setInitialCapacity(int valueCount, double density); + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java index dcc551094ae..69bcd33fc6e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector; +import com.google.common.base.Preconditions; import io.netty.buffer.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.impl.DecimalReaderImpl; @@ -199,6 +200,61 @@ public void set(int index, ArrowBuf buffer) { valueBuffer.setBytes(index * TYPE_WIDTH, buffer, 0, TYPE_WIDTH); } + /** + * Set the decimal element at given index to the provided array of bytes. + * Decimal is now implemented as Little Endian. This API allows the user + * to pass a decimal value in the form of byte array in BE byte order. + * + * Consumers of Arrow code can use this API instead of first swapping + * the source bytes (doing a write and read) and then finally writing to + * ArrowBuf of decimal vector. + * + * This method takes care of adding the necessary padding if the length + * of byte array is less then 16 (length of decimal type). + * + * @param index position of element + * @param value array of bytes containing decimal in big endian byte order. + */ + public void setBigEndian(int index, byte[] value) { + BitVectorHelper.setValidityBitToOne(validityBuffer, index); + final int length = value.length; + int startIndex = index * TYPE_WIDTH; + if (length == TYPE_WIDTH) { + for (int i = TYPE_WIDTH - 1; i >= 3; i-=4) { + valueBuffer.setByte(startIndex, value[i]); + valueBuffer.setByte(startIndex + 1, value[i-1]); + valueBuffer.setByte(startIndex + 2, value[i-2]); + valueBuffer.setByte(startIndex + 3, value[i-3]); + startIndex += 4; + } + + return; + } + + if (length == 0) { + valueBuffer.setZero(startIndex, TYPE_WIDTH); + return; + } + + if (length < 16) { + for (int i = length - 1; i >= 0; i--) { + valueBuffer.setByte(startIndex, value[i]); + startIndex++; + } + + final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00); + final int maxStartIndex = (index + 1) * TYPE_WIDTH; + while (startIndex < maxStartIndex) { + valueBuffer.setByte(startIndex, pad); + startIndex++; + } + + return; + } + + throw new IllegalArgumentException("Invalid decimal value length. Valid length in [1 - 16], got " + length); + } + /** * Set the element at the given index to the given value. * @@ -266,6 +322,16 @@ public void setSafe(int index, ArrowBuf buffer) { set(index, buffer); } + /** + * Same as {@link #setBigEndian(int, byte[])} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. + */ + public void setBigEndianSafe(int index, byte[] value) { + handleSafe(index); + setBigEndian(index, value); + } + /** * Same as {@link #set(int, int, ArrowBuf)} except that it handles the * case when index is greater than or equal to existing @@ -424,4 +490,4 @@ public void copyValueSafe(int fromIndex, int toIndex) { to.copyFromSafe(fromIndex, toIndex, NullableDecimalVector.this); } } -} \ No newline at end of file +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java index 04c00b7c834..7182fa8476c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java @@ -18,7 +18,7 @@ package org.apache.arrow.vector; -public interface VariableWidthVector extends ValueVector { +public interface VariableWidthVector extends ValueVector, DensityAwareVector { /** * Allocate a new memory space for this vector. Must be called prior to using the ValueVector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java index db0ff86df47..c777618fdfb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java @@ -20,6 +20,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.types.Types.MinorType; @@ -33,7 +34,7 @@ * * This class implements common functionality of composite vectors. */ -public abstract class AbstractContainerVector implements ValueVector { +public abstract class AbstractContainerVector implements ValueVector, DensityAwareVector { static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class); protected final String name; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index 4648d078949..c4414871eb8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -25,10 +25,13 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.AddOrGetResult; import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.BaseNullableVariableWidthVector; +import org.apache.arrow.vector.BaseNullableFixedWidthVector; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; @@ -106,6 +109,7 @@ protected void reallocOffsetBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); @@ -134,7 +138,53 @@ public FieldVector getDataVector() { @Override public void setInitialCapacity(int numRecords) { offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH; - vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); + if (vector instanceof BaseNullableVariableWidthVector || vector instanceof BaseNullableFixedWidthVector) { + vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); + } else { + vector.setInitialCapacity(numRecords); + } + } + + /** + * Specialized version of setInitialCapacity() for ListVector. This is + * used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. This is + * very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. In + * such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param density density of ListVector. Density is the average size of + * list per position in the List vector. For example, a + * density value of 10 implies each position in the list + * vector has a list of 10 values. + * A density value of 0.1 implies out of 10 positions in + * the list vector, 1 position has a list of size 1 and + * remaining positions are null (no lists) or empty lists. + * This helps in tightly controlling the memory we provision + * for inner data vector. + */ + @Override + public void setInitialCapacity(int numRecords, double density) { + if ((numRecords * density) >= 2_000_000_000) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); + } + offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH; + int innerValueCapacity = (int)(numRecords * density); + + if(innerValueCapacity == 0) { + innerValueCapacity = 1; + } + + if (vector instanceof DensityAwareVector) { + ((DensityAwareVector)vector).setInitialCapacity(innerValueCapacity, density); + } else { + vector.setInitialCapacity(innerValueCapacity); + } } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index 6713b1c7871..408ee930321 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -222,6 +222,7 @@ private void reallocValidityBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index afe86a692c3..33698ca61a0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -31,12 +31,7 @@ import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BufferBacked; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.*; import org.apache.arrow.vector.complex.impl.ComplexCopier; import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.complex.impl.UnionListWriter; @@ -102,6 +97,58 @@ public void initializeChildrenFromFields(List children) { addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); } + @Override + public void setInitialCapacity(int numRecords) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialCapacity(numRecords); + } + + /** + * Specialized version of setInitialCapacity() for ListVector. This is + * used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. This is + * very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. In + * such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param density density of ListVector. Density is the average size of + * list per position in the List vector. For example, a + * density value of 10 implies each position in the list + * vector has a list of 10 values. + * A density value of 0.1 implies out of 10 positions in + * the list vector, 1 position has a list of size 1 and + * remaining positions are null (no lists). This helps + * in tightly controlling the memory we provision for + * inner data vector. + * remaining positions are null (no lists) or empty lists. + * This helps in tightly controlling the memory we provision + * for inner data vector. + */ + @Override + public void setInitialCapacity(int numRecords, double density) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialCapacity(numRecords, density); + } + + /** + * Get the density of this ListVector + * @return density + */ + public double getDensity() { + if (valueCount == 0) { + return 0.0D; + } + final int startOffset = offsetBuffer.getInt(0); + final int endOffset = offsetBuffer.getInt(valueCount * OFFSET_WIDTH); + final double totalListSize = endOffset - startOffset; + return totalListSize/valueCount; + } + @Override public List getChildrenFromFields() { return singletonList(getDataVector()); @@ -244,6 +291,7 @@ private void reallocValidityBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); @@ -616,7 +664,7 @@ public int getNullCount() { */ @Override public int getValueCapacity() { - return Math.min(getValidityBufferValueCapacity(), super.getValueCapacity()); + return getValidityAndOffsetValueCapacity(); } private int getValidityAndOffsetValueCapacity() { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 6089a67924f..52395026939 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -28,7 +28,6 @@ import javax.annotation.Nullable; -import com.google.common.base.Preconditions; import com.google.common.collect.Ordering; import com.google.common.primitives.Ints; @@ -101,6 +100,17 @@ public void setInitialCapacity(int numRecords) { } } + @Override + public void setInitialCapacity(int valueCount, double density) { + for (final ValueVector vector : (Iterable) this) { + if (vector instanceof DensityAwareVector) { + ((DensityAwareVector)vector).setInitialCapacity(valueCount, density); + } else { + vector.setInitialCapacity(valueCount); + } + } + } + @Override public int getBufferSize() { if (valueCount == 0 || size() == 0) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java index f95302f55f8..2f938466679 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java @@ -21,8 +21,6 @@ import static com.google.common.base.Preconditions.checkNotNull; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; import java.util.List; import com.google.common.collect.ObjectArrays; @@ -132,7 +130,7 @@ public TransferPair getTransferPair(BufferAllocator allocator) { @Override public TransferPair makeTransferPair(ValueVector to) { - return new NullableMapTransferPair(this, (NullableMapVector) to, true); + return new NullableMapTransferPair(this, (NullableMapVector) to, false); } @Override @@ -348,6 +346,12 @@ public void setInitialCapacity(int numRecords) { super.setInitialCapacity(numRecords); } + @Override + public void setInitialCapacity(int numRecords, double density) { + validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords); + super.setInitialCapacity(numRecords, density); + } + @Override public boolean allocateNewSafe() { /* Boolean to keep track if all the memory allocations were successful @@ -394,6 +398,7 @@ private void reallocValidityBuffer() { long newAllocationSize = baseSize * 2L; newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.max(newAllocationSize, 1); if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Unable to expand the buffer"); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java index 91147c663f2..e107430f0ae 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector.complex; +import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; @@ -28,7 +29,7 @@ * Current design maintains data and offsets vectors. Each cell is stored in the data vector. Repeated vector * uses the offset vector to determine the sequence of cells pertaining to an individual value. */ -public interface RepeatedValueVector extends ValueVector { +public interface RepeatedValueVector extends ValueVector, DensityAwareVector { final static int DEFAULT_REPEAT_PER_RECORD = 5; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java index 480bd76d445..b6db29a7fc6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java @@ -19,7 +19,6 @@ package org.apache.arrow.vector.util; import java.util.ArrayList; -import java.util.List; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; @@ -40,21 +39,6 @@ public JsonStringArrayList(int size) { super(size); } - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof List)) { - return false; - } - List other = (List) obj; - return this.size() == other.size() && this.containsAll(other); - } - @Override public final String toString() { try { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java index 6d3b390379a..b863fa8af86 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java @@ -134,9 +134,9 @@ public Set keySet() { @Override public Collection values() { - return Lists.newArrayList(Iterables.transform(secondary.entries(), new Function, V>() { + return Lists.newArrayList(Iterables.transform(secondary.entries(), new Function, V>() { @Override - public V apply(IntObjectMap.Entry entry) { + public V apply(IntObjectMap.PrimitiveEntry entry) { return Preconditions.checkNotNull(entry).value(); } })); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java index 4d844d6d3ca..be21200d697 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.math.BigDecimal; import java.math.BigInteger; @@ -111,4 +112,57 @@ public void testBigDecimalDifferentScaleAndPrecision() { } } } + + /** + * Test {@link NullableDecimalVector#setBigEndian(int, byte[])} which takes BE layout input and stores in LE layout. + * Cases to cover: value given in byte array in different lengths in range [1-16] and negative values. + */ + @Test + public void decimalBE2LE() { + try (NullableDecimalVector decimalVector = TestUtils.newVector(NullableDecimalVector.class, "decimal", new ArrowType.Decimal(21, 2), allocator);) { + decimalVector.allocateNew(); + + BigInteger[] testBigInts = new BigInteger[] { + new BigInteger("0"), + new BigInteger("-1"), + new BigInteger("23"), + new BigInteger("234234"), + new BigInteger("-234234234"), + new BigInteger("234234234234"), + new BigInteger("-56345345345345"), + new BigInteger("29823462983462893462934679234653456345"), // converts to 16 byte array + new BigInteger("-3894572983475982374598324598234346536"), // converts to 16 byte array + new BigInteger("-345345"), + new BigInteger("754533") + }; + + int insertionIdx = 0; + insertionIdx++; // insert a null + for (BigInteger val : testBigInts) { + decimalVector.setBigEndian(insertionIdx++, val.toByteArray()); + } + insertionIdx++; // insert a null + // insert a zero length buffer + decimalVector.setBigEndian(insertionIdx++, new byte[0]); + + // Try inserting a buffer larger than 16bytes and expect a failure + try { + decimalVector.setBigEndian(insertionIdx, new byte[17]); + fail("above statement should have failed"); + } catch (IllegalArgumentException ex) { + assertTrue(ex.getMessage().equals("Invalid decimal value length. Valid length in [1 - 16], got 17")); + } + decimalVector.setValueCount(insertionIdx); + + // retrieve values and check if they are correct + int outputIdx = 0; + assertTrue(decimalVector.isNull(outputIdx++)); + for (BigInteger expected : testBigInts) { + final BigDecimal actual = decimalVector.getObject(outputIdx++); + assertEquals(expected, actual.unscaledValue()); + } + assertTrue(decimalVector.isNull(outputIdx++)); + assertEquals(BigInteger.valueOf(0), decimalVector.getObject(outputIdx).unscaledValue()); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index f6aa86a3008..20c63a1b2dc 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -112,6 +112,9 @@ public void testCopyFrom() throws Exception { result = outVector.getObject(2); resultSet = (ArrayList) result; assertEquals(0, resultSet.size()); + + /* 3+0+0/3 */ + assertEquals(1.0D, inVector.getDensity(), 0); } } @@ -209,6 +212,9 @@ public void testSetLastSetUsage() throws Exception { listVector.setLastSet(3); listVector.setValueCount(10); + /* (3+2+3)/10 */ + assertEquals(0.8D, listVector.getDensity(), 0); + index = 0; offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); assertEquals(Integer.toString(0), Integer.toString(offset)); @@ -559,6 +565,134 @@ public void testNestedListVector() throws Exception { } } + @Test + public void testNestedListVector1() throws Exception { + try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { + + MinorType listType = MinorType.LIST; + MinorType scalarType = MinorType.BIGINT; + + listVector.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList1 = (ListVector)listVector.getDataVector(); + innerList1.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList2 = (ListVector)innerList1.getDataVector(); + innerList2.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList3 = (ListVector)innerList2.getDataVector(); + innerList3.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList4 = (ListVector)innerList3.getDataVector(); + innerList4.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList5 = (ListVector)innerList4.getDataVector(); + innerList5.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList6 = (ListVector)innerList5.getDataVector(); + innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); + + listVector.setInitialCapacity(128); + } + } + + @Test + public void testNestedListVector2() throws Exception { + try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { + listVector.setInitialCapacity(1); + UnionListWriter listWriter = listVector.getWriter(); + /* allocate memory */ + listWriter.allocate(); + + /* write one or more inner lists at index 0 */ + listWriter.setPosition(0); + listWriter.startList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(50); + listWriter.list().bigInt().writeBigInt(100); + listWriter.list().bigInt().writeBigInt(200); + listWriter.list().endList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(75); + listWriter.list().bigInt().writeBigInt(125); + listWriter.list().endList(); + + listWriter.endList(); + + /* write one or more inner lists at index 1 */ + listWriter.setPosition(1); + listWriter.startList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(15); + listWriter.list().bigInt().writeBigInt(20); + listWriter.list().endList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(25); + listWriter.list().bigInt().writeBigInt(30); + listWriter.list().bigInt().writeBigInt(35); + listWriter.list().endList(); + + listWriter.endList(); + + assertEquals(2, listVector.getLastSet()); + + listVector.setValueCount(2); + + assertEquals(2, listVector.getValueCount()); + + /* get listVector value at index 0 -- the value itself is a listvector */ + Object result = listVector.getObject(0); + ArrayList> resultSet = (ArrayList>) result; + ArrayList list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of second inner list */ + + list = resultSet.get(0); + assertEquals(new Long(50), list.get(0)); + assertEquals(new Long(100), list.get(1)); + assertEquals(new Long(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(new Long(75), list.get(0)); + assertEquals(new Long(125), list.get(1)); + + /* get listVector value at index 1 -- the value itself is a listvector */ + result = listVector.getObject(1); + resultSet = (ArrayList>) result; + + assertEquals(2, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(2, resultSet.get(0).size()); /* size of first inner list */ + assertEquals(3, resultSet.get(1).size()); /* size of second inner list */ + + list = resultSet.get(0); + assertEquals(new Long(15), list.get(0)); + assertEquals(new Long(20), list.get(1)); + + list = resultSet.get(1); + assertEquals(new Long(25), list.get(0)); + assertEquals(new Long(30), list.get(1)); + assertEquals(new Long(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listVector.isNull(0)); + assertFalse(listVector.isNull(1)); + + /* check underlying offsets */ + final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + /* listVector has 2 lists at index 0 and 3 lists at index 1 */ + assertEquals(0, offsetBuffer.getInt(0 * ListVector.OFFSET_WIDTH)); + assertEquals(2, offsetBuffer.getInt(1 * ListVector.OFFSET_WIDTH)); + assertEquals(4, offsetBuffer.getInt(2 * ListVector.OFFSET_WIDTH)); + } + } + @Test public void testGetBufferAddress() throws Exception { try (ListVector listVector = ListVector.empty("vector", allocator)) { @@ -581,6 +715,8 @@ public void testGetBufferAddress() throws Exception { listWriter.bigInt().writeBigInt(300); listWriter.endList(); + listVector.setValueCount(2); + /* check listVector contents */ Object result = listVector.getObject(0); ArrayList resultSet = (ArrayList) result; @@ -611,6 +747,9 @@ public void testGetBufferAddress() throws Exception { assertEquals(2, buffers.size()); assertEquals(bitAddress, buffers.get(0).memoryAddress()); assertEquals(offsetAddress, buffers.get(1).memoryAddress()); + + /* (3+2)/2 */ + assertEquals(2.5, listVector.getDensity(), 0); } } @@ -625,4 +764,52 @@ public void testConsistentChildName() throws Exception { assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); } } + + @Test + public void testSetInitialCapacity() { + try (final ListVector vector = ListVector.empty("", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + + /** + * use the default multiplier of 5, + * 512 * 5 => 2560 * 4 => 10240 bytes => 16KB => 4096 value capacity. + */ + vector.setInitialCapacity(512); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertEquals(4096, vector.getDataVector().getValueCapacity()); + + /* use density as 4 */ + vector.setInitialCapacity(512, 4); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertEquals(512*4, vector.getDataVector().getValueCapacity()); + + /** + * inner value capacity we pass to data vector is 512 * 0.1 => 51 + * For an int vector this is 204 bytes of memory for data buffer + * and 7 bytes for validity buffer. + * and with power of 2 allocation, we allocate 256 bytes and 8 bytes + * for the data buffer and validity buffer of the inner vector. Thus + * value capacity of inner vector is 64 + */ + vector.setInitialCapacity(512, 0.1); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertEquals(64, vector.getDataVector().getValueCapacity()); + + /** + * inner value capacity we pass to data vector is 512 * 0.01 => 5 + * For an int vector this is 20 bytes of memory for data buffer + * and 1 byte for validity buffer. + * and with power of 2 allocation, we allocate 32 bytes and 1 bytes + * for the data buffer and validity buffer of the inner vector. Thus + * value capacity of inner vector is 8 + */ + vector.setInitialCapacity(512, 0.01); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertEquals(8, vector.getDataVector().getValueCapacity()); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index c7ee202f946..be2946262a4 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -1925,4 +1925,30 @@ public static void setBytes(int index, byte[] bytes, NullableVarCharVector vecto vector.offsetBuffer.setInt((index + 1) * vector.OFFSET_WIDTH, currentOffset + bytes.length); vector.valueBuffer.setBytes(currentOffset, bytes, 0, bytes.length); } + + @Test /* VarCharVector */ + public void testSetInitialCapacity() { + try (final NullableVarCharVector vector = new NullableVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + /* use the default 8 data bytes on average per element */ + vector.setInitialCapacity(4096); + vector.allocateNew(); + assertEquals(4096, vector.getValueCapacity()); + assertEquals(4096 * 8, vector.getDataBuffer().capacity()); + + vector.setInitialCapacity(4096, 1); + vector.allocateNew(); + assertEquals(4096, vector.getValueCapacity()); + assertEquals(4096, vector.getDataBuffer().capacity()); + + vector.setInitialCapacity(4096, 0.1); + vector.allocateNew(); + assertEquals(4096, vector.getValueCapacity()); + assertEquals(512, vector.getDataBuffer().capacity()); + + vector.setInitialCapacity(4096, 0.01); + vector.allocateNew(); + assertEquals(4096, vector.getValueCapacity()); + assertEquals(64, vector.getDataBuffer().capacity()); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java index 293ffbfe192..3807d978aef 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java @@ -105,17 +105,17 @@ public void testListType() { vector.setInitialCapacity(512); vector.allocateNew(); - assertEquals(1023, vector.getValueCapacity()); + assertEquals(512, vector.getValueCapacity()); try { - vector.getOffsetVector().getAccessor().get(2014); + vector.getOffsetBuffer().getInt(2014 * 4); Assert.fail("Expected out of bounds exception"); } catch (Exception e) { // ok } vector.reAlloc(); - assertEquals(2047, vector.getValueCapacity()); // note: size - 1 + assertEquals(1024, vector.getValueCapacity()); assertEquals(0, vector.getOffsetBuffer().getInt(2014 * ListVector.OFFSET_WIDTH)); } }