diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index b0aa5fce3df..f7dd177e875 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -125,7 +125,7 @@ jobs: # Homebrew's python@XXX is updated without "--overwrite", it # tries to replace /usr/local/bin/2to3 and so on and causes # a conflict error. - brew update + # brew update for python_package in $(brew list | grep python@); do brew install --overwrite ${python_package} done @@ -154,8 +154,10 @@ jobs: mkdir -p homebrew-custom/Formula curl -o homebrew-custom/Formula/cmake.rb https://raw.githubusercontent.com/Homebrew/homebrew-core/f68532bfe5cb87474093df8a839c3818c6aa44dd/Formula/c/cmake.rb curl -o homebrew-custom/Formula/boost.rb https://raw.githubusercontent.com/Homebrew/homebrew-core/23f9c56c5075dd56b4471e2c93f89f6400b49ddd/Formula/b/boost.rb - brew install -v ./homebrew-custom/Formula/cmake.rb - brew install -v ./homebrew-custom/Formula/boost.rb + brew tap-new local/homebrew-custom + cp ./homebrew-custom/Formula/*.rb "$(brew --repo local/homebrew-custom)/Formula/" + brew install -v local/homebrew-custom/cmake + brew install -v local/homebrew-custom/boost brew pin cmake brew pin boost # diff --git a/java/vector/src/main/codegen/includes/vv_imports.ftl b/java/vector/src/main/codegen/includes/vv_imports.ftl index f4c72a1a6cb..a8ef7c16c1d 100644 --- a/java/vector/src/main/codegen/includes/vv_imports.ftl +++ b/java/vector/src/main/codegen/includes/vv_imports.ftl @@ -34,6 +34,7 @@ import org.apache.arrow.vector.complex.*; import org.apache.arrow.vector.complex.reader.*; import org.apache.arrow.vector.complex.impl.*; import org.apache.arrow.vector.complex.writer.*; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; diff --git 
a/java/vector/src/main/codegen/templates/AbstractFieldReader.java b/java/vector/src/main/codegen/templates/AbstractFieldReader.java index e3c8729469c..8a29be63a1e 100644 --- a/java/vector/src/main/codegen/templates/AbstractFieldReader.java +++ b/java/vector/src/main/codegen/templates/AbstractFieldReader.java @@ -108,6 +108,27 @@ public void copyAsField(String name, ${name}Writer writer) { } + + public void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory) { + fail("CopyAsValue StructWriter"); + } + + public void read(ExtensionHolder holder) { + fail("Extension"); + } + + public void read(int arrayIndex, ExtensionHolder holder) { + fail("RepeatedExtension"); + } + + public void copyAsValue(AbstractExtensionTypeWriter writer) { + fail("CopyAsValueExtension"); + } + + public void copyAsField(String name, AbstractExtensionTypeWriter writer) { + fail("CopyAsFieldExtension"); + } + public FieldReader reader(String name) { fail("reader(String name)"); return null; diff --git a/java/vector/src/main/codegen/templates/BaseReader.java b/java/vector/src/main/codegen/templates/BaseReader.java index 85d582a53bf..dab97f4dcea 100644 --- a/java/vector/src/main/codegen/templates/BaseReader.java +++ b/java/vector/src/main/codegen/templates/BaseReader.java @@ -21,7 +21,7 @@ <#include "/@includes/license.ftl" /> -package org.apache.arrow.vector.complex.reader; + package org.apache.arrow.vector.complex.reader; <#include "/@includes/vv_imports.ftl" /> @@ -44,21 +44,23 @@ public interface BaseReader extends Positionable{ public interface StructReader extends BaseReader, Iterable{ FieldReader reader(String name); } - + public interface RepeatedStructReader extends StructReader{ boolean next(); int size(); void copyAsValue(StructWriter writer); + void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory); } - + public interface ListReader extends BaseReader{ - FieldReader reader(); + FieldReader reader(); } - + public interface 
RepeatedListReader extends ListReader{ boolean next(); int size(); void copyAsValue(ListWriter writer); + void copyAsValue(ListWriter writer, ExtensionTypeWriterFactory writerFactory); } public interface MapReader extends BaseReader{ @@ -69,12 +71,13 @@ public interface RepeatedMapReader extends MapReader{ boolean next(); int size(); void copyAsValue(MapWriter writer); + void copyAsValue(MapWriter writer, ExtensionTypeWriterFactory writerFactory); } - - public interface ScalarReader extends - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, - BaseReader {} - + + public interface ScalarReader extends + <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, + ExtensionReader, BaseReader {} + interface ComplexReader{ StructReader rootAsStruct(); ListReader rootAsList(); @@ -82,4 +85,3 @@ interface ComplexReader{ boolean ok(); } } - diff --git a/java/vector/src/main/codegen/templates/ComplexCopier.java b/java/vector/src/main/codegen/templates/ComplexCopier.java index 5adad523120..9386483c51b 100644 --- a/java/vector/src/main/codegen/templates/ComplexCopier.java +++ b/java/vector/src/main/codegen/templates/ComplexCopier.java @@ -42,10 +42,14 @@ public class ComplexCopier { * @param output field to write to */ public static void copy(FieldReader input, FieldWriter output) { - writeValue(input, output); + writeValue(input, output, null); } - private static void writeValue(FieldReader reader, FieldWriter writer) { + public static void copy(FieldReader input, FieldWriter output, ExtensionTypeWriterFactory extensionTypeWriterFactory) { + writeValue(input, output, extensionTypeWriterFactory); + } + + private static void writeValue(FieldReader reader, FieldWriter writer, ExtensionTypeWriterFactory extensionTypeWriterFactory) { final MinorType mt = reader.getMinorType(); switch (mt) { @@ -61,7 +65,7 @@ private static void writeValue(FieldReader reader, FieldWriter 
writer) { FieldReader childReader = reader.reader(); FieldWriter childWriter = getListWriterForReader(childReader, writer); if (childReader.isSet()) { - writeValue(childReader, childWriter); + writeValue(childReader, childWriter, extensionTypeWriterFactory); } else { childWriter.writeNull(); } @@ -79,8 +83,8 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { FieldReader structReader = reader.reader(); if (structReader.isSet()) { writer.startEntry(); - writeValue(mapReader.key(), getMapWriterForReader(mapReader.key(), writer.key())); - writeValue(mapReader.value(), getMapWriterForReader(mapReader.value(), writer.value())); + writeValue(mapReader.key(), getMapWriterForReader(mapReader.key(), writer.key()), extensionTypeWriterFactory); + writeValue(mapReader.value(), getMapWriterForReader(mapReader.value(), writer.value()), extensionTypeWriterFactory); writer.endEntry(); } else { writer.writeNull(); @@ -99,7 +103,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { if (childReader.getMinorType() != Types.MinorType.NULL) { FieldWriter childWriter = getStructWriterForReader(childReader, writer, name); if (childReader.isSet()) { - writeValue(childReader, childWriter); + writeValue(childReader, childWriter, extensionTypeWriterFactory); } else { childWriter.writeNull(); } @@ -110,6 +114,20 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { writer.writeNull(); } break; + case EXTENSIONTYPE: + if (extensionTypeWriterFactory == null) { + throw new IllegalArgumentException("Must provide ExtensionTypeWriterFactory"); + } + if (reader.isSet()) { + Object value = reader.readObject(); + if (value != null) { + writer.addExtensionTypeWriterFactory(extensionTypeWriterFactory); + writer.writeExtension(value); + } + } else { + writer.writeNull(); + } + break; <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#assign 
uncappedName = name?uncap_first/> @@ -162,6 +180,9 @@ private static FieldWriter getStructWriterForReader(FieldReader reader, StructWr return (FieldWriter) writer.map(name); case LISTVIEW: return (FieldWriter) writer.listView(name); + case EXTENSIONTYPE: + ExtensionWriter extensionWriter = writer.extension(name, reader.getField().getType()); + return (FieldWriter) extensionWriter; default: throw new UnsupportedOperationException(reader.getMinorType().toString()); } @@ -186,6 +207,9 @@ private static FieldWriter getListWriterForReader(FieldReader reader, ListWriter return (FieldWriter) writer.list(); case LISTVIEW: return (FieldWriter) writer.listView(); + case EXTENSIONTYPE: + ExtensionWriter extensionWriter = writer.extension(reader.getField().getType()); + return (FieldWriter) extensionWriter; default: throw new UnsupportedOperationException(reader.getMinorType().toString()); } @@ -211,6 +235,9 @@ private static FieldWriter getMapWriterForReader(FieldReader reader, MapWriter w return (FieldWriter) writer.listView(); case MAP: return (FieldWriter) writer.map(false); + case EXTENSIONTYPE: + ExtensionWriter extensionWriter = writer.extension(reader.getField().getType()); + return (FieldWriter) extensionWriter; default: throw new UnsupportedOperationException(reader.getMinorType().toString()); } diff --git a/java/vector/src/main/codegen/templates/NullReader.java b/java/vector/src/main/codegen/templates/NullReader.java index 0c65f9a56bf..0529633478f 100644 --- a/java/vector/src/main/codegen/templates/NullReader.java +++ b/java/vector/src/main/codegen/templates/NullReader.java @@ -33,12 +33,12 @@ */ @SuppressWarnings("unused") public class NullReader extends AbstractBaseReader implements FieldReader{ - + public static final NullReader INSTANCE = new NullReader(); public static final NullReader EMPTY_LIST_INSTANCE = new NullReader(MinorType.NULL); public static final NullReader EMPTY_STRUCT_INSTANCE = new NullReader(MinorType.STRUCT); private MinorType type; - + private 
NullReader(){ super(); type = MinorType.NULL; @@ -77,7 +77,7 @@ public void read(Nullable${name}Holder holder){ public void read(int arrayIndex, ${name}Holder holder){ throw new ArrayIndexOutOfBoundsException(); } - + public void copyAsValue(${minor.class}Writer writer){} public void copyAsField(String name, ${minor.class}Writer writer){} @@ -85,63 +85,65 @@ public void read(int arrayIndex, Nullable${name}Holder holder){ throw new ArrayIndexOutOfBoundsException(); } - + + public void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory){} + public void read(ExtensionHolder holder) { + holder.isSet = 0; + } + public int size(){ return 0; } - + public boolean isSet(){ return false; } - + public boolean next(){ return false; } - + public RepeatedStructReader struct(){ return this; } - + public RepeatedListReader list(){ return this; } - + public StructReader struct(String name){ return this; } - + public ListReader list(String name){ return this; } - + public FieldReader reader(String name){ return this; } - + public FieldReader reader(){ return this; } - + private void fail(String name){ throw new IllegalArgumentException(String.format("You tried to read a %s type when you are using a ValueReader of type %s.", name, this.getClass().getSimpleName())); } - + <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean", "LocalDateTime", "Duration", "Period", "Double", "Float", "Character", "Text", "String", "Byte", "byte[]", "PeriodDuration"] as friendlyType> <#assign safeType=friendlyType /> <#if safeType=="byte[]"><#assign safeType="ByteArray" /> - + public ${friendlyType} read${safeType}(int arrayIndex){ return null; } - + public ${friendlyType} read${safeType}(){ return null; } - -} - - +} diff --git a/java/vector/src/main/codegen/templates/PromotableWriter.java b/java/vector/src/main/codegen/templates/PromotableWriter.java index 8d7d57bb9d2..d22eb00b2c3 100644 --- a/java/vector/src/main/codegen/templates/PromotableWriter.java +++ 
b/java/vector/src/main/codegen/templates/PromotableWriter.java @@ -550,6 +550,10 @@ public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) { getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeWriterFactory(factory); } + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory, ArrowType arrowType) { + getWriter(MinorType.EXTENSIONTYPE, arrowType).addExtensionTypeWriterFactory(factory); + } + @Override public void allocate() { getWriter().allocate(); diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 598a21a6cda..4c9a45ead4c 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -53,6 +53,7 @@ public class Union${listName}Writer extends AbstractFieldWriter { private boolean inStruct = false; private boolean listStarted = false; private String structName; + private ArrowType extensionType; <#if listName == "LargeList" || listName == "LargeListView"> private static final long OFFSET_WIDTH = 8; <#else> @@ -203,9 +204,9 @@ public MapWriter map(String name, boolean keysSorted) { @Override public ExtensionWriter extension(ArrowType arrowType) { + this.extensionType = arrowType; return this; } - @Override public ExtensionWriter extension(String name, ArrowType arrowType) { ExtensionWriter extensionWriter = writer.extension(name, arrowType); @@ -339,15 +340,14 @@ public void writeExtension(Object value) { writer.writeExtension(value); writer.setPosition(writer.idx() + 1); } - + @Override public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { - writer.addExtensionTypeWriterFactory(var1); + writer.addExtensionTypeWriterFactory(var1, extensionType); } - + public void write(ExtensionHolder var1) { writer.write(var1); - writer.setPosition(writer.idx() + 1); } <#list vv.types as type> diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java index 9befcb890f0..6abf030dd2f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java @@ -22,6 +22,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.ReferenceManager; import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.util.DataSizeRoundingUtil; import org.apache.arrow.vector.util.TransferPair; @@ -248,4 +249,16 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { throw new UnsupportedOperationException(); } + + @Override + public void copyFrom( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } + + @Override + public void copyFromSafe( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java index 6bfe540d232..0d6dab28371 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java @@ -27,6 +27,7 @@ import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.NullReader; import 
org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; @@ -329,6 +330,18 @@ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { throw new UnsupportedOperationException(); } + @Override + public void copyFrom( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } + + @Override + public void copyFromSafe( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } + @Override public String getName() { return this.getField().getName(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index 0a45409eb98..08fc859c758 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -22,6 +22,7 @@ import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Field; @@ -309,6 +310,30 @@ public interface ValueVector extends Closeable, Iterable { */ void copyFromSafe(int fromIndex, int thisIndex, ValueVector from); + /** + * Copy a cell value from a particular index in source vector to a particular position in this + * vector. 
+ * + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + void copyFrom( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory); + + /** + * Same as {@link #copyFrom(int, int, ValueVector, ExtensionTypeWriterFactory)} except that it + * handles the case when the capacity of the vector needs to be expanded before copy. + * + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + void copyFromSafe( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory); + /** * Accept a generic {@link VectorVisitor} and return the result. * diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java index a6a71cf1a41..429f9884bb7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java @@ -21,6 +21,7 @@ import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; @@ -151,6 +152,18 @@ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { throw new UnsupportedOperationException(); } + @Override + public void copyFrom( + int fromIndex, int thisIndex, 
ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } + + @Override + public void copyFromSafe( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } + @Override public String getName() { return name; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index ed075352c93..3a900d01597 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -48,6 +48,7 @@ import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.ComplexCopier; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.UnionLargeListReader; import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -483,12 +484,42 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { */ @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { + copyFrom(inIndex, outIndex, from, null); + } + + /** + * Copy a cell value from a particular index in source vector to a particular position in this + * vector. 
+ * + * @param inIndex position to copy from in source vector + * @param outIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + @Override + public void copyFrom( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); FieldReader in = from.getReader(); in.setPosition(inIndex); UnionLargeListWriter out = getWriter(); out.setPosition(outIndex); - ComplexCopier.copy(in, out); + ComplexCopier.copy(in, out, writerFactory); + } + + /** + * Same as {@link #copyFrom(int, int, ValueVector, ExtensionTypeWriterFactory)} except that it + * handles the case when the capacity of the vector needs to be expanded before copy. + * + * @param inIndex position to copy from in source vector + * @param outIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + @Override + public void copyFromSafe( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + copyFrom(inIndex, outIndex, from, writerFactory); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java index 84c6f03edb2..6bfdea3a0f4 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java @@ -41,6 +41,7 @@ import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.UnionLargeListViewReader; import 
org.apache.arrow.vector.complex.impl.UnionLargeListViewWriter; import org.apache.arrow.vector.complex.impl.UnionListReader; @@ -347,6 +348,20 @@ public void copyFrom(int inIndex, int outIndex, ValueVector from) { "LargeListViewVector does not support copyFrom operation yet."); } + @Override + public void copyFromSafe( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException( + "LargeListViewVector does not support copyFromSafe operation yet."); + } + + @Override + public void copyFrom( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException( + "LargeListViewVector does not support copyFrom operation yet."); + } + @Override public FieldVector getDataVector() { return vector; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 3daeb6d77be..1e82fa22f2c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -42,6 +42,7 @@ import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.ComplexCopier; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -401,12 +402,42 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { */ @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { + copyFrom(inIndex, outIndex, from, null); + } + + /** + * Same as {@link #copyFrom(int, int, ValueVector, ExtensionTypeWriterFactory)} except that it + * handles the case when the capacity of the vector needs to be expanded 
before copy. + * + * @param inIndex position to copy from in source vector + * @param outIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + @Override + public void copyFromSafe( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + copyFrom(inIndex, outIndex, from, writerFactory); + } + + /** + * Copy a cell value from a particular index in source vector to a particular position in this + * vector. + * + * @param inIndex position to copy from in source vector + * @param outIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + @Override + public void copyFrom( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); FieldReader in = from.getReader(); in.setPosition(inIndex); FieldWriter out = getWriter(); out.setPosition(outIndex); - ComplexCopier.copy(in, out); + ComplexCopier.copy(in, out, writerFactory); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 9b4e6b4c0cd..fb703a6fb33 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -42,6 +42,7 @@ import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.ComplexCopier; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.UnionListViewReader; import org.apache.arrow.vector.complex.impl.UnionListViewWriter; import 
org.apache.arrow.vector.complex.reader.FieldReader; @@ -339,6 +340,12 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { copyFrom(inIndex, outIndex, from); } + @Override + public void copyFromSafe( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + copyFrom(inIndex, outIndex, from, writerFactory); + } + @Override public OUT accept(VectorVisitor visitor, IN value) { return visitor.visit(this, value); @@ -346,12 +353,18 @@ public OUT accept(VectorVisitor visitor, IN value) { @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { + copyFrom(inIndex, outIndex, from, null); + } + + @Override + public void copyFrom( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); FieldReader in = from.getReader(); in.setPosition(inIndex); FieldWriter out = getWriter(); out.setPosition(outIndex); - ComplexCopier.copy(in, out); + ComplexCopier.copy(in, out, writerFactory); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java index b2e95663f73..bf074ecb906 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java @@ -115,4 +115,14 @@ public void copyAsValue(ListWriter writer) { public void copyAsValue(MapWriter writer) { ComplexCopier.copy(this, (FieldWriter) writer); } + + @Override + public void copyAsValue(ListWriter writer, ExtensionTypeWriterFactory writerFactory) { + ComplexCopier.copy(this, (FieldWriter) writer, writerFactory); + } + + @Override + public void copyAsValue(MapWriter writer, ExtensionTypeWriterFactory writerFactory) { + ComplexCopier.copy(this, (FieldWriter) writer, writerFactory); + } } 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java index d341384bd95..4219069cbae 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java @@ -76,4 +76,9 @@ public void setPosition(int index) { this.writer.setPosition(index); } } + + @Override + public void writeNull() { + this.writer.writeNull(); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java index be236c31662..a9104cb0d23 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java @@ -105,4 +105,8 @@ public boolean next() { public void copyAsValue(UnionLargeListWriter writer) { ComplexCopier.copy(this, (FieldWriter) writer); } + + public void copyAsValue(UnionLargeListWriter writer, ExtensionTypeWriterFactory writerFactory) { + ComplexCopier.copy(this, (FieldWriter) writer, writerFactory); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/ExtensionReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/ExtensionReader.java new file mode 100644 index 00000000000..1ba7b27156e --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/ExtensionReader.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.reader; + +import org.apache.arrow.vector.holders.ExtensionHolder; + +/** Interface for reading extension types. Extends the functionality of {@link BaseReader}. */ +public interface ExtensionReader extends BaseReader { + + /** + * Reads the current value into the given extension holder. + * + * @param holder the {@link ExtensionHolder} to populate + */ + void read(ExtensionHolder holder); + + /** + * Reads and returns an object representation of the current extension value. + * + * @return the object representation of the current extension value + */ + Object readObject(); + + /** + * Checks if the current value is set. 
+ * + * @return true if the value is set, false otherwise + */ + boolean isSet(); +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 1d6fa39f9ea..ace36334ee1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -23,16 +23,22 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.complex.impl.UuidWriterFactory; import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; @@ -41,6 +47,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -1198,6 +1205,114 @@ public void testGetTransferPairWithField() { } } + @Test + public void testListVectorWithExtensionType() throws 
Exception { + final FieldType type = FieldType.nullable(new UuidType()); + try (final ListVector inVector = new ListVector("list", allocator, type, null)) { + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + writer.setPosition(0); + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u1); + extensionWriter.writeExtension(u2); + writer.endList(); + + writer.setValueCount(1); + + FieldReader reader = inVector.getReader(); + assertTrue(reader.isSet(), "shouldn't be null"); + Object result = inVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(u1, resultSet.get(0)); + assertEquals(u2, resultSet.get(1)); + } + } + + @Test + public void testListVectorReaderForExtensionType() throws Exception { + final FieldType type = FieldType.nullable(new UuidType()); + try (final ListVector inVector = new ListVector("list", allocator, type, null)) { + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + writer.setPosition(0); + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u1); + extensionWriter.writeExtension(u2); + writer.endList(); + + writer.setValueCount(1); + + UnionListReader reader = inVector.getReader(); + assertTrue(reader.isSet(), "shouldn't be null"); + reader.setPosition(0); + reader.next(); + FieldReader uuidReader = reader.reader(); + UuidHolder holder = new UuidHolder(); + uuidReader.read(holder); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u1, actualUuid); + reader.next(); + 
uuidReader = reader.reader(); + uuidReader.read(holder); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u2, actualUuid); + } + } + + @Test + public void testCopyFromForExtensionType() throws Exception { + try (ListVector inVector = ListVector.empty("input", allocator); + ListVector outVector = ListVector.empty("output", allocator)) { + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + writer.setPosition(0); + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u1); + extensionWriter.writeExtension(u2); + extensionWriter.writeNull(); + writer.endList(); + + writer.setValueCount(1); + + // copy values from input to output + outVector.allocateNew(); + outVector.copyFrom(0, 0, inVector, new UuidWriterFactory()); + outVector.setValueCount(1); + + UnionListReader reader = outVector.getReader(); + assertTrue(reader.isSet(), "shouldn't be null"); + reader.setPosition(0); + reader.next(); + FieldReader uuidReader = reader.reader(); + UuidHolder holder = new UuidHolder(); + uuidReader.read(holder); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u1, actualUuid); + reader.next(); + uuidReader = reader.reader(); + uuidReader.read(holder); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u2, actualUuid); + } + } + private void writeIntValues(UnionListWriter writer, int[] values) { writer.startList(); for (int v : values) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index a4197c50b5b..af9ea9176df 100644 --- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -22,23 +22,29 @@ import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.complex.impl.UnionMapWriter; +import org.apache.arrow.vector.complex.impl.UuidWriterFactory; import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; @@ -1241,4 +1247,94 @@ public void testMakeTransferPairPreserveNullability() { assertEquals(intField, vec.getField().getChildren().get(0)); assertEquals(intField, res.getField().getChildren().get(0)); } + + @Test + public void testMapVectorWithExtensionType() throws Exception { + try (final MapVector inVector = MapVector.empty("map", allocator, false)) { + inVector.allocateNew(); + UnionMapWriter writer = inVector.getWriter(); + 
writer.setPosition(0); + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + writer.startMap(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(0); + ExtensionWriter extensionWriter = writer.value().extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u1); + writer.endEntry(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(1); + extensionWriter = writer.value().extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u2); + writer.endEntry(); + writer.endMap(); + + writer.setValueCount(1); + + UnionMapReader mapReader = inVector.getReader(); + mapReader.setPosition(0); + mapReader.next(); + FieldReader uuidReader = mapReader.value(); + UuidHolder holder = new UuidHolder(); + uuidReader.read(holder); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u1, actualUuid); + mapReader.next(); + uuidReader = mapReader.value(); + uuidReader.read(holder); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u2, actualUuid); + } + } + + @Test + public void testCopyFromForExtensionType() throws Exception { + try (final MapVector inVector = MapVector.empty("in", allocator, false); + final MapVector outVector = MapVector.empty("out", allocator, false)) { + inVector.allocateNew(); + UnionMapWriter writer = inVector.getWriter(); + writer.setPosition(0); + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + writer.startMap(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(0); + ExtensionWriter extensionWriter = writer.value().extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u1); + writer.endEntry(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(1); + 
extensionWriter = writer.value().extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u2); + writer.endEntry(); + writer.endMap(); + + writer.setValueCount(1); + outVector.allocateNew(); + outVector.copyFrom(0, 0, inVector, new UuidWriterFactory()); + outVector.setValueCount(1); + + UnionMapReader mapReader = outVector.getReader(); + mapReader.setPosition(0); + mapReader.next(); + FieldReader uuidReader = mapReader.value(); + UuidHolder holder = new UuidHolder(); + uuidReader.read(holder); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u1, actualUuid); + mapReader.next(); + uuidReader = mapReader.value(); + uuidReader.read(holder); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u2, actualUuid); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/UuidVector.java b/java/vector/src/test/java/org/apache/arrow/vector/UuidVector.java index 5c90d45f60f..72ba4aa555c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/UuidVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/UuidVector.java @@ -20,6 +20,9 @@ import java.util.UUID; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.complex.impl.UuidReaderImpl; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.UuidType; @@ -79,11 +82,21 @@ public TransferPair makeTransferPair(ValueVector to) { return new TransferImpl((UuidVector) to); } + @Override + protected FieldReader getReaderImpl() { + return new UuidReaderImpl(this); + } + public void setSafe(int index, byte[] value) { 
getUnderlyingVector().setIndexDefined(index); getUnderlyingVector().setSafe(index, value); } + public void get(int index, UuidHolder holder) { + holder.value = getUnderlyingVector().get(index); + holder.isSet = isNull(index) ? 0 : 1; // null slots are reported as unset + } + public class TransferImpl implements TransferPair { UuidVector to; ValueVector targetUnderlyingVector; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java index 3bc02c60298..738e8905e32 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java @@ -20,6 +20,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; +import java.util.UUID; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.DecimalVector; @@ -30,12 +31,14 @@ import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.complex.writer.BaseWriter; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.holders.DecimalHolder; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.DecimalUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -845,4 +848,115 @@ public void testCopyMapVectorWithMapValue() { assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); } } + + @Test + public void testCopyListVectorWithExtensionType() { + try (ListVector 
from = ListVector.empty("v", allocator); + ListVector to = ListVector.empty("v", allocator)) { + + UnionListWriter listWriter = from.getWriter(); + listWriter.allocate(); + + for (int i = 0; i < COUNT; i++) { + listWriter.setPosition(i); + listWriter.startList(); + ExtensionWriter extensionWriter = listWriter.extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(UUID.randomUUID()); + extensionWriter.writeExtension(UUID.randomUUID()); + listWriter.endList(); + } + from.setValueCount(COUNT); + + // copy values + FieldReader in = from.getReader(); + FieldWriter out = to.getWriter(); + for (int i = 0; i < COUNT; i++) { + in.setPosition(i); + out.setPosition(i); + ComplexCopier.copy(in, out, new UuidWriterFactory()); + } + + to.setValueCount(COUNT); + + // validate equals + assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); + } + } + + @Test + public void testCopyMapVectorWithExtensionType() { + try (final MapVector from = MapVector.empty("v", allocator, false); + final MapVector to = MapVector.empty("v", allocator, false)) { + + from.allocateNew(); + + UnionMapWriter mapWriter = from.getWriter(); + for (int i = 0; i < COUNT; i++) { + mapWriter.setPosition(i); + mapWriter.startMap(); + mapWriter.startEntry(); + ExtensionWriter extensionKeyWriter = mapWriter.key().extension(new UuidType()); + extensionKeyWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionKeyWriter.writeExtension(UUID.randomUUID()); + ExtensionWriter extensionValueWriter = mapWriter.value().extension(new UuidType()); + extensionValueWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionValueWriter.writeExtension(UUID.randomUUID()); + mapWriter.endEntry(); + mapWriter.endMap(); + } + + from.setValueCount(COUNT); + + // copy values + FieldReader in = from.getReader(); + FieldWriter out = to.getWriter(); + for (int i = 0; i < COUNT; i++) { + in.setPosition(i); + out.setPosition(i); 
+ ComplexCopier.copy(in, out, new UuidWriterFactory()); + } + to.setValueCount(COUNT); + + // validate equals + assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); + } + } + + @Test + public void testCopyStructVectorWithExtensionType() { + try (final StructVector from = StructVector.empty("v", allocator); + final StructVector to = StructVector.empty("v", allocator)) { + + from.allocateNewSafe(); + + NullableStructWriter structWriter = from.getWriter(); + for (int i = 0; i < COUNT; i++) { + structWriter.setPosition(i); + structWriter.start(); + ExtensionWriter extensionWriter1 = structWriter.extension("timestamp1", new UuidType()); + extensionWriter1.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter1.writeExtension(UUID.randomUUID()); + ExtensionWriter extensionWriter2 = structWriter.extension("timestamp2", new UuidType()); + extensionWriter2.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter2.writeExtension(UUID.randomUUID()); + structWriter.end(); + } + + from.setValueCount(COUNT); + + // copy values + FieldReader in = from.getReader(); + FieldWriter out = to.getWriter(); + for (int i = 0; i < COUNT; i++) { + in.setPosition(i); + out.setPosition(i); + ComplexCopier.copy(in, out, new UuidWriterFactory()); + } + to.setValueCount(COUNT); + + // validate equals + assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index 1556852c5a1..7b8b1f9ef9e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -805,4 +805,29 @@ public void testExtensionType() throws Exception { assertEquals(u2, uuidVector.getObject(1)); } } + + @Test + public void testExtensionTypeForList() throws 
Exception { + try (final ListVector container = ListVector.empty(EMPTY_SCHEMA_PATH, allocator); + final UuidVector v = + (UuidVector) container.addOrGetVector(FieldType.nullable(new UuidType())).getVector(); + final PromotableWriter writer = new PromotableWriter(v, container)) { + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + container.allocateNew(); + container.setValueCount(1); + writer.addExtensionTypeWriterFactory(new UuidWriterFactory()); + + writer.setPosition(0); + writer.writeExtension(u1); + writer.setPosition(1); + writer.writeExtension(u2); + + container.setValueCount(2); + + UuidVector uuidVector = (UuidVector) container.getDataVector(); + assertEquals(u1, uuidVector.getObject(0)); + assertEquals(u2, uuidVector.getObject(1)); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java new file mode 100644 index 00000000000..6b98d3b3404 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.vector.holder.UuidHolder; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; + +public class UuidReaderImpl extends AbstractFieldReader { + + private final UuidVector vector; + + public UuidReaderImpl(UuidVector vector) { + super(); + this.vector = vector; + } + + @Override + public MinorType getMinorType() { + return vector.getMinorType(); + } + + @Override + public Field getField() { + return vector.getField(); + } + + @Override + public boolean isSet() { + return !vector.isNull(idx()); + } + + @Override + public void read(ExtensionHolder holder) { + vector.get(idx(), (UuidHolder) holder); + } + + @Override + public void read(int arrayIndex, ExtensionHolder holder) { + vector.get(arrayIndex, (UuidHolder) holder); + } + + @Override + public void copyAsValue(AbstractExtensionTypeWriter writer) { + UuidWriterImpl impl = (UuidWriterImpl) writer; + impl.vector.copyFromSafe(idx(), impl.idx(), vector); + } + + @Override + public Object readObject() { + return vector.getObject(idx()); + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 2745386db4e..f374eb41e46 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -19,6 +19,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNotNull; import static 
org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -31,6 +32,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -64,6 +66,7 @@ import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.complex.impl.UnionReader; import org.apache.arrow.vector.complex.impl.UnionWriter; +import org.apache.arrow.vector.complex.impl.UuidWriterFactory; import org.apache.arrow.vector.complex.reader.BaseReader.StructReader; import org.apache.arrow.vector.complex.reader.BigIntReader; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -71,9 +74,11 @@ import org.apache.arrow.vector.complex.reader.Float8Reader; import org.apache.arrow.vector.complex.reader.IntReader; import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.holders.DecimalHolder; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; @@ -84,6 +89,7 @@ import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.ArrowType.Int; @@ -93,6 +99,7 @@ import 
org.apache.arrow.vector.types.pojo.ArrowType.Utf8; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.JsonStringArrayList; @@ -2489,4 +2496,38 @@ public void unionWithVarCharAndBinaryHelpers() throws Exception { "row12", new String(vector.getLargeVarBinaryVector().get(11), StandardCharsets.UTF_8)); } } + + @Test + public void extensionWriterReader() throws Exception { + // test values + UUID u1 = UUID.randomUUID(); + + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + // write + + ComplexWriter writer = new ComplexWriterImpl("root", parent); + StructWriter rootWriter = writer.rootAsStruct(); + + { + ExtensionWriter extensionWriter = rootWriter.extension("uuid1", new UuidType()); + extensionWriter.setPosition(0); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u1); + } + // read + StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); + { + FieldReader uuidReader = rootReader.reader("uuid1"); + uuidReader.setPosition(0); + UuidHolder uuidHolder = new UuidHolder(); + uuidReader.read(uuidHolder); + final ByteBuffer bb = ByteBuffer.wrap(uuidHolder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u1, actualUuid); + assertTrue(uuidReader.isSet()); + assertEquals(MinorType.EXTENSIONTYPE, uuidReader.getMinorType()); + assertInstanceOf(UuidType.class, uuidReader.getField().getFieldType().getType()); + } + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java index bf1b9b0dfa1..269cff06705 100644 --- 
a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java @@ -30,6 +30,7 @@ import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.impl.LargeVarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.LargeVarCharWriterImpl; +import org.apache.arrow.vector.complex.impl.UuidReaderImpl; import org.apache.arrow.vector.complex.impl.UuidWriterImpl; import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.VarCharWriterImpl; @@ -204,4 +205,23 @@ public void testWriteToExtensionVector() throws Exception { assertEquals(uuid, result); } } + + @Test + public void testReaderCopyAsValueExtensionVector() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidVector vectorForRead = new UuidVector("test2", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector)) { + UUID uuid = UUID.randomUUID(); + vectorForRead.setValueCount(1); + vectorForRead.set(0, uuid); + UuidReaderImpl reader = (UuidReaderImpl) vectorForRead.getReader(); + reader.copyAsValue(writer); + UuidReaderImpl reader2 = (UuidReaderImpl) vector.getReader(); + UuidHolder holder = new UuidHolder(); + reader2.read(0, holder); + final ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(uuid, actualUuid); + } + } }