Skip to content

Commit d55177b

Browse files
BryanCutleryongyanw
authored andcommitted
ARROW-2594: [Java] When realloc Vectors, zero out all unfilled bytes of new buffer
Currently when reallocating vectors, only the second half of the new buffer will be zeroed out assuming that it is doubled from the previous buffer and the first half is already populated or cleaned. This isn't the case if the vector had been cleared and the buffer is empty causing incorrect values in the new buffer if it was recycled from an old one. Added a new test with a ListVector that should reuse a previous buffer after being cleared. Author: Bryan Cutler <cutlerb@gmail.com> Closes apache#2054 from BryanCutler/java-vector-realloc-clear-buffer-ARROW-2594 and squashes the following commits: 28b8095 <Bryan Cutler> added a comment about clear be3ee8f <Bryan Cutler> remove extra spaces 5a39790 <Bryan Cutler> zero out any newly allocated buffer bytes
1 parent 96f8a83 commit d55177b

File tree

8 files changed

+52
-12
lines changed

8 files changed

+52
-12
lines changed

java/vector/src/main/codegen/templates/UnionVector.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,8 +290,7 @@ private void reallocTypeBuffer() {
290290

291291
final ArrowBuf newBuf = allocator.buffer((int)newAllocationSize);
292292
newBuf.setBytes(0, typeBuffer, 0, currentBufferCapacity);
293-
final int halfNewCapacity = newBuf.capacity() / 2;
294-
newBuf.setZero(halfNewCapacity, halfNewCapacity);
293+
newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
295294
typeBuffer.release(1);
296295
typeBuffer = newBuf;
297296
typeBufferAllocationSizeInBytes = (int)newAllocationSize;

java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -450,8 +450,7 @@ private ArrowBuf reallocBufferHelper(ArrowBuf buffer, final boolean dataBuffer)
450450

451451
final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
452452
newBuf.setBytes(0, buffer, 0, currentBufferCapacity);
453-
final int halfNewCapacity = newBuf.capacity() / 2;
454-
newBuf.setZero(halfNewCapacity, halfNewCapacity);
453+
newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
455454
buffer.release(1);
456455
buffer = newBuf;
457456
if (dataBuffer) {

java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -549,8 +549,7 @@ private ArrowBuf reallocBufferHelper(ArrowBuf buffer, final boolean offsetBuffer
549549

550550
final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
551551
newBuf.setBytes(0, buffer, 0, currentBufferCapacity);
552-
final int halfNewCapacity = newBuf.capacity() / 2;
553-
newBuf.setZero(halfNewCapacity, halfNewCapacity);
552+
newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
554553
buffer.release(1);
555554
buffer = newBuf;
556555
if (offsetBuffer) {

java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,7 @@ protected void reallocOffsetBuffer() {
117117

118118
final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
119119
newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity);
120-
final int halfNewCapacity = newBuf.capacity() / 2;
121-
newBuf.setZero(halfNewCapacity, halfNewCapacity);
120+
newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
122121
offsetBuffer.release(1);
123122
offsetBuffer = newBuf;
124123
offsetAllocationSizeInBytes = (int) newAllocationSize;

java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,8 @@ private void reallocValidityBuffer() {
217217
}
218218

219219
final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
220-
newBuf.setZero(0, newBuf.capacity());
221220
newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
221+
newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
222222
validityBuffer.release(1);
223223
validityBuffer = newBuf;
224224
validityAllocationSizeInBytes = (int) newAllocationSize;

java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,8 +296,7 @@ private void reallocValidityBuffer() {
296296

297297
final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
298298
newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
299-
final int halfNewCapacity = newBuf.capacity() / 2;
300-
newBuf.setZero(halfNewCapacity, halfNewCapacity);
299+
newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
301300
validityBuffer.release(1);
302301
validityBuffer = newBuf;
303302
validityAllocationSizeInBytes = (int) newAllocationSize;

java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,8 +414,8 @@ private void reallocValidityBuffer() {
414414
}
415415

416416
final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
417-
newBuf.setZero(0, newBuf.capacity());
418417
newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
418+
newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
419419
validityBuffer.release(1);
420420
validityBuffer = newBuf;
421421
validityAllocationSizeInBytes = (int) newAllocationSize;

java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,4 +828,49 @@ public void testSetInitialCapacity() {
828828
assertEquals(1, vector.getDataVector().getValueCapacity());
829829
}
830830
}
831+
832+
@Test
833+
public void testClearAndReuse() {
834+
try (final ListVector vector = ListVector.empty("list", allocator)) {
835+
BigIntVector bigIntVector = (BigIntVector) vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector();
836+
vector.setInitialCapacity(10);
837+
vector.allocateNew();
838+
839+
vector.startNewValue(0);
840+
bigIntVector.setSafe(0, 7);
841+
vector.endValue(0, 1);
842+
vector.startNewValue(1);
843+
bigIntVector.setSafe(1, 8);
844+
vector.endValue(1, 1);
845+
vector.setValueCount(2);
846+
847+
Object result = vector.getObject(0);
848+
ArrayList<Long> resultSet = (ArrayList<Long>) result;
849+
assertEquals(new Long(7), resultSet.get(0));
850+
851+
result = vector.getObject(1);
852+
resultSet = (ArrayList<Long>) result;
853+
assertEquals(new Long(8), resultSet.get(0));
854+
855+
// Clear and release the buffers to trigger a realloc when adding next value
856+
vector.clear();
857+
858+
// The list vector should reuse a buffer when reallocating the offset buffer
859+
vector.startNewValue(0);
860+
bigIntVector.setSafe(0, 7);
861+
vector.endValue(0, 1);
862+
vector.startNewValue(1);
863+
bigIntVector.setSafe(1, 8);
864+
vector.endValue(1, 1);
865+
vector.setValueCount(2);
866+
867+
result = vector.getObject(0);
868+
resultSet = (ArrayList<Long>) result;
869+
assertEquals(new Long(7), resultSet.get(0));
870+
871+
result = vector.getObject(1);
872+
resultSet = (ArrayList<Long>) result;
873+
assertEquals(new Long(8), resultSet.get(0));
874+
}
875+
}
831876
}

0 commit comments

Comments
 (0)