From c3081833c3add62e3e913209ad6d50ccfb0f86d4 Mon Sep 17 00:00:00 2001
From: tejassp-db <241722411+tejassp-db@users.noreply.github.com>
Date: Fri, 26 Dec 2025 09:34:49 +0530
Subject: [PATCH 1/2] PECOBLR-1121 JMH benchmark for Arrow parsing.
Add a JMH benchmark that measures Arrow parsing with the patched and unpatched
Arrow buffers and buffer allocators.
---
pom.xml | 20 ++
.../arrow/memory/ArrowParsingBenchmark.java | 179 ++++++++++++++++++
2 files changed, 199 insertions(+)
create mode 100644 src/test/java/org/apache/arrow/memory/ArrowParsingBenchmark.java
diff --git a/pom.xml b/pom.xml
index 2b7ab69622..a27e47204d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -74,6 +74,7 @@
1.71.0
1.7.0
3.2.0
+ 1.37
@@ -295,6 +296,20 @@
jts-core
1.20.0
+
+
+ org.openjdk.jmh
+ jmh-core
+ ${jmh.version}
+ test
+
+
+
+ org.openjdk.jmh
+ jmh-generator-annprocess
+ ${jmh.version}
+ test
+
@@ -392,6 +407,11 @@
value
${immutables.value.version}
+
+ org.openjdk.jmh
+ jmh-generator-annprocess
+ ${jmh.version}
+
diff --git a/src/test/java/org/apache/arrow/memory/ArrowParsingBenchmark.java b/src/test/java/org/apache/arrow/memory/ArrowParsingBenchmark.java
new file mode 100644
index 0000000000..2fac08ddb6
--- /dev/null
+++ b/src/test/java/org/apache/arrow/memory/ArrowParsingBenchmark.java
@@ -0,0 +1,179 @@
+package org.apache.arrow.memory;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import net.jpountz.lz4.LZ4FrameInputStream;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.util.TransferPair;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+@State(Scope.Benchmark)
+public class ArrowParsingBenchmark {
+ private static final int FORK_VALUE = 1; // value given to @Fork on the benchmark method
+ private static final int ITERATIONS = 20; // iteration count given to @Measurement
+ private static final int WARMUP_ITERATIONS = 20; // iteration count given to @Warmup
+
+ /** Path to an arrow chunk, read as a class-loader resource by {@link #setup()}. */
+ private static final Path ARROW_CHUNK_PATH = Path.of("arrow", "chunk_1.arrow");
+
+ /** Path to an LZ4-compressed arrow chunk, also read as a resource by {@link #setup()}. */
+ private static final Path ARROW_CHUNK_COMPRESSED_PATH = Path.of("arrow", "chunk_1.arrow.lz4");
+
+ /** Compressed Arrow file suffix. */
+ private static final String ARROW_CHUNK_COMPRESSED_FILE_SUFFIX = ".lz4";
+
+ /**
+  * Runs the benchmarks of this class through the JMH {@link Runner}.
+  *
+  * @param args command-line arguments; not read
+  * @throws RunnerException propagated from {@link Runner#run()}
+  */
+ public static void main(String[] args) throws RunnerException {
+   // JMH selects benchmarks by pattern; the simple class name picks the ones declared here.
+   final String benchmarkPattern = ArrowParsingBenchmark.class.getSimpleName();
+   final Options runnerOptions = new OptionsBuilder().include(benchmarkPattern).build();
+   new Runner(runnerOptions).run();
+ }
+
+ // Pre-loaded file contents, assigned once per trial by setup()
+ private byte[] arrowChunkBytes; // contents of ARROW_CHUNK_PATH
+ private byte[] arrowChunkCompressedBytes; // contents of ARROW_CHUNK_COMPRESSED_PATH
+
+ /**
+  * Loads both Arrow chunk resources into memory once per trial, ahead of all benchmark
+  * iterations.
+  *
+  * @throws IOException if reading either resource fails
+  */
+ @Setup(Level.Trial)
+ public void setup() throws IOException {
+   this.arrowChunkBytes = loadFileToMemory(ARROW_CHUNK_PATH);
+   this.arrowChunkCompressedBytes = loadFileToMemory(ARROW_CHUNK_COMPRESSED_PATH);
+ }
+
+ /**
+  * Reads a class-loader resource fully into memory.
+  *
+  * <p>Resource names given to {@link ClassLoader#getResourceAsStream(String)} are always
+  * {@code '/'}-separated, whereas {@link Path#toString()} uses the separator of the path's file
+  * system (a backslash on Windows). The separator is therefore normalised before the lookup so
+  * the resource is found on every platform.
+  *
+  * @param filePath relative location of the resource
+  * @return the complete contents of the resource
+  * @throws IOException if reading the resource fails
+  */
+ private byte[] loadFileToMemory(Path filePath) throws IOException {
+   String resourceName =
+       filePath.toString().replace(filePath.getFileSystem().getSeparator(), "/");
+   try (InputStream stream = getClass().getClassLoader().getResourceAsStream(resourceName)) {
+     // getResourceAsStream returns null for a missing resource; fail with a readable message.
+     assertNotNull(stream, filePath + " not found");
+     return stream.readAllBytes();
+   }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @Fork(value = FORK_VALUE)
+ @Measurement(iterations = ITERATIONS, time = 100, timeUnit = TimeUnit.MILLISECONDS)
+ @OutputTimeUnit(TimeUnit.MILLISECONDS)
+ @Warmup(iterations = WARMUP_ITERATIONS, time = 100, timeUnit = TimeUnit.MILLISECONDS)
+ public List