Skip to content

Commit d3bd7c7

Browse files
authored
Speed up NumericUtils#{subtract,add} (#15303)
1 parent 08538bb commit d3bd7c7

File tree

3 files changed

+182
-7
lines changed

3 files changed

+182
-7
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ Optimizations
220220

221221
* GITHUB#15397: NumericComparator: immediately check whether a segment is competitive with the recorded bottom (Martijn van Groningen)
222222

223+
* GITHUB#15303: Speed up NumericUtils#{add,subtract} by operating on integers instead of bytes. (Kaival Parikh)
224+
223225
Bug Fixes
224226
---------------------
225227
* GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.benchmark.jmh;
18+
19+
import java.math.BigInteger;
20+
import java.util.Arrays;
21+
import java.util.concurrent.ThreadLocalRandom;
22+
import java.util.concurrent.TimeUnit;
23+
import org.apache.lucene.util.NumericUtils;
24+
import org.openjdk.jmh.annotations.Benchmark;
25+
import org.openjdk.jmh.annotations.BenchmarkMode;
26+
import org.openjdk.jmh.annotations.Fork;
27+
import org.openjdk.jmh.annotations.Level;
28+
import org.openjdk.jmh.annotations.Measurement;
29+
import org.openjdk.jmh.annotations.Mode;
30+
import org.openjdk.jmh.annotations.OutputTimeUnit;
31+
import org.openjdk.jmh.annotations.Param;
32+
import org.openjdk.jmh.annotations.Scope;
33+
import org.openjdk.jmh.annotations.Setup;
34+
import org.openjdk.jmh.annotations.State;
35+
import org.openjdk.jmh.annotations.Warmup;
36+
37+
@BenchmarkMode(Mode.Throughput)
38+
@OutputTimeUnit(TimeUnit.MICROSECONDS)
39+
@State(Scope.Benchmark)
40+
// first iteration is complete garbage, so make sure we really warm up
41+
@Warmup(iterations = 4, time = 1)
42+
// real iterations. not useful to spend tons of time here, better to fork more
43+
@Measurement(iterations = 5, time = 1)
44+
// engage some noise reduction
45+
@Fork(
46+
value = 3,
47+
jvmArgsAppend = {"-Xmx2g", "-Xms2g", "-XX:+AlwaysPreTouch"})
48+
public class NumericUtilsBenchmark {
49+
@Param({"1", "128", "207", "256", "300", "512", "702", "1024"})
50+
int size;
51+
52+
private byte[] subA;
53+
private byte[] subB;
54+
private byte[] subResult;
55+
private byte[] subExpected;
56+
57+
private byte[] addA;
58+
private byte[] addB;
59+
private byte[] addResult;
60+
private byte[] addExpected;
61+
62+
@Setup(Level.Iteration)
63+
public void subInit() {
64+
ThreadLocalRandom random = ThreadLocalRandom.current();
65+
66+
subA = new byte[size];
67+
subB = new byte[size];
68+
subResult = new byte[size];
69+
subExpected = new byte[size];
70+
71+
random.nextBytes(subA);
72+
random.nextBytes(subB);
73+
74+
// Treat as unsigned integers
75+
BigInteger aBig = new BigInteger(1, subA);
76+
BigInteger bBig = new BigInteger(1, subB);
77+
78+
// Swap a <-> b if a < b
79+
if (aBig.compareTo(bBig) < 0) {
80+
byte[] temp = subA;
81+
subA = subB;
82+
subB = temp;
83+
84+
BigInteger tempBig = aBig;
85+
aBig = bBig;
86+
bBig = tempBig;
87+
}
88+
89+
byte[] temp = aBig.subtract(bBig).toByteArray();
90+
if (temp.length == size + 1) { // BigInteger pads with extra 0 if MSB is 1
91+
assert temp[0] == 0;
92+
System.arraycopy(temp, 1, subExpected, 0, size);
93+
} else {
94+
System.arraycopy(temp, 0, subExpected, size - temp.length, temp.length);
95+
}
96+
}
97+
98+
@Setup(Level.Iteration)
99+
public void addInit() {
100+
ThreadLocalRandom random = ThreadLocalRandom.current();
101+
102+
addA = new byte[size];
103+
addB = new byte[size];
104+
addResult = new byte[size];
105+
addExpected = new byte[size];
106+
107+
random.nextBytes(addA);
108+
random.nextBytes(addB);
109+
110+
// Treat as unsigned integers
111+
BigInteger aBig = new BigInteger(1, addA);
112+
BigInteger bBig = new BigInteger(1, addB);
113+
114+
byte[] temp = aBig.add(bBig).toByteArray();
115+
if (temp.length == size + 1) { // BigInteger pads with extra 0 if MSB is 1
116+
if (temp[0] != 0) { // overflow
117+
addInit(); // re-init
118+
return;
119+
}
120+
System.arraycopy(temp, 1, addExpected, 0, size);
121+
} else {
122+
System.arraycopy(temp, 0, addExpected, size - temp.length, temp.length);
123+
}
124+
}
125+
126+
@Benchmark
127+
public void subtract() {
128+
NumericUtils.subtract(size, 0, subA, subB, subResult);
129+
assert Arrays.equals(subExpected, subResult);
130+
}
131+
132+
@Benchmark
133+
public void add() {
134+
NumericUtils.add(size, 0, addA, addB, addResult);
135+
assert Arrays.equals(addExpected, addResult);
136+
}
137+
}

lucene/core/src/java/org/apache/lucene/util/NumericUtils.java

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,17 +94,35 @@ public static int sortableFloatBits(int bits) {
9494
public static void subtract(int bytesPerDim, int dim, byte[] a, byte[] b, byte[] result) {
9595
int start = dim * bytesPerDim;
9696
int end = start + bytesPerDim;
97+
9798
int borrow = 0;
98-
for (int i = end - 1; i >= start; i--) {
99-
int diff = (a[i] & 0xff) - (b[i] & 0xff) - borrow;
99+
int i;
100+
101+
int limit = start + (bytesPerDim & ~3);
102+
for (i = end - 1; i >= limit; i--) {
103+
int diff = Byte.toUnsignedInt(a[i]) - Byte.toUnsignedInt(b[i]) - borrow;
100104
if (diff < 0) {
101-
diff += 256;
102105
borrow = 1;
103106
} else {
104107
borrow = 0;
105108
}
106109
result[i - start] = (byte) diff;
107110
}
111+
112+
for (i -= 3; i >= start; i -= 4) {
113+
int aInt = (int) BitUtil.VH_BE_INT.get(a, i);
114+
int bInt = (int) BitUtil.VH_BE_INT.get(b, i);
115+
116+
long diff = Integer.toUnsignedLong(aInt) - Integer.toUnsignedLong(bInt) - borrow;
117+
if (diff < 0) {
118+
borrow = 1;
119+
} else {
120+
borrow = 0;
121+
}
122+
123+
BitUtil.VH_BE_INT.set(result, i - start, (int) diff);
124+
}
125+
108126
if (borrow != 0) {
109127
throw new IllegalArgumentException("a < b");
110128
}
@@ -117,17 +135,35 @@ public static void subtract(int bytesPerDim, int dim, byte[] a, byte[] b, byte[]
117135
public static void add(int bytesPerDim, int dim, byte[] a, byte[] b, byte[] result) {
118136
int start = dim * bytesPerDim;
119137
int end = start + bytesPerDim;
138+
120139
int carry = 0;
121-
for (int i = end - 1; i >= start; i--) {
122-
int digitSum = (a[i] & 0xff) + (b[i] & 0xff) + carry;
123-
if (digitSum > 255) {
124-
digitSum -= 256;
140+
int i;
141+
142+
int limit = start + (bytesPerDim & ~3);
143+
for (i = end - 1; i >= limit; i--) {
144+
int digitSum = Byte.toUnsignedInt(a[i]) + Byte.toUnsignedInt(b[i]) + carry;
145+
if (digitSum >= 256) {
125146
carry = 1;
126147
} else {
127148
carry = 0;
128149
}
129150
result[i - start] = (byte) digitSum;
130151
}
152+
153+
for (i -= 3; i >= start; i -= 4) {
154+
int aInt = (int) BitUtil.VH_BE_INT.get(a, i);
155+
int bInt = (int) BitUtil.VH_BE_INT.get(b, i);
156+
157+
long digitSum = Integer.toUnsignedLong(aInt) + Integer.toUnsignedLong(bInt) + carry;
158+
if (digitSum >= 0x100000000L) {
159+
carry = 1;
160+
} else {
161+
carry = 0;
162+
}
163+
164+
BitUtil.VH_BE_INT.set(result, i - start, (int) digitSum);
165+
}
166+
131167
if (carry != 0) {
132168
throw new IllegalArgumentException("a + b overflows bytesPerDim=" + bytesPerDim);
133169
}

0 commit comments

Comments
 (0)