Skip to content

Commit f4a9f11

Browse files
bench: final
1 parent fb64023 commit f4a9f11

8 files changed

Lines changed: 1472 additions & 338 deletions

File tree

bench/adapters/duckdb_adapter.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,18 @@ def query():
9191
result, time_ns = self._time_it(query)
9292
return BenchmarkResult("groupby_q5", time_ns, len(result))
9393

94+
def run_groupby_q6(self) -> BenchmarkResult:
    """Run group-by benchmark Q6: MAX(v1) - MIN(v2) for each id3.

    Returns:
        A ``BenchmarkResult`` tagged ``"groupby_q6"`` that carries the
        timing value handed back by ``self._time_it`` and ``len()`` of
        the fetched result.
    """
    table = self._get_table()

    def fetch_ranges():
        # The SQL text is formatted inside the callable passed to
        # _time_it, so it is rebuilt on every invocation of the callable.
        sql = f"SELECT id3, MAX(v1) - MIN(v2) as range FROM {table} GROUP BY id3"
        executed = self._conn.execute(sql)
        return executed.fetchdf()

    frame, elapsed_ns = self._time_it(fetch_ranges)
    row_count = len(frame)
    return BenchmarkResult("groupby_q6", elapsed_ns, row_count)
105+
94106
def run_join_inner(self, right_path: Path) -> BenchmarkResult:
95107
"""Inner join on id1."""
96108
left = self._get_table("left")

bench/adapters/polars_adapter.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,18 @@ def query():
7979
result, time_ns = self._time_it(query)
8080
return BenchmarkResult("groupby_q5", time_ns, len(result))
8181

82+
def run_groupby_q6(self) -> BenchmarkResult:
    """Run group-by benchmark Q6: max(v1) - min(v2) for each id3.

    Returns:
        A ``BenchmarkResult`` tagged ``"groupby_q6"`` that carries the
        timing value handed back by ``self._time_it`` and ``len()`` of
        the aggregated result.
    """
    frame = self._get_table()

    def aggregate():
        grouped = frame.group_by("id3")
        # One output column named "range": max of v1 minus min of v2.
        spread = pl.max("v1") - pl.min("v2")
        return grouped.agg(spread.alias("range"))

    aggregated, elapsed_ns = self._time_it(aggregate)
    row_count = len(aggregated)
    return BenchmarkResult("groupby_q6", elapsed_ns, row_count)
93+
8294
def run_join_inner(self, right_path: Path) -> BenchmarkResult:
8395
"""Inner join on id1."""
8496
left = self._get_table("left")

bench/adapters/questdb_adapter.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,18 @@ def query():
157157
result, time_ns = self._time_it(query)
158158
return BenchmarkResult("groupby_q5", time_ns, len(result))
159159

160+
def run_groupby_q6(self) -> BenchmarkResult:
    """Run group-by benchmark Q6: MAX(v1) - MIN(v2) for each id3.

    Returns:
        A ``BenchmarkResult`` tagged ``"groupby_q6"`` that carries the
        timing value handed back by ``self._time_it`` and the number of
        rows returned by ``fetchall()``.
    """
    table = self._get_table()

    def fetch_ranges():
        # The SQL text is formatted inside the callable passed to
        # _time_it, so it is rebuilt on every invocation of the callable.
        sql = f"SELECT id3, MAX(v1) - MIN(v2) as range FROM {table} GROUP BY id3"
        with self._conn.cursor() as cursor:
            cursor.execute(sql)
            rows = cursor.fetchall()
        return rows

    rows, elapsed_ns = self._time_it(fetch_ranges)
    row_count = len(rows)
    return BenchmarkResult("groupby_q6", elapsed_ns, row_count)
171+
160172
def run_join_inner(self, right_path: Path) -> BenchmarkResult:
161173
"""Inner join on id1."""
162174
raise NotImplementedError("QuestDB join benchmarks not implemented")

bench/adapters/rayforce_adapter.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def load_data(self, path: Path, table_name: str = "data") -> None:
111111
column_types = self._get_column_types(path)
112112

113113
# Use rayforce native CSV loading with column types
114-
rf_table = self._Table.from_csv(column_types, str(path)).select("*").execute()
114+
rf_table = self._Table.from_csv(column_types, str(path))
115115
rf_table.save(symbol_name)
116116
self._table_names[table_name] = symbol_name
117117

@@ -195,6 +195,12 @@ def run_groupby_q5(self) -> BenchmarkResult:
195195
query = f"(select {{v1: (sum v1) v2: (sum v2) v3: (sum v3) by: id3 from: {t}}})"
196196
return self._run_timed_query(query, "groupby_q5")
197197

198+
def run_groupby_q6(self) -> BenchmarkResult:
    """Run group-by benchmark Q6: (max v1) - (min v2) for each id3.

    Builds the select expression against the symbol returned by
    ``self._get_symbol()`` and delegates execution and timing to
    ``self._run_timed_query`` under the name ``"groupby_q6"``.
    """
    symbol = self._get_symbol()
    # Doubled braces are literal braces in the emitted query text.
    expression = f"(select {{range: (- (max v1) (min v2)) by: id3 from: {symbol}}})"
    outcome = self._run_timed_query(expression, "groupby_q6")
    return outcome
203+
198204
def _load_table_from_csv(self, path: Path) -> object:
199205
"""Load CSV file using rayforce native Table.from_csv."""
200206
column_types = self._get_column_types(path)

bench/adapters/timescale_adapter.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,18 @@ def query():
158158
result, time_ns = self._time_it(query)
159159
return BenchmarkResult("groupby_q5", time_ns, len(result))
160160

161+
def run_groupby_q6(self) -> BenchmarkResult:
    """Run group-by benchmark Q6: MAX(v1) - MIN(v2) for each id3.

    Returns:
        A ``BenchmarkResult`` tagged ``"groupby_q6"`` that carries the
        timing value handed back by ``self._time_it`` and the number of
        rows returned by ``fetchall()``.
    """
    table = self._get_table()

    def fetch_ranges():
        # The SQL text is formatted inside the callable passed to
        # _time_it, so it is rebuilt on every invocation of the callable.
        sql = f"SELECT id3, MAX(v1) - MIN(v2) as range FROM {table} GROUP BY id3"
        with self._conn.cursor() as cursor:
            cursor.execute(sql)
            rows = cursor.fetchall()
        return rows

    rows, elapsed_ns = self._time_it(fetch_ranges)
    row_count = len(rows)
    return BenchmarkResult("groupby_q6", elapsed_ns, row_count)
172+
161173
def run_join_inner(self, right_path: Path) -> BenchmarkResult:
162174
"""Inner join on id1."""
163175
import io

bench/runner.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class BenchmarkRunner:
5353
"""Run benchmarks across multiple adapters."""
5454

5555
BENCHMARKS = {
56-
"groupby": ["groupby_q1", "groupby_q2", "groupby_q3", "groupby_q4", "groupby_q5"],
56+
"groupby": ["groupby_q1", "groupby_q2", "groupby_q3", "groupby_q4", "groupby_q5", "groupby_q6"],
5757
"join": ["join_inner", "join_left"],
5858
"sort": ["sort_single", "sort_multi"],
5959
}
@@ -222,7 +222,13 @@ def print_comparison(results: list[BenchmarkRun]) -> None:
222222
by_benchmark[run.benchmark] = {}
223223
by_benchmark[run.benchmark][run.adapter] = run
224224

225-
adapters = sorted(set(r.adapter for r in results))
225+
# Put rayforce first, then sort the rest
226+
all_adapters = set(r.adapter for r in results)
227+
adapters = []
228+
if "rayforce" in all_adapters:
229+
adapters.append("rayforce")
230+
all_adapters.remove("rayforce")
231+
adapters.extend(sorted(all_adapters))
226232

227233
print("\n" + "=" * 60)
228234
print("COMPARISON (median ms)")
@@ -235,27 +241,35 @@ def print_comparison(results: list[BenchmarkRun]) -> None:
235241
print(header)
236242
print("-" * 60)
237243

244+
# Track speedups relative to rayforce
245+
speedups: dict[str, list[float]] = {a: [] for a in adapters}
246+
238247
# Rows
239248
for bench_name, adapter_results in sorted(by_benchmark.items()):
240249
row = f"{bench_name:<15}"
241-
times = []
250+
rf_time = adapter_results.get("rayforce", None)
251+
rf_ms = rf_time.median_ms if rf_time else None
252+
242253
for adapter in adapters:
243254
if adapter in adapter_results:
244255
t = adapter_results[adapter].median_ms
245-
times.append((adapter, t))
246256
row += f" {t:>12.2f}"
257+
if rf_ms and rf_ms > 0:
258+
speedups[adapter].append(t / rf_ms)
247259
else:
248260
row += f" {'N/A':>12}"
249261
print(row)
250262

251-
# Mark fastest
252-
if times:
253-
fastest = min(times, key=lambda x: x[1])
254-
slowest = max(times, key=lambda x: x[1])
255-
if len(times) > 1:
256-
speedup = slowest[1] / fastest[1]
257-
print(f" → {fastest[0]} is {speedup:.1f}x faster than {slowest[0]}")
258-
263+
# Average speedup line
264+
print("-" * 60)
265+
row = f"{'(avg speedup)':<15}"
266+
for adapter in adapters:
267+
if speedups[adapter]:
268+
avg = sum(speedups[adapter]) / len(speedups[adapter])
269+
row += f" {avg:>11.2f}x"
270+
else:
271+
row += f" {'N/A':>12}"
272+
print(row)
259273
print("=" * 60)
260274

261275

0 commit comments

Comments
 (0)