Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
19fc08c
benchmarks for common io
tensor-ninja Feb 16, 2026
9114bf8
feat: add density ramp benchmark script and docs
craigpfeifer Feb 28, 2026
5b39234
added aws exec script
craigpfeifer Mar 6, 2026
6af109a
updated project deps
craigpfeifer Mar 6, 2026
f91b6be
fix: correct /var/empty ownership in Docker-path rootfs builds
craigpfeifer Mar 9, 2026
0ce2073
fix: copy rootfs internally before chown to avoid root-owned temp dir
craigpfeifer Mar 9, 2026
4dc7b25
fix: restore rootfs_dir ownership after Docker run to ensure temp dir…
craigpfeifer Mar 9, 2026
8e3a79d
fix: let Docker extract rootfs tar as root to preserve uid/gid
craigpfeifer Mar 9, 2026
cda015f
results dir
craigpfeifer Mar 9, 2026
275fbfe
results README
craigpfeifer Mar 9, 2026
96b9fc2
Merge branch 'CelestoAI:main' into feat/density-ramp-benchmark
craigpfeifer Mar 9, 2026
2abbe7e
added second test after rebasing to upstream
craigpfeifer Mar 9, 2026
0645b60
add GCP density ramp script and re-tested results
craigpfeifer Mar 9, 2026
0d66e61
fix: address CodeRabbit review comments on density ramp benchmark PR
craigpfeifer Mar 9, 2026
b002fb8
fix: bench_file_io bulk read uses dd to /dev/null, not cat
craigpfeifer Mar 9, 2026
4570006
moved results file to dir
craigpfeifer Mar 9, 2026
1869549
benchmarks: apply EC2 hardening to GCP density ramp script
craigpfeifer Mar 9, 2026
953965c
benchmarks: skip failed HTTP/SSE samples in network IO benchmark
craigpfeifer Mar 9, 2026
41dce88
fix: add missing os import in build.py for uid/gid preservation
craigpfeifer Mar 9, 2026
74bbdfe
benchmarks: surface vm.delete() failures on boot-error path
craigpfeifer Mar 9, 2026
2e8710c
benchmarks: preserve launch order and fix density counts in parallel …
craigpfeifer Mar 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added benchmarks/__init__.py
Empty file.
143 changes: 143 additions & 0 deletions benchmarks/bench_command_latency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""Benchmark: per-command SSH round-trip latency.
Measures the overhead of vm.run() vs native subprocess.
"""

from __future__ import annotations

import subprocess
import sys

try:
from .helpers import (
format_stats,
is_sandbox_exec_available,
overhead_str,
print_header,
print_result,
print_subheader,
run_sandboxed,
stats_summary,
time_call_n,
)
except ImportError:
from helpers import ( # type: ignore[no-redef]
format_stats,
is_sandbox_exec_available,
overhead_str,
print_header,
print_result,
print_subheader,
run_sandboxed,
stats_summary,
time_call_n,
)

ITERATIONS = 100
WARMUP = 2


def _host_true() -> None:
subprocess.run(["true"], capture_output=True)


def _host_echo() -> None:
result = subprocess.run(["echo", "hello"], capture_output=True, text=True)
assert "hello" in result.stdout


def _sandboxed_true() -> None:
    """Run the no-op `true` command under sandbox-exec, ignoring the result."""
    _ = run_sandboxed("true")


def _sandboxed_echo() -> None:
    """Run `echo hello` under sandbox-exec and verify stdout round-trips."""
    out = run_sandboxed("echo hello")
    assert "hello" in out.stdout


def run_benchmark() -> dict:
    """Run all command latency benchmarks and return results.

    Measures round-trip latency of trivial commands in three environments:
    native host subprocess, macOS sandbox-exec (skipped elsewhere), and a
    SmolVM via vm.run(). Prints per-scenario stats and a p50 comparison.

    Returns:
        dict mapping scenario name ("host_true", "host_echo", "sandbox_true",
        "sandbox_echo", "vm_true", "vm_echo") to a stats-summary dict; the
        sandbox entries are None when sandbox-exec is unavailable.
    """
    # Imported lazily so host/sandbox baselines don't require smolvm installed.
    from smolvm import SmolVM

    results = {}

    print_header("Command Latency Benchmark")
    print(f" Iterations: {ITERATIONS}, Warmup: {WARMUP}")

    # Host baseline: raw subprocess spawn cost, no virtualization involved.
    print_subheader("Host: subprocess.run(['true'])")
    host_true_times = time_call_n(_host_true, ITERATIONS, warmup=WARMUP)
    host_true_stats = stats_summary(host_true_times)
    print_result("Stats", format_stats(host_true_stats))
    results["host_true"] = host_true_stats

    print_subheader("Host: subprocess.run(['echo', 'hello'])")
    host_echo_times = time_call_n(_host_echo, ITERATIONS, warmup=WARMUP)
    host_echo_stats = stats_summary(host_echo_times)
    print_result("Stats", format_stats(host_echo_stats))
    results["host_echo"] = host_echo_stats

    # Sandbox baseline: only meaningful on macOS where sandbox-exec exists.
    if is_sandbox_exec_available():
        print_subheader("Sandbox: sandbox-exec 'true'")
        sandbox_true_times = time_call_n(_sandboxed_true, ITERATIONS, warmup=WARMUP)
        sandbox_true_stats = stats_summary(sandbox_true_times)
        print_result("Stats", format_stats(sandbox_true_stats))
        results["sandbox_true"] = sandbox_true_stats

        print_subheader("Sandbox: sandbox-exec 'echo hello'")
        sandbox_echo_times = time_call_n(_sandboxed_echo, ITERATIONS, warmup=WARMUP)
        sandbox_echo_stats = stats_summary(sandbox_echo_times)
        print_result("Stats", format_stats(sandbox_echo_stats))
        results["sandbox_echo"] = sandbox_echo_stats
    else:
        print("\n sandbox-exec not available (non-macOS), skipping sandbox baseline")
        # Keep the keys present so downstream consumers can distinguish
        # "not run" (None) from "missing".
        results["sandbox_true"] = None
        results["sandbox_echo"] = None

    # SmolVM: full vm.run() round-trip, measured inside one VM session so
    # boot time is excluded from the per-command numbers.
    print_subheader("SmolVM: vm.run('true')")
    print(" Starting VM...")
    with SmolVM() as vm:
        # Sanity check: fail fast before burning ITERATIONS on a broken VM.
        sanity = vm.run("echo smolvm_sanity")
        assert "smolvm_sanity" in sanity.stdout, f"Sanity failed: {sanity.stdout}"

        def _vm_true():
            vm.run("true")

        def _vm_echo():
            result = vm.run("echo hello")
            assert "hello" in result.stdout

        vm_true_times = time_call_n(_vm_true, ITERATIONS, warmup=WARMUP)
        vm_true_stats = stats_summary(vm_true_times)
        print_result("Stats", format_stats(vm_true_stats))
        results["vm_true"] = vm_true_stats

        print_subheader("SmolVM: vm.run('echo hello')")
        vm_echo_times = time_call_n(_vm_echo, ITERATIONS, warmup=WARMUP)
        vm_echo_stats = stats_summary(vm_echo_times)
        print_result("Stats", format_stats(vm_echo_stats))
        results["vm_echo"] = vm_echo_stats

    # Comparison
    print_subheader("Comparison (p50)")
    print_result(
        "vm.run('true') vs host",
        overhead_str(host_true_stats["p50"], vm_true_stats["p50"]),
    )
    print_result(
        "vm.run('echo') vs host",
        overhead_str(host_echo_stats["p50"], vm_echo_stats["p50"]),
    )
    # Sandbox comparison only when the baseline actually ran (truthy stats).
    if results.get("sandbox_true"):
        print_result(
            "vm.run('true') vs sandbox",
            overhead_str(results["sandbox_true"]["p50"], vm_true_stats["p50"]),
        )

    return results


if __name__ == "__main__":
    # Allow running this benchmark directly as a script.
    run_benchmark()
223 changes: 223 additions & 0 deletions benchmarks/bench_file_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
"""Benchmark: file read/write throughput inside the VM.

Simulates ralph's JSONL event logging, JSON job files, and session logs.
File IO inside the VM is native ext4 on virtio-blk, but getting results
out requires reading via vm.run("cat ...").
"""

from __future__ import annotations

import os
import sys
import tempfile

try:
from .helpers import (
format_stats,
overhead_str,
print_header,
print_result,
print_subheader,
stats_summary,
time_call,
)
except ImportError:
from helpers import ( # type: ignore[no-redef]
format_stats,
overhead_str,
print_header,
print_result,
print_subheader,
stats_summary,
time_call,
)

# A 1KB line simulating a JSONL event
_1KB_LINE = '{"ts":1234567890,"event":"prompt","session":"abc123","data":' + '"x' * 470 + '"}\n'
# Guard against the padding math drifting below the advertised 1KB.
assert len(_1KB_LINE) >= 1000

_10MB_DATA_LINES = 10240  # ~10MB at 1KB per line


def _host_sequential_writes(n: int = 1000) -> float:
    """Time appending n x 1KB JSONL lines to a temp file on the host."""

    def _append_all():
        with tempfile.NamedTemporaryFile(mode="w", delete=True, suffix=".jsonl") as fh:
            for _ in range(n):
                fh.write(_1KB_LINE)
            fh.flush()

    return time_call(_append_all)


def _host_bulk_write() -> float:
    """Time writing a single ~10MB payload to a temp file on the host."""
    payload = _1KB_LINE * _10MB_DATA_LINES

    def _write_once():
        with tempfile.NamedTemporaryFile(mode="w", delete=True) as fh:
            fh.write(payload)
            fh.flush()

    return time_call(_write_once)


def _host_bulk_read() -> float:
    """Write then read back a ~10MB file on the host.

    Only the read is timed; the setup write and cleanup are excluded.

    Returns:
        Wall-clock seconds for one full read of the ~10MB file.
    """
    with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".dat") as f:
        f.write(_1KB_LINE * _10MB_DATA_LINES)
        path = f.name

    def _do():
        with open(path) as f:
            _ = f.read()

    try:
        return time_call(_do)
    finally:
        # Remove the fixture even if timing raises, so failed runs don't
        # leak ~10MB temp files (original unlinked only on success).
        os.unlink(path)


def _host_many_small_files(n: int = 100) -> float:
    """Create n x 1KB files on the host, timing creation plus cleanup.

    Uses tempfile.TemporaryDirectory so the directory is removed even when a
    write fails partway (the original leaked the dir on error, and also paid
    an `import shutil` inside the timed closure on every call).

    Returns:
        Wall-clock seconds for creating and removing the n files.
    """

    def _do():
        # Cleanup (rmtree on context exit) stays inside the timed region,
        # matching the original measurement.
        with tempfile.TemporaryDirectory() as tmpdir:
            for i in range(n):
                with open(os.path.join(tmpdir, f"job_{i}.json"), "w") as f:
                    f.write(_1KB_LINE)

    return time_call(_do)


def run_benchmark() -> dict:
    """Run all file IO benchmarks and return results.

    Measures sequential small writes, bulk write, bulk read, and many-small-
    files workloads on the host (as baselines) and inside a SmolVM, then
    prints a p50 comparison.

    Returns:
        dict mapping scenario name (e.g. "host_seq_write", "vm_bulk_read")
        to a stats-summary dict.
    """
    # Imported lazily so the host baselines can run without smolvm installed.
    from smolvm import SmolVM

    results = {}

    print_header("File IO Benchmark")

    # baselines on host
    print_subheader("Host: Sequential small writes (1000 x 1KB)")
    host_seq_times = [_host_sequential_writes() for _ in range(5)]
    host_seq_stats = stats_summary(host_seq_times)
    print_result("Stats", format_stats(host_seq_stats))
    results["host_seq_write"] = host_seq_stats

    print_subheader("Host: Bulk write (~10MB)")
    host_bulk_w_times = [_host_bulk_write() for _ in range(5)]
    host_bulk_w_stats = stats_summary(host_bulk_w_times)
    print_result("Stats", format_stats(host_bulk_w_stats))
    results["host_bulk_write"] = host_bulk_w_stats

    print_subheader("Host: Bulk read (~10MB)")
    host_bulk_r_times = [_host_bulk_read() for _ in range(5)]
    host_bulk_r_stats = stats_summary(host_bulk_r_times)
    print_result("Stats", format_stats(host_bulk_r_stats))
    results["host_bulk_read"] = host_bulk_r_stats

    print_subheader("Host: Many small files (100 x 1KB)")
    host_many_times = [_host_many_small_files() for _ in range(5)]
    host_many_stats = stats_summary(host_many_times)
    print_result("Stats", format_stats(host_many_stats))
    results["host_many_files"] = host_many_stats

    # SmolVM
    print_subheader("SmolVM: Starting VM...")
    with SmolVM() as vm:
        # warmup with a sanity check
        sanity = vm.run("echo smolvm_ready")
        assert "smolvm_ready" in sanity.stdout

        # sequential small writes
        print_subheader("SmolVM: Sequential small writes (1000 x 1KB)")
        # We write via a single vm.run command using a shell loop
        write_cmd = (
            "rm -f /tmp/bench.jsonl; "
            "i=0; while [ $i -lt 1000 ]; do "
            "printf '%s\\n' '" + _1KB_LINE.strip().replace("'", "'\\''") + "' >> /tmp/bench.jsonl; "
            "i=$((i+1)); done"
        )
        vm_seq_times = []
        for _ in range(3):
            t = time_call(lambda: vm.run(write_cmd, timeout=120))
            vm_seq_times.append(t)
        vm_seq_stats = stats_summary(vm_seq_times)
        print_result("Stats", format_stats(vm_seq_stats))
        results["vm_seq_write"] = vm_seq_stats

        # Bulk write
        print_subheader("SmolVM: Bulk write (~10MB)")
        # Use dd to write 10MB of data
        bulk_write_cmd = "dd if=/dev/zero of=/tmp/bench_bulk.dat bs=1024 count=10240 2>&1"
        vm_bulk_w_times = []
        for _ in range(3):
            t = time_call(lambda: vm.run(bulk_write_cmd, timeout=60))
            vm_bulk_w_times.append(t)
        vm_bulk_w_stats = stats_summary(vm_bulk_w_times)
        print_result("Stats", format_stats(vm_bulk_w_stats))
        results["vm_bulk_write"] = vm_bulk_w_stats

        # Bulk read: use dd to /dev/null so only guest disk I/O is measured,
        # not SSH/stdout transport overhead (cat would send ~10 MB through the
        # command channel, distorting the result vs the host baseline).
        print_subheader("SmolVM: Bulk read (~10MB, guest disk only)")
        # First ensure the file exists
        vm.run("dd if=/dev/zero of=/tmp/bench_read.dat bs=1024 count=10240 2>/dev/null", timeout=60)
        vm_bulk_r_times = []
        for _ in range(3):
            t = time_call(lambda: vm.run("dd if=/tmp/bench_read.dat of=/dev/null bs=1024 2>/dev/null", timeout=60))
            vm_bulk_r_times.append(t)
        vm_bulk_r_stats = stats_summary(vm_bulk_r_times)
        print_result("Stats", format_stats(vm_bulk_r_stats))
        results["vm_bulk_read"] = vm_bulk_r_stats

        # Many small files
        print_subheader("SmolVM: Many small files (100 x 1KB)")
        escaped_line = _1KB_LINE.strip().replace("'", "'\\''")
        many_files_cmd = (
            "rm -rf /tmp/bench_jobs; mkdir -p /tmp/bench_jobs; "
            "i=0; while [ $i -lt 100 ]; do "
            "printf '%s' '" + escaped_line + "'"
            " > /tmp/bench_jobs/job_$i.json; "
            "i=$((i+1)); done"
        )
        vm_many_times = []
        for _ in range(3):
            t = time_call(lambda: vm.run(many_files_cmd, timeout=120))
            vm_many_times.append(t)
        vm_many_stats = stats_summary(vm_many_times)
        print_result("Stats", format_stats(vm_many_stats))
        results["vm_many_files"] = vm_many_stats

    # compare
    print_subheader("Comparison (p50)")
    print_result(
        "Sequential writes: VM vs Host",
        overhead_str(host_seq_stats["p50"], vm_seq_stats["p50"]),
    )
    print_result(
        "Bulk write: VM vs Host",
        overhead_str(host_bulk_w_stats["p50"], vm_bulk_w_stats["p50"]),
    )
    print_result(
        "Bulk read: VM vs Host",
        overhead_str(host_bulk_r_stats["p50"], vm_bulk_r_stats["p50"]),
    )
    print_result(
        "Many files: VM vs Host",
        overhead_str(host_many_stats["p50"], vm_many_stats["p50"]),
    )

    return results


if __name__ == "__main__":
    # When run directly as a script (not via `python -m benchmarks.<name>`),
    # add this file's directory to sys.path so the plain `from helpers import
    # ...` fallback at the top of the module can resolve.
    if "benchmarks" in __file__:
        sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent))
    run_benchmark()
Loading