Skip to content

Commit 3823efd

Browse files
authored
feat(benchmarking): adding gas burner test (#3115)
* refactor: move spamoor benchmark into testify suite in test/e2e/benchmark - Create test/e2e/benchmark/ subpackage with SpamoorSuite (testify/suite) - Move spamoor smoke test into suite as TestSpamoorSmoke - Split helpers into focused files: traces.go, output.go, metrics.go - Introduce resultWriter for defer-based benchmark JSON output - Export shared symbols from evm_test_common.go for cross-package use - Restructure CI to fan-out benchmark jobs and fan-in publishing - Run benchmarks on PRs only when benchmark-related files change * fix: correct BENCH_JSON_OUTPUT path for spamoor benchmark go test sets the working directory to the package under test, so the env var should be relative to test/e2e/benchmark/, not test/e2e/. * fix: place package pattern before test binary flags in benchmark CI go test treats all arguments after an unknown flag (--evm-binary) as test binary args, so ./benchmark/ was never recognized as a package pattern. * fix: adjust evm-binary path for benchmark subpackage working directory go test sets the cwd to the package directory (test/e2e/benchmark/), so the binary path needs an extra parent traversal. * wip: erc20 benchmark test * fix: exclude benchmark subpackage from make test-e2e The benchmark package doesn't define the --binary flag that test-e2e passes. It has its own CI workflow so it doesn't need to run here. * fix: replace FilterLogs with header iteration and optimize spamoor config collectBlockMetrics hit reth's 20K FilterLogs limit at high tx volumes. Replace with direct header iteration over [startBlock, endBlock] and add Phase 1 metrics: non-empty ratio, block interval p50/p99, gas/block and tx/block p50/p99. Optimize spamoor configuration for 100ms block time: - --slot-duration 100ms, --startup-delay 0 on daemon - throughput=50 per 100ms slot (500 tx/s per spammer) - max_pending=50000 to avoid 3s block poll backpressure - 5 staggered spammers with 50K txs each Results: 55 MGas/s, 1414 TPS, 19.8% non-empty blocks (up from 6%). 
* fix: improve benchmark measurement window and reliability - Move startBlock capture after spammer creation to exclude warm-up - Replace 20s drain sleep with smart poll (waitForDrain) - Add deleteAllSpammers cleanup to handle stale spamoor DB entries - Lower trace sample rate to 10% to prevent Jaeger OOM * fix: address PR review feedback for benchmark suite - make reth tag configurable via EV_RETH_TAG env var (default pr-140) - fix OTLP config: remove duplicate env vars, use http/protobuf protocol - use require.Eventually for host readiness polling - rename requireHTTP to requireHostUp - use non-fatal logging in resultWriter.flush deferred context - fix stale doc comment (setupCommonEVMEnv -> SetupCommonEVMEnv) - rename loop variable to avoid shadowing testing.TB convention - add block/internal/executing/** to CI path trigger - remove unused require import from output.go * chore: specify http * chore: filter out benchmark tests from test-e2e * refactor: centralize reth config and lower ERC20 spammer count move EV_RETH_TAG resolution and rpc connection limits into setupEnv so all benchmark tests share the same reth configuration. lower ERC20 spammer count from 5 to 2 to reduce resource contention on local hardware while keeping the loop for easy scaling on dedicated infra. * chore: collect all traces at once * chore: self review * refactor: extract benchmark helpers to slim down ERC20 test body - add blockMetricsSummary with summarize(), log(), and entries() methods - add evNodeOverhead() for computing ProduceBlock vs ExecuteTxs overhead - add collectTraces() suite method to deduplicate trace collection pattern - add addEntries() convenience method on resultWriter - slim TestERC20Throughput from ~217 to ~119 lines - reuse collectTraces in TestSpamoorSmoke * docs: add detailed documentation to benchmark helper methods * ci: add ERC20 throughput benchmark job * chore: remove span assertions * chore: adding gas burner test
1 parent 3b3d5e7 commit 3823efd

2 files changed

Lines changed: 154 additions & 4 deletions

File tree

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
//go:build evm
2+
3+
package benchmark
4+
5+
import (
6+
"context"
7+
"fmt"
8+
"time"
9+
10+
"github.com/celestiaorg/tastora/framework/docker/evstack/spamoor"
11+
)
12+
13+
// TestGasBurner measures gas throughput using a deterministic gasburner
14+
// workload. The result is tracked via BENCH_JSON_OUTPUT as seconds_per_gigagas
15+
// (lower is better) on the benchmark dashboard.
16+
func (s *SpamoorSuite) TestGasBurner() {
17+
const (
18+
numSpammers = 4
19+
countPerSpammer = 2500
20+
totalCount = numSpammers * countPerSpammer
21+
warmupTxs = 50
22+
serviceName = "ev-node-gasburner"
23+
waitTimeout = 5 * time.Minute
24+
)
25+
26+
t := s.T()
27+
ctx := t.Context()
28+
w := newResultWriter(t, "GasBurner")
29+
defer w.flush()
30+
31+
e := s.setupEnv(config{
32+
serviceName: serviceName,
33+
})
34+
api := e.spamoorAPI
35+
36+
s.Require().NoError(deleteAllSpammers(api), "failed to delete stale spammers")
37+
38+
gasburnerCfg := map[string]any{
39+
"gas_units_to_burn": 5_000_000,
40+
"total_count": countPerSpammer,
41+
"throughput": 25,
42+
"max_pending": 5000,
43+
"max_wallets": 500,
44+
"rebroadcast": 0,
45+
"base_fee": 20,
46+
"tip_fee": 5,
47+
"refill_amount": "5000000000000000000",
48+
"refill_balance": "2000000000000000000",
49+
"refill_interval": 300,
50+
}
51+
52+
for i := range numSpammers {
53+
name := fmt.Sprintf("bench-gasburner-%d", i)
54+
id, err := api.CreateSpammer(name, spamoor.ScenarioGasBurnerTX, gasburnerCfg, true)
55+
s.Require().NoError(err, "failed to create spammer %s", name)
56+
t.Cleanup(func() { _ = api.DeleteSpammer(id) })
57+
}
58+
59+
// wait for wallet prep and contract deployment to finish before
60+
// recording start block so warmup is excluded from the measurement.
61+
pollSentTotal := func() (float64, error) {
62+
metrics, mErr := api.GetMetrics()
63+
if mErr != nil {
64+
return 0, mErr
65+
}
66+
return sumCounter(metrics["spamoor_transactions_sent_total"]), nil
67+
}
68+
waitForMetricTarget(t, "spamoor_transactions_sent_total (warmup)", pollSentTotal, warmupTxs, waitTimeout)
69+
70+
startHeader, err := e.ethClient.HeaderByNumber(ctx, nil)
71+
s.Require().NoError(err, "failed to get start block header")
72+
startBlock := startHeader.Number.Uint64()
73+
loadStart := time.Now()
74+
t.Logf("start block: %d (after warmup)", startBlock)
75+
76+
// wait for all transactions to be sent
77+
waitForMetricTarget(t, "spamoor_transactions_sent_total", pollSentTotal, float64(totalCount), waitTimeout)
78+
79+
// wait for pending txs to drain
80+
drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second)
81+
defer drainCancel()
82+
waitForDrain(drainCtx, t.Logf, e.ethClient, 10)
83+
wallClock := time.Since(loadStart)
84+
85+
endHeader, err := e.ethClient.HeaderByNumber(ctx, nil)
86+
s.Require().NoError(err, "failed to get end block header")
87+
endBlock := endHeader.Number.Uint64()
88+
t.Logf("end block: %d (range %d blocks)", endBlock, endBlock-startBlock)
89+
90+
// collect block-level gas/tx metrics
91+
bm, err := collectBlockMetrics(ctx, e.ethClient, startBlock, endBlock)
92+
s.Require().NoError(err, "failed to collect block metrics")
93+
94+
summary := bm.summarize()
95+
s.Require().Greater(summary.SteadyState, time.Duration(0), "expected non-zero steady-state duration")
96+
summary.log(t, startBlock, endBlock, bm.TotalBlockCount, bm.BlockCount, wallClock)
97+
98+
// derive seconds_per_gigagas from the summary's MGas/s
99+
var secsPerGigagas float64
100+
if summary.AchievedMGas > 0 {
101+
// MGas/s -> Ggas/s = MGas/s / 1000, then invert
102+
secsPerGigagas = 1000.0 / summary.AchievedMGas
103+
}
104+
t.Logf("seconds_per_gigagas: %.4f", secsPerGigagas)
105+
106+
// collect and report traces
107+
traces := s.collectTraces(e, serviceName)
108+
109+
if overhead, ok := evNodeOverhead(traces.evNode); ok {
110+
t.Logf("ev-node overhead: %.1f%%", overhead)
111+
w.addEntry(entry{Name: "GasBurner - ev-node overhead", Unit: "%", Value: overhead})
112+
}
113+
114+
w.addEntries(summary.entries("GasBurner"))
115+
w.addSpans(traces.allSpans())
116+
w.addEntry(entry{
117+
Name: fmt.Sprintf("%s - seconds_per_gigagas", w.label),
118+
Unit: "s/Ggas",
119+
Value: secsPerGigagas,
120+
})
121+
}

test/e2e/benchmark/helpers.go

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,8 @@ type blockMetricsSummary struct {
267267
AvgTx float64
268268
// BlocksPerSec is non-empty blocks / steady-state seconds.
269269
BlocksPerSec float64
270+
// AvgBlockInterval is the mean time between all consecutive blocks.
271+
AvgBlockInterval time.Duration
270272
// NonEmptyRatio is (non-empty blocks / total blocks) * 100.
271273
NonEmptyRatio float64
272274
}
@@ -287,6 +289,15 @@ func (m *blockMetrics) summarize() *blockMetricsSummary {
287289
achievedTPS = float64(m.TotalTxCount) / ss.Seconds()
288290
}
289291

292+
var avgBlockInterval time.Duration
293+
if len(m.BlockIntervals) > 0 {
294+
var total time.Duration
295+
for _, d := range m.BlockIntervals {
296+
total += d
297+
}
298+
avgBlockInterval = total / time.Duration(len(m.BlockIntervals))
299+
}
300+
290301
return &blockMetricsSummary{
291302
SteadyState: ss,
292303
AchievedMGas: mgasPerSec(m.TotalGasUsed, ss),
@@ -300,8 +311,9 @@ func (m *blockMetrics) summarize() *blockMetricsSummary {
300311
TxP99: txP99,
301312
AvgGas: m.avgGasPerBlock(),
302313
AvgTx: m.avgTxPerBlock(),
303-
BlocksPerSec: blocksPerSec,
304-
NonEmptyRatio: m.nonEmptyRatio(),
314+
BlocksPerSec: blocksPerSec,
315+
AvgBlockInterval: avgBlockInterval,
316+
NonEmptyRatio: m.nonEmptyRatio(),
305317
}
306318
}
307319

@@ -312,8 +324,8 @@ func (m *blockMetrics) summarize() *blockMetricsSummary {
312324
func (s *blockMetricsSummary) log(t testing.TB, startBlock, endBlock uint64, totalBlocks, nonEmptyBlocks int, wallClock time.Duration) {
313325
t.Logf("block range: %d-%d (%d total, %d non-empty, %.1f%% non-empty)",
314326
startBlock, endBlock, totalBlocks, nonEmptyBlocks, s.NonEmptyRatio)
315-
t.Logf("block intervals: p50=%s, p99=%s, max=%s",
316-
s.IntervalP50.Round(time.Millisecond), s.IntervalP99.Round(time.Millisecond), s.IntervalMax.Round(time.Millisecond))
327+
t.Logf("block intervals: avg=%s, p50=%s, p99=%s, max=%s",
328+
s.AvgBlockInterval.Round(time.Millisecond), s.IntervalP50.Round(time.Millisecond), s.IntervalP99.Round(time.Millisecond), s.IntervalMax.Round(time.Millisecond))
317329
t.Logf("gas/block (non-empty): avg=%.0f, p50=%.0f, p99=%.0f", s.AvgGas, s.GasP50, s.GasP99)
318330
t.Logf("tx/block (non-empty): avg=%.1f, p50=%.0f, p99=%.0f", s.AvgTx, s.TxP50, s.TxP99)
319331
t.Logf("throughput: %.2f MGas/s, %.1f TPS over %s steady-state (%s wall clock)",
@@ -332,6 +344,7 @@ func (s *blockMetricsSummary) entries(prefix string) []entry {
332344
{Name: prefix + " - avg tx/block", Unit: "count", Value: s.AvgTx},
333345
{Name: prefix + " - blocks/s", Unit: "blocks/s", Value: s.BlocksPerSec},
334346
{Name: prefix + " - non-empty block ratio", Unit: "%", Value: s.NonEmptyRatio},
347+
{Name: prefix + " - avg block interval", Unit: "ms", Value: float64(s.AvgBlockInterval.Milliseconds())},
335348
{Name: prefix + " - block interval p50", Unit: "ms", Value: float64(s.IntervalP50.Milliseconds())},
336349
{Name: prefix + " - block interval p99", Unit: "ms", Value: float64(s.IntervalP99.Milliseconds())},
337350
{Name: prefix + " - gas/block p50", Unit: "gas", Value: s.GasP50},
@@ -368,6 +381,22 @@ func evNodeOverhead(spans []e2e.TraceSpan) (float64, bool) {
368381
return (produceAvg - executeAvg) / produceAvg * 100, true
369382
}
370383

384+
// waitForMetricTarget polls a metric getter function until the returned value
// is >= target, or fails the test via t.Fatalf once timeout has elapsed.
//
// Parameters:
//   - t: test handle used for logging and for failing on timeout.
//   - name: metric name, used only in log and failure messages.
//   - poll: returns the current metric value; a non-nil error is treated as
//     transient and the poll is retried.
//   - target: value the metric must reach (inclusive).
//   - timeout: maximum time to keep polling.
//
// poll is always invoked at least once, even when timeout is zero or negative,
// so an already-satisfied metric never fails spuriously. Between attempts the
// function sleeps for 2s, capped at the time remaining until the deadline. On
// timeout the failure message includes the last successfully read value and,
// if the final poll failed, its error.
func waitForMetricTarget(t testing.TB, name string, poll func() (float64, error), target float64, timeout time.Duration) {
	t.Helper()

	const pollInterval = 2 * time.Second

	var (
		lastVal float64
		lastErr error
	)
	deadline := time.Now().Add(timeout)
	for {
		v, err := poll()
		if err == nil && v >= target {
			t.Logf("metric %s reached %.0f (target %.0f)", name, v, target)
			return
		}
		if err == nil {
			lastVal = v
		}
		// Remember only the most recent outcome: a nil err here clears an
		// earlier transient failure.
		lastErr = err

		remaining := time.Until(deadline)
		if remaining <= 0 {
			break
		}
		// Never sleep past the deadline.
		wait := pollInterval
		if remaining < wait {
			wait = remaining
		}
		time.Sleep(wait)
	}

	if lastErr != nil {
		t.Fatalf("metric %s did not reach target %.0f within %v (last value %.0f, last poll error: %v)",
			name, target, timeout, lastVal, lastErr)
		return
	}
	t.Fatalf("metric %s did not reach target %.0f within %v (last value %.0f)", name, target, timeout, lastVal)
}
399+
371400
// collectBlockMetrics iterates all headers in [startBlock, endBlock] to collect
372401
// gas and transaction metrics. Empty blocks are skipped for gas/tx aggregation
373402
// but included in block interval tracking.

0 commit comments

Comments
 (0)