From ed386588973f8c8e0d8c1960cb8a69b9d9215017 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Wed, 18 Feb 2026 14:35:44 -0800 Subject: [PATCH 1/3] Add BenchmarkShellStyleBuildTime using testing.B MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the manual TestShellStyleBuildTime timing into a proper Go 1.24 b.Loop() benchmark. Uses 1000 shell-style wildcard patterns merged onto a single field, producing an automaton with 7409 tables and up to 900 epsilons—a stress test for NFA traversal at scale. Also widen readWWords to accept testing.TB so it works from both tests and benchmarks. Co-Authored-By: Claude Opus 4.6 --- benchmarks_test.go | 4 +-- v2_bench_test.go | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/benchmarks_test.go b/benchmarks_test.go index 2f006f1..b046bc0 100644 --- a/benchmarks_test.go +++ b/benchmarks_test.go @@ -235,13 +235,13 @@ func (w *worder) next() []byte { return w.lines[w.index] } -func readWWords(t *testing.T) [][]byte { +func readWWords(t testing.TB) [][]byte { t.Helper() // that's a list from the Wordle source code with a few erased to get a prime number file, err := os.Open("testdata/wwords.txt") if err != nil { - t.Error("Can't open file: " + err.Error()) + t.Fatal("Can't open file: " + err.Error()) } defer func(file *os.File) { _ = file.Close() diff --git a/v2_bench_test.go b/v2_bench_test.go index 6e8f289..f88b014 100644 --- a/v2_bench_test.go +++ b/v2_bench_test.go @@ -4,6 +4,7 @@ package quamina import ( "fmt" + "math/rand" "testing" "time" ) @@ -71,3 +72,64 @@ func Benchmark8259Example(b *testing.B) { count := float64(b.N) fmt.Printf("%.0f/sec\n", count/elapsed) } + +func BenchmarkShellStyleBuildTime(b *testing.B) { + words := readWWords(b)[:1000] + + source := rand.NewSource(293591) + starWords := make([]string, 0, len(words)) + expandedWords := make([]string, 0, len(words)) + patterns := make([]string, 0, len(words)) + for _, word := range words { + //nolint:gosec + starAt := source.Int63() % 6 + starWord := string(word[:starAt]) + "*" + string(word[starAt:]) + expandedWord := string(word[:starAt]) + "ÉÉÉÉ" + string(word[starAt:]) + starWords = append(starWords, starWord) + expandedWords = append(expandedWords, expandedWord) + pattern := fmt.Sprintf(`{"x": [ {"shellstyle": "%s" } ] }`, starWord) + patterns = append(patterns, pattern) + } + + q, _ := New() + before := time.Now() + for i := range words { + err := q.AddPattern(starWords[i], patterns[i]) + if err != nil { + b.Fatal("AddP: " + err.Error()) + } + } + elapsed := time.Since(before).Seconds() + fmt.Printf("Patterns/sec: %.1f\n", float64(len(words))/elapsed) + fmt.Println(matcherStats(q.matcher.(*coreMatcher))) + + // Build events: original words and expanded words + type event struct { + json []byte + word string + } + events := make([]event, 0, len(words)*2) + for i, word := range words { + events = append(events, + event{[]byte(fmt.Sprintf(`{"x": "%s"}`, word)), string(word)}, + event{[]byte(fmt.Sprintf(`{"x": "%s"}`, expandedWords[i])), expandedWords[i]}, + ) + } + + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + for _, ev := range events { + matches, err := q.MatchesForEvent(ev.json) + if err != nil { + b.Fatal("M4E on " + ev.word) + } + if len(matches) == 0 { + b.Fatal("no matches for " + ev.word) + } + } + } + elapsed = float64(b.Elapsed().Seconds()) + count := float64(b.N) + fmt.Printf("%.0f events/sec\n", count*float64(len(events))/elapsed) +} From a014dc7dd75bf7e3e14b03ff5c6abb58d299c659 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Wed, 18 Feb 2026 14:38:15 -0800 Subject: [PATCH 2/3] Rename readWWords param to tb to satisfy thelper lint Co-Authored-By: Claude Opus 4.6 --- benchmarks_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks_test.go b/benchmarks_test.go index b046bc0..bc13606 100644 --- a/benchmarks_test.go +++ b/benchmarks_test.go @@ -235,13 +235,13 @@ func (w *worder) next() []byte { return w.lines[w.index] } -func readWWords(t testing.TB) [][]byte { - t.Helper() +func readWWords(tb testing.TB) [][]byte { + tb.Helper() // that's a list from the Wordle source code with a few erased to get a prime number file, err := os.Open("testdata/wwords.txt") if err != nil { - t.Fatal("Can't open file: " + err.Error()) + tb.Fatal("Can't open file: " + err.Error()) } defer func(file *os.File) { _ = file.Close() From a5470e0fae1215352f197d52f1acdd32125fd897 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Wed, 18 Feb 2026 14:44:40 -0800 Subject: [PATCH 3/3] Parameterize readWWords to stop reading early Add maxWords parameter so callers that only need a subset (1000 or 2000 words) don't read all 13K lines from wwords.txt. Pass 0 to read all words. Co-Authored-By: Claude Opus 4.6 --- anything_but_test.go | 2 +- benchmarks_test.go | 11 +++++++---- regexp_nfa_test.go | 2 +- shell_style_test.go | 2 +- small_table_test.go | 2 +- v2_bench_test.go | 2 +- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/anything_but_test.go b/anything_but_test.go index 180228a..9754732 100644 --- a/anything_but_test.go +++ b/anything_but_test.go @@ -165,7 +165,7 @@ func TestAnythingButMatching(t *testing.T) { if err != nil { t.Error("AP: " + err.Error()) } - words := readWWords(t) + words := readWWords(t, 0) template := `{"a": "XX"}` problemTemplate := `{"a": XX}` for _, word := range problemWords { diff --git a/benchmarks_test.go b/benchmarks_test.go index bc13606..82a37cd 100644 --- a/benchmarks_test.go +++ b/benchmarks_test.go @@ -190,7 +190,7 @@ func TestBigShellStyle(t *testing.T) { // ~220K smallTables. Tried https://blog.twitch.tv/en/2019/04/10/go-memory-ballast-how-i-learnt-to-stop-worrying-and-love-the-heap/ // but it doesn't seem to help. func TestPatternAddition(t *testing.T) { - w := worder{0, readWWords(t)} + w := worder{0, readWWords(t, 0)} var msBefore, msAfter runtime.MemStats @@ -235,7 +235,9 @@ func (w *worder) next() []byte { return w.lines[w.index] } -func readWWords(tb testing.TB) [][]byte { +// readWWords reads up to maxWords words from testdata/wwords.txt. +// Pass 0 to read all words. +func readWWords(tb testing.TB, maxWords int) [][]byte { tb.Helper() // that's a list from the Wordle source code with a few erased to get a prime number @@ -250,11 +252,12 @@ func readWWords(tb testing.TB) [][]byte { buf := make([]byte, oneMeg) scanner.Buffer(buf, oneMeg) - lineCount := 0 var lines [][]byte for scanner.Scan() { - lineCount++ lines = append(lines, []byte(scanner.Text())) + if maxWords > 0 && len(lines) >= maxWords { + break + } } return lines } diff --git a/regexp_nfa_test.go b/regexp_nfa_test.go index dcb13ed..c8ae821 100644 --- a/regexp_nfa_test.go +++ b/regexp_nfa_test.go @@ -14,7 +14,7 @@ import ( // skinny RR: 3853.56/second with cache, 60.31 without, speedup 63.9 // func TestRRCacheEffectiveness(t *testing.T) { - words := readWWords(t)[:2000] + words := readWWords(t, 2000) re := "~p{L}+" pp := sharedNullPrinter var transitions []*fieldMatcher diff --git a/shell_style_test.go b/shell_style_test.go index 1ee097e..93bccbd 100644 --- a/shell_style_test.go +++ b/shell_style_test.go @@ -161,7 +161,7 @@ func TestShellStyleBuildTime(t *testing.T) { // automaton building or very slow (~2K/second) matching. The current version settles for the // latter. With a thousand patterns the automaton building is instant and the matching runs at // ~16K/second. I retain optimism that there is a path forward to win back the fast performance. - words := readWWords(t)[:1000] + words := readWWords(t, 1000) fmt.Printf("WC %d\n", len(words)) starWords := make([]string, 0, len(words)) diff --git a/small_table_test.go b/small_table_test.go index 91de286..2bd2b70 100644 --- a/small_table_test.go +++ b/small_table_test.go @@ -7,7 +7,7 @@ import ( ) func TestFAMergePerf(t *testing.T) { - words := readWWords(t) + words := readWWords(t, 0) patterns := make([]string, 0, len(words)) for _, word := range words { pattern := fmt.Sprintf(`{"x": [ "%s" ] }`, string(word)) diff --git a/v2_bench_test.go b/v2_bench_test.go index f88b014..930b578 100644 --- a/v2_bench_test.go +++ b/v2_bench_test.go @@ -74,7 +74,7 @@ func Benchmark8259Example(b *testing.B) { } func BenchmarkShellStyleBuildTime(b *testing.B) { - words := readWWords(b)[:1000] + words := readWWords(b, 1000) source := rand.NewSource(293591) starWords := make([]string, 0, len(words))