From ed386588973f8c8e0d8c1960cb8a69b9d9215017 Mon Sep 17 00:00:00 2001
From: Robert Sayre <sayrer@gmail.com>
Date: Wed, 18 Feb 2026 14:35:44 -0800
Subject: [PATCH 1/3] Add BenchmarkShellStyleBuildTime using testing.B
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert the manual TestShellStyleBuildTime timing into a proper
Go 1.24 b.Loop() benchmark. Uses 1000 shell-style wildcard patterns
merged onto a single field, producing an automaton with 7409 tables
and up to 900 epsilons—a stress test for NFA traversal at scale.

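Also widen readWWords to accept testing.TB so it works from both
tests and benchmarks, and make it fail immediately (t.Fatal instead
of t.Error) when testdata/wwords.txt cannot be opened.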

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 benchmarks_test.go |  4 +--
 v2_bench_test.go   | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/benchmarks_test.go b/benchmarks_test.go
index 2f006f1..b046bc0 100644
--- a/benchmarks_test.go
+++ b/benchmarks_test.go
@@ -235,13 +235,13 @@ func (w *worder) next() []byte {
 	return w.lines[w.index]
 }
 
-func readWWords(t *testing.T) [][]byte {
+func readWWords(t testing.TB) [][]byte {
 	t.Helper()
 
 	// that's a list from the Wordle source code with a few erased to get a prime number
 	file, err := os.Open("testdata/wwords.txt")
 	if err != nil {
-		t.Error("Can't open file: " + err.Error())
+		t.Fatal("Can't open file: " + err.Error())
 	}
 	defer func(file *os.File) {
 		_ = file.Close()
diff --git a/v2_bench_test.go b/v2_bench_test.go
index 6e8f289..f88b014 100644
--- a/v2_bench_test.go
+++ b/v2_bench_test.go
@@ -4,6 +4,7 @@ package quamina
 
 import (
 	"fmt"
+	"math/rand"
 	"testing"
 	"time"
 )
@@ -71,3 +72,64 @@ func Benchmark8259Example(b *testing.B) {
 	count := float64(b.N)
 	fmt.Printf("%.0f/sec\n", count/elapsed)
 }
+
+func BenchmarkShellStyleBuildTime(b *testing.B) {
+	words := readWWords(b)[:1000]
+
+	source := rand.NewSource(293591)
+	starWords := make([]string, 0, len(words))
+	expandedWords := make([]string, 0, len(words))
+	patterns := make([]string, 0, len(words))
+	for _, word := range words {
+		//nolint:gosec
+		starAt := source.Int63() % 6
+		starWord := string(word[:starAt]) + "*" + string(word[starAt:])
+		expandedWord := string(word[:starAt]) + "ÉÉÉÉ" + string(word[starAt:])
+		starWords = append(starWords, starWord)
+		expandedWords = append(expandedWords, expandedWord)
+		patterns = append(patterns, fmt.Sprintf(`{"x": [ {"shellstyle": "%s" } ] }`, starWord))
+	}
+
+	// Untimed setup: add every pattern on field "x" and print the build rate and matcher stats.
+	q, err := New()
+	if err != nil {
+		b.Fatal("New: " + err.Error())
+	}
+	before := time.Now()
+	for i := range words {
+		if err := q.AddPattern(starWords[i], patterns[i]); err != nil {
+			b.Fatal("AddP: " + err.Error())
+		}
+	}
+	elapsed := time.Since(before).Seconds()
+	fmt.Printf("Patterns/sec: %.1f\n", float64(len(words))/elapsed)
+	fmt.Println(matcherStats(q.matcher.(*coreMatcher)))
+
+	// Build events: each original word and its expansion; every event must match at least once.
+	type event struct {
+		json []byte
+		word string
+	}
+	events := make([]event, 0, len(words)*2)
+	for i, word := range words {
+		events = append(events,
+			event{[]byte(fmt.Sprintf(`{"x": "%s"}`, word)), string(word)},
+			event{[]byte(fmt.Sprintf(`{"x": "%s"}`, expandedWords[i])), expandedWords[i]},
+		)
+	}
+
+	// b.Loop keeps the setup above out of the timed region, so no explicit ResetTimer is needed.
+	b.ReportAllocs()
+	for b.Loop() {
+		for _, ev := range events {
+			matches, err := q.MatchesForEvent(ev.json)
+			if err != nil {
+				b.Fatal("M4E on " + ev.word + ": " + err.Error())
+			}
+			if len(matches) == 0 {
+				b.Fatal("no matches for " + ev.word)
+			}
+		}
+	}
+	fmt.Printf("%.0f events/sec\n", float64(b.N)*float64(len(events))/b.Elapsed().Seconds())
+}

From a014dc7dd75bf7e3e14b03ff5c6abb58d299c659 Mon Sep 17 00:00:00 2001
From: Robert Sayre <sayrer@gmail.com>
Date: Wed, 18 Feb 2026 14:38:15 -0800
Subject: [PATCH 2/3] Rename readWWords param to tb to satisfy thelper lint

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 benchmarks_test.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/benchmarks_test.go b/benchmarks_test.go
index b046bc0..bc13606 100644
--- a/benchmarks_test.go
+++ b/benchmarks_test.go
@@ -235,13 +235,13 @@ func (w *worder) next() []byte {
 	return w.lines[w.index]
 }
 
-func readWWords(t testing.TB) [][]byte {
-	t.Helper()
+func readWWords(tb testing.TB) [][]byte {
+	tb.Helper()
 
 	// that's a list from the Wordle source code with a few erased to get a prime number
 	file, err := os.Open("testdata/wwords.txt")
 	if err != nil {
-		t.Fatal("Can't open file: " + err.Error())
+		tb.Fatal("Can't open file: " + err.Error())
 	}
 	defer func(file *os.File) {
 		_ = file.Close()

From a5470e0fae1215352f197d52f1acdd32125fd897 Mon Sep 17 00:00:00 2001
From: Robert Sayre <sayrer@gmail.com>
Date: Wed, 18 Feb 2026 14:44:40 -0800
Subject: [PATCH 3/3] Parameterize readWWords to stop reading early

Add maxWords parameter so callers that only need a subset (1000 or
2000 words) don't read all 13K lines from wwords.txt. Pass 0 to
read all words. Also remove the lineCount counter, which was
incremented but never read.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 anything_but_test.go |  2 +-
 benchmarks_test.go   | 11 +++++++----
 regexp_nfa_test.go   |  2 +-
 shell_style_test.go  |  2 +-
 small_table_test.go  |  2 +-
 v2_bench_test.go     |  2 +-
 6 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/anything_but_test.go b/anything_but_test.go
index 180228a..9754732 100644
--- a/anything_but_test.go
+++ b/anything_but_test.go
@@ -165,7 +165,7 @@ func TestAnythingButMatching(t *testing.T) {
 	if err != nil {
 		t.Error("AP: " + err.Error())
 	}
-	words := readWWords(t)
+	words := readWWords(t, 0)
 	template := `{"a": "XX"}`
 	problemTemplate := `{"a": XX}`
 	for _, word := range problemWords {
diff --git a/benchmarks_test.go b/benchmarks_test.go
index bc13606..82a37cd 100644
--- a/benchmarks_test.go
+++ b/benchmarks_test.go
@@ -190,7 +190,7 @@ func TestBigShellStyle(t *testing.T) {
 // ~220K smallTables.  Tried https://blog.twitch.tv/en/2019/04/10/go-memory-ballast-how-i-learnt-to-stop-worrying-and-love-the-heap/
 // but it doesn't seem to help.
 func TestPatternAddition(t *testing.T) {
-	w := worder{0, readWWords(t)}
+	w := worder{0, readWWords(t, 0)}
 
 	var msBefore, msAfter runtime.MemStats
 
@@ -235,7 +235,9 @@ func (w *worder) next() []byte {
 	return w.lines[w.index]
 }
 
-func readWWords(tb testing.TB) [][]byte {
+// readWWords reads up to maxWords words from testdata/wwords.txt.
+// Pass 0 to read all words.
+func readWWords(tb testing.TB, maxWords int) [][]byte {
 	tb.Helper()
 
 	// that's a list from the Wordle source code with a few erased to get a prime number
@@ -250,11 +252,12 @@ func readWWords(tb testing.TB) [][]byte {
 	buf := make([]byte, oneMeg)
 	scanner.Buffer(buf, oneMeg)
 
-	lineCount := 0
 	var lines [][]byte
 	for scanner.Scan() {
-		lineCount++
 		lines = append(lines, []byte(scanner.Text()))
+		if maxWords > 0 && len(lines) >= maxWords {
+			break
+		}
 	}
 	return lines
 }
diff --git a/regexp_nfa_test.go b/regexp_nfa_test.go
index dcb13ed..c8ae821 100644
--- a/regexp_nfa_test.go
+++ b/regexp_nfa_test.go
@@ -14,7 +14,7 @@ import (
 // skinny  RR: 3853.56/second with cache, 60.31 without, speedup 63.9
 //
 func TestRRCacheEffectiveness(t *testing.T) {
-	words := readWWords(t)[:2000]
+	words := readWWords(t, 2000)
 	re := "~p{L}+"
 	pp := sharedNullPrinter
 	var transitions []*fieldMatcher
diff --git a/shell_style_test.go b/shell_style_test.go
index 1ee097e..93bccbd 100644
--- a/shell_style_test.go
+++ b/shell_style_test.go
@@ -161,7 +161,7 @@ func TestShellStyleBuildTime(t *testing.T) {
 	// automaton building or very slow (~2K/second) matching.  The current version settles for the
 	// latter. With a thousand patterns the automaton building is instant and the matching runs at
 	// ~16K/second.  I retain optimism that there is a path forward to win back the fast performance.
-	words := readWWords(t)[:1000]
+	words := readWWords(t, 1000)
 
 	fmt.Printf("WC %d\n", len(words))
 	starWords := make([]string, 0, len(words))
diff --git a/small_table_test.go b/small_table_test.go
index 91de286..2bd2b70 100644
--- a/small_table_test.go
+++ b/small_table_test.go
@@ -7,7 +7,7 @@ import (
 )
 
 func TestFAMergePerf(t *testing.T) {
-	words := readWWords(t)
+	words := readWWords(t, 0)
 	patterns := make([]string, 0, len(words))
 	for _, word := range words {
 		pattern := fmt.Sprintf(`{"x": [ "%s" ] }`, string(word))
diff --git a/v2_bench_test.go b/v2_bench_test.go
index f88b014..930b578 100644
--- a/v2_bench_test.go
+++ b/v2_bench_test.go
@@ -74,7 +74,7 @@ func Benchmark8259Example(b *testing.B) {
 }
 
 func BenchmarkShellStyleBuildTime(b *testing.B) {
-	words := readWWords(b)[:1000]
+	words := readWWords(b, 1000)
 
 	source := rand.NewSource(293591)
 	starWords := make([]string, 0, len(words))