Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion anything_but_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ func TestAnythingButMatching(t *testing.T) {
if err != nil {
t.Error("AP: " + err.Error())
}
words := readWWords(t)
words := readWWords(t, 0)
template := `{"a": "XX"}`
problemTemplate := `{"a": XX}`
for _, word := range problemWords {
Expand Down
15 changes: 9 additions & 6 deletions benchmarks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ func TestBigShellStyle(t *testing.T) {
// ~220K smallTables. Tried https://blog.twitch.tv/en/2019/04/10/go-memory-ballast-how-i-learnt-to-stop-worrying-and-love-the-heap/
// but it doesn't seem to help.
func TestPatternAddition(t *testing.T) {
w := worder{0, readWWords(t)}
w := worder{0, readWWords(t, 0)}

var msBefore, msAfter runtime.MemStats

Expand Down Expand Up @@ -235,13 +235,15 @@ func (w *worder) next() []byte {
return w.lines[w.index]
}

func readWWords(t *testing.T) [][]byte {
t.Helper()
// readWWords reads up to maxWords words from testdata/wwords.txt.
// Pass 0 to read all words.
func readWWords(tb testing.TB, maxWords int) [][]byte {
tb.Helper()

// that's a list from the Wordle source code with a few erased to get a prime number
file, err := os.Open("testdata/wwords.txt")
if err != nil {
t.Error("Can't open file: " + err.Error())
tb.Fatal("Can't open file: " + err.Error())
}
defer func(file *os.File) {
_ = file.Close()
Expand All @@ -250,11 +252,12 @@ func readWWords(t *testing.T) [][]byte {
buf := make([]byte, oneMeg)
scanner.Buffer(buf, oneMeg)

lineCount := 0
var lines [][]byte
for scanner.Scan() {
lineCount++
lines = append(lines, []byte(scanner.Text()))
if maxWords > 0 && len(lines) >= maxWords {
break
}
}
return lines
}
2 changes: 1 addition & 1 deletion regexp_nfa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
// skinny RR: 3853.56/second with cache, 60.31 without, speedup 63.9
//
func TestRRCacheEffectiveness(t *testing.T) {
words := readWWords(t)[:2000]
words := readWWords(t, 2000)
re := "~p{L}+"
pp := sharedNullPrinter
var transitions []*fieldMatcher
Expand Down
2 changes: 1 addition & 1 deletion shell_style_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ func TestShellStyleBuildTime(t *testing.T) {
// automaton building or very slow (~2K/second) matching. The current version settles for the
// latter. With a thousand patterns the automaton building is instant and the matching runs at
// ~16K/second. I retain optimism that there is a path forward to win back the fast performance.
words := readWWords(t)[:1000]
words := readWWords(t, 1000)

fmt.Printf("WC %d\n", len(words))
starWords := make([]string, 0, len(words))
Expand Down
2 changes: 1 addition & 1 deletion small_table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
)

func TestFAMergePerf(t *testing.T) {
words := readWWords(t)
words := readWWords(t, 0)
patterns := make([]string, 0, len(words))
for _, word := range words {
pattern := fmt.Sprintf(`{"x": [ "%s" ] }`, string(word))
Expand Down
62 changes: 62 additions & 0 deletions v2_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package quamina

import (
"fmt"
"math/rand"
"testing"
"time"
)
Expand Down Expand Up @@ -71,3 +72,64 @@ func Benchmark8259Example(b *testing.B) {
count := float64(b.N)
fmt.Printf("%.0f/sec\n", count/elapsed)
}

// BenchmarkShellStyleBuildTime builds a matcher from 1000 shellstyle
// patterns — each a Wordle word with a "*" inserted at a pseudo-random
// offset — reports the pattern-add rate and matcher stats, then benchmarks
// matching events built from both the original words and versions with the
// "*" position expanded to "ÉÉÉÉ" (so every event must match its pattern).
func BenchmarkShellStyleBuildTime(b *testing.B) {
	words := readWWords(b, 1000)

	// Fixed seed so every run generates the same patterns and builds the
	// same automaton, keeping benchmark runs comparable.
	source := rand.NewSource(293591)
	starWords := make([]string, 0, len(words))
	expandedWords := make([]string, 0, len(words))
	patterns := make([]string, 0, len(words))
	for _, word := range words {
		//nolint:gosec
		starAt := source.Int63() % 6
		starWord := string(word[:starAt]) + "*" + string(word[starAt:])
		expandedWord := string(word[:starAt]) + "ÉÉÉÉ" + string(word[starAt:])
		starWords = append(starWords, starWord)
		expandedWords = append(expandedWords, expandedWord)
		pattern := fmt.Sprintf(`{"x": [ {"shellstyle": "%s" } ] }`, starWord)
		patterns = append(patterns, pattern)
	}

	// Pattern addition is timed by hand because it happens once, outside
	// the benchmark loop.
	q, err := New()
	if err != nil {
		b.Fatal("New: " + err.Error())
	}
	before := time.Now()
	for i := range words {
		if err := q.AddPattern(starWords[i], patterns[i]); err != nil {
			b.Fatal("AddP: " + err.Error())
		}
	}
	elapsed := time.Since(before).Seconds()
	fmt.Printf("Patterns/sec: %.1f\n", float64(len(words))/elapsed)
	fmt.Println(matcherStats(q.matcher.(*coreMatcher)))

	// Build events: each word appears twice, once verbatim and once with
	// the starred position expanded; the word is kept for error reporting.
	type event struct {
		json []byte
		word string
	}
	events := make([]event, 0, len(words)*2)
	for i, word := range words {
		events = append(events,
			event{[]byte(fmt.Sprintf(`{"x": "%s"}`, word)), string(word)},
			event{[]byte(fmt.Sprintf(`{"x": "%s"}`, expandedWords[i])), expandedWords[i]},
		)
	}

	b.ResetTimer()
	b.ReportAllocs()
	for b.Loop() {
		for _, ev := range events {
			matches, err := q.MatchesForEvent(ev.json)
			if err != nil {
				// Include the underlying error, not just the word,
				// so a failure is diagnosable from the log.
				b.Fatal("M4E on " + ev.word + ": " + err.Error())
			}
			if len(matches) == 0 {
				b.Fatal("no matches for " + ev.word)
			}
		}
	}
	// Seconds() already returns float64; no conversion needed.
	elapsed = b.Elapsed().Seconds()
	count := float64(b.N)
	fmt.Printf("%.0f events/sec\n", count*float64(len(events))/elapsed)
}
Loading