From 6057b949cb2b636fe8ae98976f4a2e26ec99f741 Mon Sep 17 00:00:00 2001
From: Robert Sayre <sayrer@gmail.com>
Date: Tue, 10 Feb 2026 09:15:31 -0800
Subject: [PATCH 1/3] Add a benchmark targeting NFA DFA tradeoffs.

---
 regex_nfa_dfa_bench_test.go | 484 ++++++++++++++++++++++++++++++++++++
 1 file changed, 484 insertions(+)
 create mode 100644 regex_nfa_dfa_bench_test.go
diff --git a/regex_nfa_dfa_bench_test.go b/regex_nfa_dfa_bench_test.go
new file mode 100644
index 0000000..a4bbbff
--- /dev/null
+++ b/regex_nfa_dfa_bench_test.go
@@ -0,0 +1,484 @@
+package quamina
+
+import (
+	"fmt"
+	"math/rand"
+	"strings"
+	"testing"
+)
+
+// BenchmarkShellstyleSimpleWildcard exercises patterns like "a*b" where the
+// full DFA is tiny — just a handful of states. An eager nfa2dfa conversion
+// would trivially handle these and produce the fastest possible matcher, but
+// Quamina currently falls back to NFA traversal for shellstyle patterns.
+// This benchmark exists to show that simple wildcards deserve DFA treatment,
+// whether eager or lazy.
+func BenchmarkShellstyleSimpleWildcard(b *testing.B) {
+	// Simple prefix*suffix patterns — the DFA for each is ~3 states.
+	simplePatterns := []struct {
+		name       string
+		shellstyle string
+	}{
+		{"a*b", "a*b"},
+		{"foo*bar", "foo*bar"},
+		{"x*y*z", "x*y*z"},
+		{"he*lo", "he*lo"},
+	}
+
+	for _, sp := range simplePatterns {
+		b.Run(sp.name, func(b *testing.B) {
+			q, _ := New()
+			pattern := fmt.Sprintf(`{"val": [{"shellstyle": %q}]}`, sp.shellstyle)
+			if err := q.AddPattern(sp.name, pattern); err != nil {
+				b.Fatal(err)
+			}
+
+			// Build events that match — filler is lowercase ASCII.
+			rng := rand.New(rand.NewSource(42))
+			const poolSize = 64
+			events := make([][]byte, poolSize)
+			// For "a*b": produce "a<random chars>b"
+			// For "x*y*z": produce "x<random>y<random>z"
+			parts := strings.Split(sp.shellstyle, "*")
+			for i := range events {
+				var buf strings.Builder
+				for j, part := range parts {
+					buf.WriteString(part)
+					if j < len(parts)-1 {
+						// Random filler between fixed parts; draw the length (3-17) once.
+						for k, n := 0, 3+rng.Intn(15); k < n; k++ {
+							buf.WriteByte(byte('a' + rng.Intn(26)))
+						}
+					}
+				}
+				events[i] = []byte(fmt.Sprintf(`{"val": %q}`, buf.String()))
+			}
+
+			// Verify matches.
+			for i, event := range events {
+				matches, err := q.MatchesForEvent(event)
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("event %d: no match for %s", i, event)
+				}
+			}
+
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				matches, err := q.MatchesForEvent(events[i%poolSize])
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("event %d: no match", i%poolSize)
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkShellstyleNarrowInput creates shellstyle patterns whose wildcards can
+// match almost any Unicode codepoint, then benchmarks against input drawn from
+// a tiny slice of the alphabet. The eager DFA must construct states covering
+// the full Unicode byte space implied by "*". A demand-driven approach would
+// only need to materialize states for the bytes actually encountered, making
+// its effective state space proportional to the input alphabet rather than
+// the pattern alphabet.
+func BenchmarkShellstyleNarrowInput(b *testing.B) {
+	// Anchors are drawn from diverse Unicode blocks so the NFA's wildcard
+	// transitions must accommodate the full UTF-8 encoding range. But the
+	// text *between* the anchors in the input events only uses a narrow set.
+	type anchorSet struct {
+		name    string
+		anchors []string // characters that appear in patterns as fixed points around "*"
+	}
+
+	anchorSets := []anchorSet{
+		{
+			name:    "ascii_anchors",
+			anchors: []string{"X", "Y", "Z", "W", "Q"},
+		},
+		{
+			name:    "cjk_anchors",
+			anchors: []string{"東", "京", "北", "海", "山"},
+		},
+		{
+			name:    "mixed_script_anchors",
+			anchors: []string{"A", "Ω", "东", "🎯", "Й"},
+		},
+	}
+
+	// The narrow input alphabets — the characters that fill in between anchors.
+	type inputAlphabet struct {
+		name  string
+		chars []rune
+	}
+
+	inputAlphabets := []inputAlphabet{
+		{
+			name:  "digits_only",
+			chars: []rune("0123456789"),
+		},
+		{
+			name:  "lowercase_ascii",
+			chars: []rune("abcdefghijklmnopqrstuvwxyz"),
+		},
+		{
+			name:  "narrow_cjk",
+			chars: []rune("一二三四五六七八九十"),
+		},
+	}
+
+	for _, anchors := range anchorSets {
+		for _, alphabet := range inputAlphabets {
+			for _, patternCount := range []int{8, 32, 128} {
+				name := fmt.Sprintf("anchors=%s/input=%s/patterns=%d",
+					anchors.name, alphabet.name, patternCount)
+
+				b.Run(name, func(b *testing.B) {
+					q, _ := New()
+
+					// Build patterns like: *<anchor1>*<anchor2>*
+					// Each wildcard can match any Unicode, but input will
+					// only contain chars from the narrow alphabet.
+					type anchorPair struct{ a1, a2 string }
+					rng := rand.New(rand.NewSource(99))
+					pairs := make([]anchorPair, 0, patternCount)
+					for i := 0; i < patternCount; i++ {
+						a1 := anchors.anchors[rng.Intn(len(anchors.anchors))]
+						a2 := anchors.anchors[rng.Intn(len(anchors.anchors))]
+						pairs = append(pairs, anchorPair{a1, a2})
+						shellstyle := fmt.Sprintf("*%s*%s*", a1, a2)
+						pattern := fmt.Sprintf(`{"val": [{"shellstyle": %q}]}`, shellstyle)
+						if err := q.AddPattern(fmt.Sprintf("p%d", i), pattern); err != nil {
+							b.Fatal(err)
+						}
+					}
+
+					// Build events whose values contain the anchor characters
+					// (so they match) surrounded by padding drawn exclusively
+					// from the narrow alphabet.
+					const poolSize = 32
+					events := make([][]byte, poolSize)
+					for i := range events {
+						var buf strings.Builder
+						// random narrow padding; each run length (5-14) is drawn once
+						for j, n := 0, 5+rng.Intn(10); j < n; j++ {
+							buf.WriteRune(alphabet.chars[rng.Intn(len(alphabet.chars))])
+						}
+						// insert two anchors from an actual pattern so the event is guaranteed to match
+						pair := pairs[rng.Intn(len(pairs))]
+						buf.WriteString(pair.a1)
+						for j, n := 0, 5+rng.Intn(10); j < n; j++ {
+							buf.WriteRune(alphabet.chars[rng.Intn(len(alphabet.chars))])
+						}
+						buf.WriteString(pair.a2)
+						for j, n := 0, 5+rng.Intn(10); j < n; j++ {
+							buf.WriteRune(alphabet.chars[rng.Intn(len(alphabet.chars))])
+						}
+						events[i] = []byte(fmt.Sprintf(`{"val": %q}`, buf.String()))
+					}
+
+					// Sanity check: at least some events should match.
+					matchCount := 0
+					for _, event := range events {
+						matches, err := q.MatchesForEvent(event)
+						if err != nil {
+							b.Fatal(err)
+						}
+						matchCount += len(matches)
+					}
+					if matchCount == 0 {
+						b.Fatal("no matches at all — check pattern/event construction")
+					}
+
+					b.ReportAllocs()
+					b.ResetTimer()
+					for i := 0; i < b.N; i++ {
+						matches, err := q.MatchesForEvent(events[i%poolSize])
+						if err != nil {
+							b.Fatal(err)
+						}
+						if len(matches) == 0 {
+							b.Fatalf("expected matches for event %d", i%poolSize)
+						}
+					}
+				})
+			}
+		}
+	}
+}
+
+// BenchmarkShellstyleWidePatternsScaling focuses specifically on the scaling
+// behavior as pattern count grows, with maximally broad patterns (every "*"
+// accepts all of Unicode) but input restricted to ASCII digits. This isolates
+// a demand-driven DFA's advantage: the cache only needs entries for ~10 distinct byte
+// values regardless of how many Unicode codepoints the pattern theoretically
+// permits.
+func BenchmarkShellstyleWidePatternsScaling(b *testing.B) {
+	digits := []rune("0123456789")
+
+	// Use anchors from multiple scripts to force the NFA to have transitions
+	// spanning the full UTF-8 byte range.
+	allAnchors := []string{
+		"A", "B", "C", "D", "E", // Latin
+		"Α", "Β", "Γ", "Δ", "Ε", // Greek
+		"東", "京", "北", "上", "大", // CJK
+		"🎯", "🚀", "🌟", "❤", "🎉", // Emoji
+		"Д", "Ж", "З", "И", "К", // Cyrillic
+	}
+
+	for _, patternCount := range []int{8, 16, 32, 64, 128, 256, 512} {
+		b.Run(fmt.Sprintf("patterns=%d", patternCount), func(b *testing.B) {
+			q, _ := New()
+			rng := rand.New(rand.NewSource(77))
+
+			type anchorPair struct{ a1, a2 string }
+			pairs := make([]anchorPair, 0, patternCount)
+			for i := 0; i < patternCount; i++ {
+				a1 := allAnchors[rng.Intn(len(allAnchors))]
+				a2 := allAnchors[rng.Intn(len(allAnchors))]
+				pairs = append(pairs, anchorPair{a1, a2})
+				shellstyle := fmt.Sprintf("*%s*%s*", a1, a2)
+				pattern := fmt.Sprintf(`{"val": [{"shellstyle": %q}]}`, shellstyle)
+				if err := q.AddPattern(fmt.Sprintf("p%d", i), pattern); err != nil {
+					b.Fatal(err)
+				}
+			}
+
+			// Events use only ASCII digits as filler — the narrowest possible
+			// byte alphabet (10 distinct values, all single-byte).
+			const poolSize = 64
+			events := make([][]byte, poolSize)
+			for i := range events {
+				var buf strings.Builder
+				// digit padding; each run length (3-7) is drawn once
+				for j, n := 0, 3+rng.Intn(5); j < n; j++ {
+					buf.WriteRune(digits[rng.Intn(len(digits))])
+				}
+				// two anchors from an actual pattern embedded in digit soup
+				pair := pairs[rng.Intn(len(pairs))]
+				buf.WriteString(pair.a1)
+				for j, n := 0, 3+rng.Intn(5); j < n; j++ {
+					buf.WriteRune(digits[rng.Intn(len(digits))])
+				}
+				buf.WriteString(pair.a2)
+				for j, n := 0, 3+rng.Intn(5); j < n; j++ {
+					buf.WriteRune(digits[rng.Intn(len(digits))])
+				}
+				events[i] = []byte(fmt.Sprintf(`{"val": %q}`, buf.String()))
+			}
+
+			matchCount := 0
+			for _, event := range events {
+				matches, err := q.MatchesForEvent(event)
+				if err != nil {
+					b.Fatal(err)
+				}
+				matchCount += len(matches)
+			}
+			if matchCount == 0 {
+				b.Fatal("no matches — check construction")
+			}
+
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				matches, err := q.MatchesForEvent(events[i%poolSize])
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("expected matches for event %d", i%poolSize)
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkShellstyleSimpleWildcardScaling adds multiple simple patterns to
+// show that even a modest collection of small-DFA patterns benefits from DFA
+// conversion. Each pattern is independent (different prefix/suffix), so the
+// merged DFA stays small.
+func BenchmarkShellstyleSimpleWildcardScaling(b *testing.B) {
+	prefixes := "abcdefghijklmnopqrstuvwxyz"
+	suffixes := "zyxwvutsrqponmlkjihgfedcba"
+
+	for _, patternCount := range []int{1, 4, 8, 16, 26} {
+		b.Run(fmt.Sprintf("patterns=%d", patternCount), func(b *testing.B) {
+			q, _ := New()
+
+			for i := 0; i < patternCount; i++ {
+				shellstyle := fmt.Sprintf("%c*%c", prefixes[i], suffixes[i])
+				pattern := fmt.Sprintf(`{"val": [{"shellstyle": %q}]}`, shellstyle)
+				if err := q.AddPattern(fmt.Sprintf("p%d", i), pattern); err != nil {
+					b.Fatal(err)
+				}
+			}
+
+			// Build events that match — each targets a random pattern (5-24 filler bytes, drawn once).
+			rng := rand.New(rand.NewSource(42))
+			const poolSize = 64
+			events := make([][]byte, poolSize)
+			for i := range events {
+				idx := rng.Intn(patternCount)
+				var buf strings.Builder
+				buf.WriteByte(prefixes[idx])
+				for j, n := 0, 5+rng.Intn(20); j < n; j++ {
+					buf.WriteByte(byte('a' + rng.Intn(26)))
+				}
+				buf.WriteByte(suffixes[idx])
+				events[i] = []byte(fmt.Sprintf(`{"val": %q}`, buf.String()))
+			}
+
+			// Verify at least some match.
+			matchCount := 0
+			for _, event := range events {
+				matches, err := q.MatchesForEvent(event)
+				if err != nil {
+					b.Fatal(err)
+				}
+				matchCount += len(matches)
+			}
+			if matchCount == 0 {
+				b.Fatal("no matches at all")
+			}
+
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				matches, err := q.MatchesForEvent(events[i%poolSize])
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("event %d: no match", i%poolSize)
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkShellstyleZWJEmoji exercises NFA traversal on input containing
+// ZWJ (Zero Width Joiner) emoji sequences mixed with Japanese text. This is
+// a worst case for byte-level automaton traversal because:
+//
+//  1. ZWJ emoji sequences encode a single visible glyph as many codepoints
+//     joined by U+200D (ZWJ), producing 15-25+ bytes per "character".
+//  2. The ZWJ byte sequence (0xE2 0x80 0x8D) shares its leading byte 0xE2
+//     with CJK characters, hiragana, katakana, and other BMP codepoints,
+//     creating massive byte-level ambiguity in the NFA.
+//  3. Variation selectors (U+FE0F = 0xEF 0xB8 0x8F) add further multi-byte
+//     sequences that interleave with the emoji and Japanese text.
+//
+// The NFA must branch at nearly every byte because 0xE2 and 0xEF are
+// shared prefixes across many unrelated codepoints in the input.
+func BenchmarkShellstyleZWJEmoji(b *testing.B) {
+	// ZWJ emoji sequences — each is a single glyph but many bytes.
+	zwjEmoji := []string{
+		"👨\u200D👩\u200D👧\u200D👦", // family
+		"👩\u200D🚀",               // woman astronaut
+		"🏳\uFE0F\u200D🌈",         // rainbow flag
+		"👨\u200D💻",               // man technologist
+		"🧑\u200D🎤",               // singer
+		"👩\u200D🔬",               // woman scientist
+		"🐻\u200D❄\uFE0F",         // polar bear
+		"👁\uFE0F\u200D🗨\uFE0F",   // eye in speech bubble
+	}
+
+	// Japanese text that shares leading UTF-8 bytes with ZWJ sequences.
+	// Hiragana (U+3040-309F): 0xE3 0x81 0x80 - 0xE3 0x82 0x9F
+	// Katakana (U+30A0-30FF): 0xE3 0x82 0xA0 - 0xE3 0x83 0xBF
+	// CJK (U+4E00+):          0xE4-0xE9 ...
+	// All start with 0xE3/0xE4+ which the NFA cannot distinguish from
+	// 0xE2 (ZWJ prefix) without reading the second byte.
+	japaneseFiller := []string{
+		"東京都渋谷区",
+		"新宿駅前通り",
+		"こんにちは",
+		"カタカナテスト",
+		"令和七年",
+		"人工知能研究所",
+		"品川駅南口",
+		"秋葉原電気街",
+	}
+
+	// Patterns use ZWJ emoji as anchors with wildcards between them.
+	// The "*" must handle both Japanese multi-byte text and ZWJ byte
+	// sequences, forcing the NFA to branch heavily on shared leading bytes.
+	type benchCase struct {
+		name         string
+		patternCount int
+	}
+
+	cases := []benchCase{
+		{"patterns=4", 4},
+		{"patterns=8", 8},
+		{"patterns=16", 16},
+		{"patterns=32", 32},
+		{"patterns=64", 64},
+	}
+
+	for _, bc := range cases {
+		b.Run(bc.name, func(b *testing.B) {
+			q, _ := New()
+			rng := rand.New(rand.NewSource(2025))
+
+			type emojiPair struct{ e1, e2 string }
+			ePairs := make([]emojiPair, 0, bc.patternCount)
+			for i := 0; i < bc.patternCount; i++ {
+				e1 := zwjEmoji[rng.Intn(len(zwjEmoji))]
+				e2 := zwjEmoji[rng.Intn(len(zwjEmoji))]
+				ePairs = append(ePairs, emojiPair{e1, e2})
+				shellstyle := fmt.Sprintf("*%s*%s*", e1, e2)
+				pattern := fmt.Sprintf(`{"val": [{"shellstyle": %q}]}`, shellstyle)
+				if err := q.AddPattern(fmt.Sprintf("p%d", i), pattern); err != nil {
+					b.Fatal(err)
+				}
+			}
+
+			// Events: Japanese filler interspersed with ZWJ emoji anchors.
+			// The NFA sees a stream of 0xE2, 0xE3, 0xE4, 0xEF bytes and
+			// must disambiguate at every step.
+			const poolSize = 64
+			events := make([][]byte, poolSize)
+			for i := range events {
+				pair := ePairs[rng.Intn(len(ePairs))]
+				var buf strings.Builder
+				buf.WriteString(japaneseFiller[rng.Intn(len(japaneseFiller))])
+				buf.WriteString(pair.e1)
+				buf.WriteString(japaneseFiller[rng.Intn(len(japaneseFiller))])
+				buf.WriteString(pair.e2)
+				buf.WriteString(japaneseFiller[rng.Intn(len(japaneseFiller))])
+				events[i] = []byte(fmt.Sprintf(`{"val": %q}`, buf.String()))
+			}
+
+			matchCount := 0
+			for _, event := range events {
+				matches, err := q.MatchesForEvent(event)
+				if err != nil {
+					b.Fatal(err)
+				}
+				matchCount += len(matches)
+			}
+			if matchCount == 0 {
+				b.Fatal("no matches — check pattern/event construction")
+			}
+
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				matches, err := q.MatchesForEvent(events[i%poolSize])
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("event %d: no match", i%poolSize)
+				}
+			}
+		})
+	}
+}

From 5d0cde100004e8e8c059a2b14e321e8e88e29a9d Mon Sep 17 00:00:00 2001
From: Robert Sayre <sayrer@gmail.com>
Date: Tue, 10 Feb 2026 09:36:24 -0800
Subject: [PATCH 2/3] Fix comment.

---
Comment-only change. Event construction keeps drawing its two anchors from a
registered pattern pair, because the timed loop in BenchmarkShellstyleZWJEmoji
calls b.Fatalf on any event that matches no pattern.

 regex_nfa_dfa_bench_test.go | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/regex_nfa_dfa_bench_test.go b/regex_nfa_dfa_bench_test.go
index a4bbbff..ca39e4a 100644
--- a/regex_nfa_dfa_bench_test.go
+++ b/regex_nfa_dfa_bench_test.go
@@ -389,12 +389,17 @@ func BenchmarkShellstyleZWJEmoji(b *testing.B) {
 		"👁\uFE0F\u200D🗨\uFE0F",   // eye in speech bubble
 	}
 
-	// Japanese text that shares leading UTF-8 bytes with ZWJ sequences.
+	// Japanese text using leading UTF-8 bytes near the ZWJ range.
 	// Hiragana (U+3040-309F): 0xE3 0x81 0x80 - 0xE3 0x82 0x9F
 	// Katakana (U+30A0-30FF): 0xE3 0x82 0xA0 - 0xE3 0x83 0xBF
 	// CJK (U+4E00+):          0xE4-0xE9 ...
-	// All start with 0xE3/0xE4+ which the NFA cannot distinguish from
-	// 0xE2 (ZWJ prefix) without reading the second byte.
+	// The ZWJ byte sequence (0xE2 0x80 0x8D) shares its leading byte
+	// 0xE2 with thousands of other BMP codepoints (U+2000-U+2FFF), so
+	// the NFA cannot distinguish a ZWJ from other 0xE2-prefixed characters
+	// without reading the second and third bytes. Combined with the Japanese
+	// filler (0xE3, 0xE4+) and variation selectors (0xEF), the wildcard's
+	// self-loop must handle dense multi-byte traffic across several leading
+	// byte ranges.
 	japaneseFiller := []string{
 		"東京都渋谷区",
 		"新宿駅前通り",

From 9f147a394092f65dc09b538e24c9425d1f611eac Mon Sep 17 00:00:00 2001
From: Robert Sayre <sayrer@gmail.com>
Date: Tue, 10 Feb 2026 09:39:46 -0800
Subject: [PATCH 3/3] Fix comment.

---
 regex_nfa_dfa_bench_test.go | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/regex_nfa_dfa_bench_test.go b/regex_nfa_dfa_bench_test.go
index ca39e4a..7b5b053 100644
--- a/regex_nfa_dfa_bench_test.go
+++ b/regex_nfa_dfa_bench_test.go
@@ -364,18 +364,24 @@ func BenchmarkShellstyleSimpleWildcardScaling(b *testing.B) {
 
 // BenchmarkShellstyleZWJEmoji exercises NFA traversal on input containing
 // ZWJ (Zero Width Joiner) emoji sequences mixed with Japanese text. This is
-// a worst case for byte-level automaton traversal because:
+// a demanding case for byte-level automaton traversal because:
 //
 //  1. ZWJ emoji sequences encode a single visible glyph as many codepoints
 //     joined by U+200D (ZWJ), producing 15-25+ bytes per "character".
 //  2. The ZWJ byte sequence (0xE2 0x80 0x8D) shares its leading byte 0xE2
-//     with CJK characters, hiragana, katakana, and other BMP codepoints,
-//     creating massive byte-level ambiguity in the NFA.
+//     with thousands of other BMP codepoints (U+2000-U+2FFF), so the NFA
+//     cannot tell if 0xE2 begins a ZWJ or some unrelated character without
+//     reading the second and third bytes.
 //  3. Variation selectors (U+FE0F = 0xEF 0xB8 0x8F) add further multi-byte
 //     sequences that interleave with the emoji and Japanese text.
+//  4. The input mixes several dense leading-byte ranges (0xE2 for ZWJ,
+//     0xE3 for hiragana/katakana, 0xE4+ for CJK, 0xEF for variation
+//     selectors), so the wildcard's self-loop must track many active
+//     multi-byte paths simultaneously.
 //
-// The NFA must branch at nearly every byte because 0xE2 and 0xEF are
-// shared prefixes across many unrelated codepoints in the input.
+// The wildcard's self-loop faces heavy branching because 0xE2 alone is
+// the leading byte for thousands of BMP codepoints (U+2000-U+2FFF),
+// and 0xEF covers another dense range including variation selectors.
 func BenchmarkShellstyleZWJEmoji(b *testing.B) {
 	// ZWJ emoji sequences — each is a single glyph but many bytes.
 	zwjEmoji := []string{