diff --git a/regex_nfa_dfa_bench_test.go b/regex_nfa_dfa_bench_test.go
new file mode 100644
index 0000000..7b5b053
--- /dev/null
+++ b/regex_nfa_dfa_bench_test.go
@@ -0,0 +1,491 @@
+package quamina
+
+import (
+	"fmt"
+	"math/rand"
+	"strings"
+	"testing"
+)
+
+// BenchmarkShellstyleSimpleWildcard exercises patterns like "a*b" where the
+// full DFA is tiny — just a handful of states. An eager nfa2dfa conversion
+// would trivially handle these and produce the fastest possible matcher, but
+// Quamina currently falls back to NFA traversal for shellstyle patterns.
+// This benchmark exists to show that simple wildcards deserve DFA treatment,
+// whether eager or lazy.
+func BenchmarkShellstyleSimpleWildcard(b *testing.B) {
+	// Simple prefix*suffix patterns — the DFA for each is ~3 states. The
+	// shellstyle text doubles as the sub-benchmark name and the pattern ID.
+	simplePatterns := []string{"a*b", "foo*bar", "x*y*z", "he*lo"}
+
+	for _, shellstyle := range simplePatterns {
+		b.Run(shellstyle, func(b *testing.B) {
+			q, err := New()
+			if err != nil {
+				b.Fatal(err)
+			}
+			pattern := fmt.Sprintf(`{"val": [{"shellstyle": %q}]}`, shellstyle)
+			if err := q.AddPattern(shellstyle, pattern); err != nil {
+				b.Fatal(err)
+			}
+
+			// Build events that match — filler is lowercase ASCII.
+			// For "a*b": produce "a<random chars>b"
+			// For "x*y*z": produce "x<random>y<random>z"
+			rng := rand.New(rand.NewSource(42))
+			const poolSize = 64
+			fixedParts := strings.Split(shellstyle, "*")
+			events := make([][]byte, poolSize)
+			for i := range events {
+				var buf strings.Builder
+				for j, part := range fixedParts {
+					buf.WriteString(part)
+					if j == len(fixedParts)-1 {
+						break // no filler after the final fixed part
+					}
+					// Draw the filler length once. Calling rng.Intn in the
+					// loop condition would re-roll the bound on every
+					// iteration and skew the lengths toward the minimum.
+					fillerLen := 3 + rng.Intn(15)
+					for k := 0; k < fillerLen; k++ {
+						buf.WriteByte(byte('a' + rng.Intn(26)))
+					}
+				}
+				events[i] = []byte(fmt.Sprintf(`{"val": %q}`, buf.String()))
+			}
+
+			// Verify, outside the timed region, that every event matches.
+			for i, event := range events {
+				matches, err := q.MatchesForEvent(event)
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("event %d: no match for %s", i, event)
+				}
+			}
+
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				matches, err := q.MatchesForEvent(events[i%poolSize])
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("event %d: no match", i%poolSize)
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkShellstyleNarrowInput creates shellstyle patterns whose wildcards can
+// match almost any Unicode codepoint, then benchmarks against input drawn from
+// a tiny slice of the alphabet. The eager DFA must construct states covering
+// the full Unicode byte space implied by "*". A demand-driven approach would
+// only need to materialize states for the bytes actually encountered, making
+// its effective state space proportional to the input alphabet rather than
+// the pattern alphabet.
+func BenchmarkShellstyleNarrowInput(b *testing.B) {
+	// Anchors are drawn from diverse Unicode blocks so the NFA's wildcard
+	// transitions must accommodate the full UTF-8 encoding range. But the
+	// text *between* the anchors in the input events only uses a narrow set.
+	type anchorSet struct {
+		name    string
+		anchors []string // characters that appear in patterns as fixed points around "*"
+	}
+
+	anchorSets := []anchorSet{
+		{
+			name:    "ascii_anchors",
+			anchors: []string{"X", "Y", "Z", "W", "Q"},
+		},
+		{
+			name:    "cjk_anchors",
+			anchors: []string{"東", "京", "北", "海", "山"},
+		},
+		{
+			name:    "mixed_script_anchors",
+			anchors: []string{"A", "Ω", "东", "🎯", "Й"},
+		},
+	}
+
+	// The narrow input alphabets — the characters that fill in between anchors.
+	type inputAlphabet struct {
+		name  string
+		chars []rune
+	}
+
+	inputAlphabets := []inputAlphabet{
+		{
+			name:  "digits_only",
+			chars: []rune("0123456789"),
+		},
+		{
+			name:  "lowercase_ascii",
+			chars: []rune("abcdefghijklmnopqrstuvwxyz"),
+		},
+		{
+			name:  "narrow_cjk",
+			chars: []rune("一二三四五六七八九十"),
+		},
+	}
+
+	for _, anchors := range anchorSets {
+		for _, alphabet := range inputAlphabets {
+			for _, patternCount := range []int{8, 32, 128} {
+				name := fmt.Sprintf("anchors=%s/input=%s/patterns=%d",
+					anchors.name, alphabet.name, patternCount)
+				b.Run(name, func(b *testing.B) {
+					q, err := New()
+					if err != nil {
+						b.Fatal(err)
+					}
+					rng := rand.New(rand.NewSource(99))
+					// writePadding appends 5-14 runes of the narrow alphabet. The count is
+					// drawn once; rng.Intn in a loop condition re-rolls it every iteration.
+					writePadding := func(buf *strings.Builder) {
+						n := 5 + rng.Intn(10)
+						for j := 0; j < n; j++ {
+							buf.WriteRune(alphabet.chars[rng.Intn(len(alphabet.chars))])
+						}
+					}
+
+					// Build patterns like: *<anchor1>*<anchor2>*
+					// Each wildcard can match any Unicode, but input will
+					// only contain chars from the narrow alphabet.
+					type anchorPair struct{ a1, a2 string }
+					pairs := make([]anchorPair, 0, patternCount)
+					for i := 0; i < patternCount; i++ {
+						a1 := anchors.anchors[rng.Intn(len(anchors.anchors))]
+						a2 := anchors.anchors[rng.Intn(len(anchors.anchors))]
+						pairs = append(pairs, anchorPair{a1, a2})
+						pattern := fmt.Sprintf(`{"val": [{"shellstyle": %q}]}`, "*"+a1+"*"+a2+"*")
+						if err := q.AddPattern(fmt.Sprintf("p%d", i), pattern); err != nil {
+							b.Fatal(err)
+						}
+					}
+
+					// Build events whose values contain the anchor characters
+					// (so they match) surrounded by padding drawn exclusively
+					// from the narrow alphabet.
+					const poolSize = 32
+					events := make([][]byte, poolSize)
+					for i := range events {
+						var buf strings.Builder
+						// insert two anchors from an actual pattern so the event is guaranteed to match
+						pair := pairs[rng.Intn(len(pairs))]
+						writePadding(&buf)
+						buf.WriteString(pair.a1)
+						writePadding(&buf)
+						buf.WriteString(pair.a2)
+						writePadding(&buf)
+						events[i] = []byte(fmt.Sprintf(`{"val": %q}`, buf.String()))
+					}
+
+					// Every event must match: the timed loop below treats a miss as fatal.
+					for i, event := range events {
+						matches, err := q.MatchesForEvent(event)
+						if err != nil {
+							b.Fatal(err)
+						}
+						if len(matches) == 0 {
+							b.Fatalf("event %d: no match for %s — check pattern/event construction", i, event)
+						}
+					}
+
+					b.ReportAllocs()
+					b.ResetTimer()
+					for i := 0; i < b.N; i++ {
+						matches, err := q.MatchesForEvent(events[i%poolSize])
+						if err != nil {
+							b.Fatal(err)
+						}
+						if len(matches) == 0 {
+							b.Fatalf("expected matches for event %d", i%poolSize)
+						}
+					}
+				})
+			}
+		}
+	}
+}
+
+// BenchmarkShellstyleWidePatternsScaling focuses specifically on the scaling
+// behavior as pattern count grows, with maximally broad patterns (every "*"
+// accepts all of Unicode) but event filler restricted to ASCII digits. This
+// isolates a demand-driven DFA's advantage: besides the anchors themselves,
+// the cache only needs entries for ~10 distinct byte values regardless of how
+// many Unicode codepoints the pattern theoretically permits.
+func BenchmarkShellstyleWidePatternsScaling(b *testing.B) {
+	digits := []rune("0123456789")
+	// Use anchors from multiple scripts to force the NFA to have transitions
+	// spanning the full UTF-8 byte range.
+	allAnchors := []string{
+		"A", "B", "C", "D", "E", // Latin
+		"Α", "Β", "Γ", "Δ", "Ε", // Greek
+		"東", "京", "北", "上", "大", // CJK
+		"🎯", "🚀", "🌟", "❤", "🎉", // Emoji
+		"Д", "Ж", "З", "И", "К", // Cyrillic
+	}
+
+	for _, patternCount := range []int{8, 16, 32, 64, 128, 256, 512} {
+		b.Run(fmt.Sprintf("patterns=%d", patternCount), func(b *testing.B) {
+			q, err := New()
+			if err != nil {
+				b.Fatal(err)
+			}
+			rng := rand.New(rand.NewSource(77))
+			// writeDigits appends 3-7 random digits. The count is drawn once;
+			// rng.Intn in a loop condition would re-roll it on every iteration.
+			writeDigits := func(buf *strings.Builder) {
+				n := 3 + rng.Intn(5)
+				for j := 0; j < n; j++ {
+					buf.WriteRune(digits[rng.Intn(len(digits))])
+				}
+			}
+
+			type anchorPair struct{ a1, a2 string }
+			pairs := make([]anchorPair, 0, patternCount)
+			for i := 0; i < patternCount; i++ {
+				a1 := allAnchors[rng.Intn(len(allAnchors))]
+				a2 := allAnchors[rng.Intn(len(allAnchors))]
+				pairs = append(pairs, anchorPair{a1, a2})
+				pattern := fmt.Sprintf(`{"val": [{"shellstyle": %q}]}`, "*"+a1+"*"+a2+"*")
+				if err := q.AddPattern(fmt.Sprintf("p%d", i), pattern); err != nil {
+					b.Fatal(err)
+				}
+			}
+
+			// Events use only ASCII digits as filler — the narrowest possible
+			// byte alphabet (10 distinct values, all single-byte).
+			const poolSize = 64
+			events := make([][]byte, poolSize)
+			for i := range events {
+				var buf strings.Builder
+				pair := pairs[rng.Intn(len(pairs))] // anchors of a real pattern, so the event matches
+				writeDigits(&buf)
+				buf.WriteString(pair.a1)
+				writeDigits(&buf)
+				buf.WriteString(pair.a2)
+				writeDigits(&buf)
+				events[i] = []byte(fmt.Sprintf(`{"val": %q}`, buf.String()))
+			}
+
+			// Every event must match: the timed loop below treats a miss as fatal.
+			for i, event := range events {
+				matches, err := q.MatchesForEvent(event)
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("event %d: no match for %s — check construction", i, event)
+				}
+			}
+
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				matches, err := q.MatchesForEvent(events[i%poolSize])
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("expected matches for event %d", i%poolSize)
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkShellstyleSimpleWildcardScaling adds multiple simple patterns to
+// show that even a modest collection of small-DFA patterns benefits from DFA
+// conversion. Each pattern is independent (different prefix/suffix), so the
+// merged DFA stays small.
+func BenchmarkShellstyleSimpleWildcardScaling(b *testing.B) {
+	prefixes := "abcdefghijklmnopqrstuvwxyz"
+	suffixes := "zyxwvutsrqponmlkjihgfedcba"
+
+	for _, patternCount := range []int{1, 4, 8, 16, 26} {
+		b.Run(fmt.Sprintf("patterns=%d", patternCount), func(b *testing.B) {
+			q, err := New()
+			if err != nil {
+				b.Fatal(err)
+			}
+			for i := 0; i < patternCount; i++ {
+				shellstyle := fmt.Sprintf("%c*%c", prefixes[i], suffixes[i])
+				pattern := fmt.Sprintf(`{"val": [{"shellstyle": %q}]}`, shellstyle)
+				if err := q.AddPattern(fmt.Sprintf("p%d", i), pattern); err != nil {
+					b.Fatal(err)
+				}
+			}
+
+			// Build events that match — each targets a random pattern.
+			rng := rand.New(rand.NewSource(42))
+			const poolSize = 64
+			events := make([][]byte, poolSize)
+			for i := range events {
+				idx := rng.Intn(patternCount)
+				// Size the filler once; rng.Intn in a loop condition re-rolls the bound.
+				filler := make([]byte, 5+rng.Intn(20))
+				for j := range filler {
+					filler[j] = byte('a' + rng.Intn(26))
+				}
+				value := prefixes[idx:idx+1] + string(filler) + suffixes[idx:idx+1]
+				events[i] = []byte(fmt.Sprintf(`{"val": %q}`, value))
+			}
+
+			// Every event must match: the timed loop below treats a miss as fatal.
+			for i, event := range events {
+				matches, err := q.MatchesForEvent(event)
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("event %d: no match for %s", i, event)
+				}
+			}
+
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				matches, err := q.MatchesForEvent(events[i%poolSize])
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("event %d: no match", i%poolSize)
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkShellstyleZWJEmoji exercises NFA traversal on input containing
+// ZWJ (Zero Width Joiner) emoji sequences mixed with Japanese text. This is
+// a demanding case for byte-level automaton traversal because:
+//
+//  1. ZWJ emoji sequences encode a single visible glyph as many codepoints
+//     joined by U+200D (ZWJ); the ones used here are 11-25 bytes each.
+//  2. The ZWJ byte sequence (0xE2 0x80 0x8D) shares its leading byte 0xE2
+//     with every other codepoint in U+2000-U+2FFF, so the NFA
+//     cannot tell if 0xE2 begins a ZWJ or some unrelated character without
+//     reading the second and third bytes.
+//  3. Variation selectors (U+FE0F = 0xEF 0xB8 0x8F) add further multi-byte
+//     sequences that interleave with the emoji and Japanese text.
+//  4. The input mixes several dense leading-byte ranges (0xE2 for ZWJ,
+//     0xE3 for hiragana/katakana, 0xE4+ for CJK, 0xEF for variation
+//     selectors), so the wildcard's self-loop must track many active
+//     multi-byte paths simultaneously.
+//
+// The wildcard's self-loop faces heavy branching because 0xE2 alone is
+// the leading byte for all 4096 codepoints in U+2000-U+2FFF,
+// and 0xEF covers another dense range including variation selectors.
+//
+// Each event embeds the two emoji anchors of one registered pattern, so
+// every event in the pool matches at least one pattern and the timed loop
+// can treat a miss as a failure.
+func BenchmarkShellstyleZWJEmoji(b *testing.B) {
+	// ZWJ emoji sequences — each is a single glyph but many bytes.
+	zwjEmoji := []string{
+		"👨\u200D👩\u200D👧\u200D👦", // family
+		"👩\u200D🚀",               // woman astronaut
+		"🏳\uFE0F\u200D🌈",         // rainbow flag
+		"👨\u200D💻",               // man technologist
+		"🧑\u200D🎤",               // singer
+		"👩\u200D🔬",               // woman scientist
+		"🐻\u200D❄\uFE0F",         // polar bear
+		"👁\uFE0F\u200D🗨\uFE0F",   // eye in speech bubble
+	}
+
+	// Japanese text using leading UTF-8 bytes near the ZWJ range.
+	// Hiragana (U+3040-309F): 0xE3 0x81 0x80 - 0xE3 0x82 0x9F
+	// Katakana (U+30A0-30FF): 0xE3 0x82 0xA0 - 0xE3 0x83 0xBF
+	// CJK (U+4E00+):          0xE4-0xE9 ...
+	// The ZWJ byte sequence (0xE2 0x80 0x8D) shares its leading byte
+	// 0xE2 with every other codepoint in U+2000-U+2FFF, so
+	// the NFA cannot distinguish a ZWJ from other 0xE2-prefixed characters
+	// without reading the second and third bytes. Combined with the Japanese
+	// filler (0xE3, 0xE4+) and variation selectors (0xEF), the wildcard's
+	// self-loop must handle dense multi-byte traffic across several leading
+	// byte ranges.
+	japaneseFiller := []string{
+		"東京都渋谷区",
+		"新宿駅前通り",
+		"こんにちは",
+		"カタカナテスト",
+		"令和七年",
+		"人工知能研究所",
+		"品川駅南口",
+		"秋葉原電気街",
+	}
+
+	// Patterns use ZWJ emoji as anchors with wildcards between them.
+	// The "*" must handle both Japanese multi-byte text and ZWJ byte
+	// sequences, forcing the NFA to branch heavily on shared leading bytes.
+	for _, patternCount := range []int{4, 8, 16, 32, 64} {
+		b.Run(fmt.Sprintf("patterns=%d", patternCount), func(b *testing.B) {
+			q, err := New()
+			if err != nil {
+				b.Fatal(err)
+			}
+			rng := rand.New(rand.NewSource(2025))
+
+			// Remember each pattern's anchors so events can be built to match.
+			type anchorPair struct{ e1, e2 string }
+			pairs := make([]anchorPair, 0, patternCount)
+			for i := 0; i < patternCount; i++ {
+				e1 := zwjEmoji[rng.Intn(len(zwjEmoji))]
+				e2 := zwjEmoji[rng.Intn(len(zwjEmoji))]
+				pairs = append(pairs, anchorPair{e1, e2})
+				shellstyle := fmt.Sprintf("*%s*%s*", e1, e2)
+				pattern := fmt.Sprintf(`{"val": [{"shellstyle": %q}]}`, shellstyle)
+				if err := q.AddPattern(fmt.Sprintf("p%d", i), pattern); err != nil {
+					b.Fatal(err)
+				}
+			}
+
+			// Events: Japanese filler interspersed with ZWJ emoji anchors.
+			// The NFA sees a stream of 0xE2, 0xE3, 0xE4, 0xEF bytes and
+			// must disambiguate at every step.
+			const poolSize = 64
+			events := make([][]byte, poolSize)
+			for i := range events {
+				var buf strings.Builder
+				// Reuse the anchors of a registered pattern. Two emoji drawn
+				// independently of the patterns are not guaranteed to match any.
+				pair := pairs[rng.Intn(len(pairs))]
+				buf.WriteString(japaneseFiller[rng.Intn(len(japaneseFiller))])
+				buf.WriteString(pair.e1)
+				buf.WriteString(japaneseFiller[rng.Intn(len(japaneseFiller))])
+				buf.WriteString(pair.e2)
+				buf.WriteString(japaneseFiller[rng.Intn(len(japaneseFiller))])
+				events[i] = []byte(fmt.Sprintf(`{"val": %q}`, buf.String()))
+			}
+
+			// Every event must match: the timed loop below treats a miss as fatal.
+			for i, event := range events {
+				matches, err := q.MatchesForEvent(event)
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("event %d: no match for %s — check pattern/event construction", i, event)
+				}
+			}
+
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				matches, err := q.MatchesForEvent(events[i%poolSize])
+				if err != nil {
+					b.Fatal(err)
+				}
+				if len(matches) == 0 {
+					b.Fatalf("event %d: no match", i%poolSize)
+				}
+			}
+		})
+	}
+}