Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions nfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,14 @@ func n2dNode(rawNStates []*faState, sList *stateLists) *faState {
nUnpacked[i] = unpackTable(nState.table)
}

// for each byte value
// Unpack the DFA table once, set all byte transitions, then pack once —
// the old code called addByteStep per byte which unpacked and repacked
// for each of up to 256 values. rawStates is allocated once and reset
// with [:0] each iteration to avoid per-byte-value slice allocation.
dfaUnpacked := unpackTable(dfaState.table)
rawStates := make([]*faState, 0, len(ingredients))
for utf8byte := 0; utf8byte < byteCeiling; utf8byte++ {
var rawStates []*faState
rawStates = rawStates[:0]

// for each of the unique states
for _, unpackedNState := range nUnpacked {
Expand All @@ -208,9 +213,10 @@ func n2dNode(rawNStates []*faState, sList *stateLists) *faState {
// if there were any transitions on this byte value
if len(rawStates) > 0 {
// recurse, get the DFA state for the transitions and plug it into this state
dfaState.table.addByteStep(byte(utf8byte), n2dNode(rawStates, sList))
dfaUnpacked[utf8byte] = n2dNode(rawStates, sList)
}
}
dfaState.table.pack(dfaUnpacked)

// load up transitions (build-time, allocation is fine)
seen := make(map[*fieldMatcher]bool)
Expand Down
26 changes: 26 additions & 0 deletions nfa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,32 @@ func TestNfa2Dfa(t *testing.T) {
shoulds: []string{"abc", "abcfoo"},
nopes: []string{"xabc", "abxbar"},
},
// multi-star patterns exercise intern() dedup more heavily
{
pattern: "*foo*",
shoulds: []string{"foo", "xfoo", "foox", "xfoox", "foofoofoo"},
nopes: []string{"bar", "fo", "ffo"},
},
{
pattern: "*foo*bar*",
shoulds: []string{"foobar", "xfooybar", "foobarbaz", "xxfooxxbarxx"},
nopes: []string{"barfoo", "foo", "bar", "fobar"},
},
{
pattern: "*a*b*c*",
shoulds: []string{"abc", "xaxbxcx", "abc123", "123abc", "aabbcc"},
nopes: []string{"ab", "ac", "bc", "cba"},
},
{
pattern: "*a*b*c*d*e*",
shoulds: []string{"abcde", "xaxbxcxdxex", "aabbccddee"},
nopes: []string{"abcd", "edcba", "abce"},
},
{
pattern: "*a*b*c*d*e*f*g*h*",
shoulds: []string{"abcdefgh", "xaxbxcxdxexfxgxhx"},
nopes: []string{"abcdefg", "hgfedcba"},
},
}
pp := newPrettyPrinter(4567)
transitions := []*fieldMatcher{}
Expand Down
76 changes: 46 additions & 30 deletions state_lists.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,31 @@ package quamina

import (
"cmp"
"encoding/binary"
"slices"
"unsafe"
)

// internEntry bundles the list and DFA state into one map value so that
// cache hits require a single map lookup instead of two.
type internEntry struct {
states []*faState
dfaState *faState
}

// The idea is that in we are going to be computing the epsilon closures of NFA states, which
// will be slices of states. There will be duplicate slices and we want to deduplicate. There's
// probably a more idiomatic and efficient way to do this.
type stateLists struct {
lists map[string][]*faState
dfaStates map[string]*faState
entries map[string]internEntry
// Scratch space reused across intern() calls
sortBuf []*faState // reusable sorted buffer
keyBuf []byte // reusable key bytes buffer
}

func newStateLists() *stateLists {
return &stateLists{
lists: make(map[string][]*faState),
dfaStates: make(map[string]*faState),
entries: make(map[string]internEntry),
}
}

Expand All @@ -27,40 +36,47 @@ func newStateLists() *stateLists {
// which either has already been computed for the set or is created and empty, and
// a boolean indicating whether the DFA state has already been computed or not.
func (sl *stateLists) intern(list []*faState) ([]*faState, *faState, bool) {
// dedupe the collection
uniquemap := make(map[*faState]bool)
// Dedupe using the global generation counter and faState.closureSetGen
// instead of allocating a map per call. Safe to reuse closureSetGen
// because nfa2Dfa runs after epsilon closure computation is complete.
closureGeneration++
gen := closureGeneration
sl.sortBuf = sl.sortBuf[:0]
for _, state := range list {
uniquemap[state] = true
}
uniques := make([]*faState, 0, len(uniquemap))
for unique := range uniquemap {
uniques = append(uniques, unique)
if state.closureSetGen != gen {
state.closureSetGen = gen
sl.sortBuf = append(sl.sortBuf, state)
}
}

// compute a key representing the set. Disclosure: My first use of an AI to help
// code. I had done this by Sprintf("%p")-ing the addresses and sorting/concatenating
// the strings. Which works fine but grabbing the raw bytes and pretending they're
// a string is going to produce keys that are exactly half the size
keyBytes := make([]byte, 0, len(uniques)*8)
slices.SortFunc(uniques, func(a, b *faState) int {
// compute a key representing the set
slices.SortFunc(sl.sortBuf, func(a, b *faState) int {
return cmp.Compare(uintptr(unsafe.Pointer(a)), uintptr(unsafe.Pointer(b)))
})

for _, state := range uniques {
addr := uintptr(unsafe.Pointer(state))
for i := 0; i < 8; i++ {
keyBytes = append(keyBytes, byte(addr>>(i*8)))
}
// Pre-size the key buffer and write pointers with PutUint64 instead of
// appending byte-by-byte, avoiding 8 append calls and bounds checks per state.
needed := len(sl.sortBuf) * 8
if cap(sl.keyBuf) < needed {
sl.keyBuf = make([]byte, needed)
} else {
sl.keyBuf = sl.keyBuf[:needed]
}
for i, state := range sl.sortBuf {
binary.LittleEndian.PutUint64(sl.keyBuf[i*8:], uint64(uintptr(unsafe.Pointer(state))))
}
key := string(keyBytes)

// either we have already seen this or not
list, exists := sl.lists[key]
if exists {
return list, sl.dfaStates[key], true
// string(sl.keyBuf) in a map lookup is optimized by the compiler to avoid allocation
if entry, exists := sl.entries[string(sl.keyBuf)]; exists {
return entry.states, entry.dfaState, true
}

// cache miss: allocate owned copies for the map
key := string(sl.keyBuf)
stored := make([]*faState, len(sl.sortBuf))
copy(stored, sl.sortBuf)

dfaState := &faState{table: newSmallTable()}
sl.lists[key] = uniques
sl.dfaStates[key] = dfaState
return uniques, dfaState, false
sl.entries[key] = internEntry{states: stored, dfaState: dfaState}
return stored, dfaState, false
}
37 changes: 37 additions & 0 deletions state_lists_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//go:build go1.24

package quamina

import (
"fmt"
"testing"
)

// BenchmarkNfa2Dfa measures the cost of nfa2Dfa conversion, where intern()
// in state_lists.go typically dominates. Patterns with more wildcards produce
// larger epsilon closures and more intern() calls.
func BenchmarkNfa2Dfa(b *testing.B) {
	patterns := []struct {
		name    string
		pattern string
	}{
		{"single_star", "*foo*"},
		{"two_stars", "*foo*bar*"},
		{"three_stars", "*a*b*c*"},
		{"five_stars", "*a*b*c*d*e*"},
		{"eight_stars", "*a*b*c*d*e*f*g*h*"},
	}

	pp := newPrettyPrinter(12345)
	for _, tc := range patterns {
		b.Run(tc.name, func(b *testing.B) {
			// Build the NFA once per sub-benchmark; fail loudly rather than
			// benchmarking a nil automaton if pattern construction breaks.
			nfa, err := makeShellStyleFA([]byte(fmt.Sprintf(`"%s"`, tc.pattern)), pp)
			if err != nil {
				b.Fatalf("makeShellStyleFA(%q): %v", tc.pattern, err)
			}
			epsilonClosure(nfa)
			b.ReportAllocs()
			// b.Loop (Go 1.24+) excludes the setup above from the timed
			// region automatically, so no explicit b.ResetTimer is needed.
			for b.Loop() {
				nfa2Dfa(nfa)
			}
		})
	}
}
Loading