diff --git a/anything_but_test.go b/anything_but_test.go index 180228a..9754732 100644 --- a/anything_but_test.go +++ b/anything_but_test.go @@ -165,7 +165,7 @@ func TestAnythingButMatching(t *testing.T) { if err != nil { t.Error("AP: " + err.Error()) } - words := readWWords(t) + words := readWWords(t, 0) template := `{"a": "XX"}` problemTemplate := `{"a": XX}` for _, word := range problemWords { diff --git a/benchmarks_test.go b/benchmarks_test.go index 2f006f1..82a37cd 100644 --- a/benchmarks_test.go +++ b/benchmarks_test.go @@ -190,7 +190,7 @@ func TestBigShellStyle(t *testing.T) { // ~220K smallTables. Tried https://blog.twitch.tv/en/2019/04/10/go-memory-ballast-how-i-learnt-to-stop-worrying-and-love-the-heap/ // but it doesn't seem to help. func TestPatternAddition(t *testing.T) { - w := worder{0, readWWords(t)} + w := worder{0, readWWords(t, 0)} var msBefore, msAfter runtime.MemStats @@ -235,13 +235,15 @@ func (w *worder) next() []byte { return w.lines[w.index] } -func readWWords(t *testing.T) [][]byte { - t.Helper() +// readWWords reads up to maxWords words from testdata/wwords.txt. +// Pass 0 to read all words. 
+func readWWords(tb testing.TB, maxWords int) [][]byte { + tb.Helper() // that's a list from the Wordle source code with a few erased to get a prime number file, err := os.Open("testdata/wwords.txt") if err != nil { - t.Error("Can't open file: " + err.Error()) + tb.Fatal("Can't open file: " + err.Error()) } defer func(file *os.File) { _ = file.Close() @@ -250,11 +252,12 @@ func readWWords(t *testing.T) [][]byte { buf := make([]byte, oneMeg) scanner.Buffer(buf, oneMeg) - lineCount := 0 var lines [][]byte for scanner.Scan() { - lineCount++ lines = append(lines, []byte(scanner.Text())) + if maxWords > 0 && len(lines) >= maxWords { + break + } } return lines } diff --git a/regexp_nfa_test.go b/regexp_nfa_test.go index dcb13ed..c8ae821 100644 --- a/regexp_nfa_test.go +++ b/regexp_nfa_test.go @@ -14,7 +14,7 @@ import ( // skinny RR: 3853.56/second with cache, 60.31 without, speedup 63.9 // func TestRRCacheEffectiveness(t *testing.T) { - words := readWWords(t)[:2000] + words := readWWords(t, 2000) re := "~p{L}+" pp := sharedNullPrinter var transitions []*fieldMatcher diff --git a/shell_style_test.go b/shell_style_test.go index 1ee097e..93bccbd 100644 --- a/shell_style_test.go +++ b/shell_style_test.go @@ -161,7 +161,7 @@ func TestShellStyleBuildTime(t *testing.T) { // automaton building or very slow (~2K/second) matching. The current version settles for the // latter. With a thousand patterns the automaton building is instant and the matching runs at // ~16K/second. I retain optimism that there is a path forward to win back the fast performance. 
- words := readWWords(t)[:1000] + words := readWWords(t, 1000) fmt.Printf("WC %d\n", len(words)) starWords := make([]string, 0, len(words)) diff --git a/small_table_test.go b/small_table_test.go index 91de286..2bd2b70 100644 --- a/small_table_test.go +++ b/small_table_test.go @@ -7,7 +7,7 @@ import ( ) func TestFAMergePerf(t *testing.T) { - words := readWWords(t) + words := readWWords(t, 0) patterns := make([]string, 0, len(words)) for _, word := range words { pattern := fmt.Sprintf(`{"x": [ "%s" ] }`, string(word)) diff --git a/v2_bench_test.go b/v2_bench_test.go index 6e8f289..930b578 100644 --- a/v2_bench_test.go +++ b/v2_bench_test.go @@ -4,6 +4,7 @@ package quamina import ( "fmt" + "math/rand" "testing" "time" ) @@ -71,3 +72,64 @@ func Benchmark8259Example(b *testing.B) { count := float64(b.N) fmt.Printf("%.0f/sec\n", count/elapsed) } + +func BenchmarkShellStyleBuildTime(b *testing.B) { + words := readWWords(b, 1000) + + source := rand.NewSource(293591) + starWords := make([]string, 0, len(words)) + expandedWords := make([]string, 0, len(words)) + patterns := make([]string, 0, len(words)) + for _, word := range words { + //nolint:gosec + starAt := source.Int63() % 6 + starWord := string(word[:starAt]) + "*" + string(word[starAt:]) + expandedWord := string(word[:starAt]) + "ÉÉÉÉ" + string(word[starAt:]) + starWords = append(starWords, starWord) + expandedWords = append(expandedWords, expandedWord) + pattern := fmt.Sprintf(`{"x": [ {"shellstyle": "%s" } ] }`, starWord) + patterns = append(patterns, pattern) + } + + q, _ := New() + before := time.Now() + for i := range words { + err := q.AddPattern(starWords[i], patterns[i]) + if err != nil { + b.Fatal("AddP: " + err.Error()) + } + } + elapsed := time.Since(before).Seconds() + fmt.Printf("Patterns/sec: %.1f\n", float64(len(words))/elapsed) + fmt.Println(matcherStats(q.matcher.(*coreMatcher))) + + // Build events: original words and expanded words + type event struct { + json []byte + word string + } + events := 
make([]event, 0, len(words)*2)
+	for i, word := range words {
+		events = append(events,
+			event{[]byte(fmt.Sprintf(`{"x": "%s"}`, word)), string(word)},
+			event{[]byte(fmt.Sprintf(`{"x": "%s"}`, expandedWords[i])), expandedWords[i]},
+		)
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	for b.Loop() {
+		for _, ev := range events {
+			matches, err := q.MatchesForEvent(ev.json)
+			if err != nil {
+				b.Fatal("M4E on " + ev.word + ": " + err.Error())
+			}
+			if len(matches) == 0 {
+				b.Fatal("no matches for " + ev.word)
+			}
+		}
+	}
+	elapsed = b.Elapsed().Seconds()
+	count := float64(b.N)
+	fmt.Printf("%.0f events/sec\n", count*float64(len(events))/elapsed)
+}