Skip to content

Commit 37c1f17

Browse files
cyriltovenagouthamve
authored andcommitted
Fixes store duplicate label names and values. (#1790)
* Fixes store label name and values duplicate. Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com> * Normalize string unicity control Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com> * Adds benchmark to compare the new unique algorithm Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>
1 parent 5bd771f commit 37c1f17

File tree

7 files changed

+152
-33
lines changed

7 files changed

+152
-33
lines changed

pkg/chunk/chunk_store.go

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -204,17 +204,15 @@ func (c *store) LabelValuesForMetricName(ctx context.Context, userID string, fro
204204
return nil, err
205205
}
206206

207-
var result []string
207+
var result UniqueStrings
208208
for _, entry := range entries {
209209
_, labelValue, _, _, err := parseChunkTimeRangeValue(entry.RangeValue, entry.Value)
210210
if err != nil {
211211
return nil, err
212212
}
213-
result = append(result, string(labelValue))
213+
result.Add(string(labelValue))
214214
}
215-
sort.Strings(result)
216-
result = uniqueStrings(result)
217-
return result, nil
215+
return result.Strings(), nil
218216
}
219217

220218
// LabelNamesForMetricName retrieves all label names for a metric name.
@@ -462,7 +460,6 @@ func (c *store) lookupEntriesByQueries(ctx context.Context, queries []IndexQuery
462460

463461
func (c *store) parseIndexEntries(ctx context.Context, entries []IndexEntry, matcher *labels.Matcher) ([]string, error) {
464462
result := make([]string, 0, len(entries))
465-
466463
for _, entry := range entries {
467464
chunkKey, labelValue, _, _, err := parseChunkTimeRangeValue(entry.RangeValue, entry.Value)
468465
if err != nil {
@@ -474,7 +471,6 @@ func (c *store) parseIndexEntries(ctx context.Context, entries []IndexEntry, mat
474471
}
475472
result = append(result, chunkKey)
476473
}
477-
478474
// Return ids sorted and deduped because they will be merged with other sets.
479475
sort.Strings(result)
480476
result = uniqueStrings(result)

pkg/chunk/chunk_store_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -851,3 +851,49 @@ func TestStoreMaxLookBack(t *testing.T) {
851851
require.Equal(t, 1, len(chunks))
852852
chunks[0].Through.Equal(now)
853853
}
854+
855+
func benchmarkParseIndexEntries(i int64, b *testing.B) {
856+
b.ReportAllocs()
857+
b.StopTimer()
858+
store := &store{}
859+
ctx := context.Background()
860+
entries := generateIndexEntries(i)
861+
matcher, err := labels.NewMatcher(labels.MatchRegexp, "", ".*")
862+
if err != nil {
863+
b.Fatal(err)
864+
}
865+
b.StartTimer()
866+
for n := 0; n < b.N; n++ {
867+
keys, err := store.parseIndexEntries(ctx, entries, matcher)
868+
if err != nil {
869+
b.Fatal(err)
870+
}
871+
if len(keys) != len(entries)/2 {
872+
b.Fatalf("expected keys:%d got:%d", len(entries)/2, len(keys))
873+
}
874+
}
875+
}
876+
877+
func BenchmarkParseIndexEntries500(b *testing.B) { benchmarkParseIndexEntries(500, b) }
878+
func BenchmarkParseIndexEntries2500(b *testing.B) { benchmarkParseIndexEntries(2500, b) }
879+
func BenchmarkParseIndexEntries10000(b *testing.B) { benchmarkParseIndexEntries(10000, b) }
880+
func BenchmarkParseIndexEntries50000(b *testing.B) { benchmarkParseIndexEntries(50000, b) }
881+
882+
func generateIndexEntries(n int64) []IndexEntry {
883+
res := make([]IndexEntry, 0, n)
884+
for i := int64(n - 1); i >= 0; i-- {
885+
labelValue := fmt.Sprintf("labelvalue%d", i%(n/2))
886+
chunkID := fmt.Sprintf("chunkid%d", i%(n/2))
887+
rangeValue := []byte{}
888+
rangeValue = append(rangeValue, []byte("component1")...)
889+
rangeValue = append(rangeValue, 0)
890+
rangeValue = append(rangeValue, []byte(labelValue)...)
891+
rangeValue = append(rangeValue, 0)
892+
rangeValue = append(rangeValue, []byte(chunkID)...)
893+
rangeValue = append(rangeValue, 0)
894+
res = append(res, IndexEntry{
895+
RangeValue: rangeValue,
896+
})
897+
}
898+
return res
899+
}

pkg/chunk/chunk_store_utils.go

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package chunk
22

33
import (
44
"context"
5-
"sort"
65
"sync"
76

87
"github.com/go-kit/kit/log/level"
@@ -38,18 +37,13 @@ func keysFromChunks(chunks []Chunk) []string {
3837
}
3938

4039
func labelNamesFromChunks(chunks []Chunk) []string {
41-
keys := map[string]struct{}{}
42-
var result []string
40+
var result UniqueStrings
4341
for _, c := range chunks {
4442
for _, l := range c.Metric {
45-
if _, ok := keys[string(l.Name)]; !ok {
46-
keys[string(l.Name)] = struct{}{}
47-
result = append(result, string(l.Name))
48-
}
43+
result.Add(string(l.Name))
4944
}
5045
}
51-
sort.Strings(result)
52-
return result
46+
return result.Strings()
5347
}
5448

5549
func filterChunksByUniqueFingerprint(chunks []Chunk) ([]Chunk, []string) {

pkg/chunk/composite_store.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,30 +100,30 @@ func (c compositeStore) Get(ctx context.Context, userID string, from, through mo
100100

101101
// LabelValuesForMetricName retrieves all label values for a single label name and metric name.
102102
func (c compositeStore) LabelValuesForMetricName(ctx context.Context, userID string, from, through model.Time, metricName string, labelName string) ([]string, error) {
103-
var result []string
103+
var result UniqueStrings
104104
err := c.forStores(from, through, func(from, through model.Time, store Store) error {
105105
labelValues, err := store.LabelValuesForMetricName(ctx, userID, from, through, metricName, labelName)
106106
if err != nil {
107107
return err
108108
}
109-
result = append(result, labelValues...)
109+
result.Add(labelValues...)
110110
return nil
111111
})
112-
return result, err
112+
return result.Strings(), err
113113
}
114114

115115
// LabelNamesForMetricName retrieves all label names for a metric name.
116116
func (c compositeStore) LabelNamesForMetricName(ctx context.Context, userID string, from, through model.Time, metricName string) ([]string, error) {
117-
var result []string
117+
var result UniqueStrings
118118
err := c.forStores(from, through, func(from, through model.Time, store Store) error {
119119
labelNames, err := store.LabelNamesForMetricName(ctx, userID, from, through, metricName)
120120
if err != nil {
121121
return err
122122
}
123-
result = append(result, labelNames...)
123+
result.Add(labelNames...)
124124
return nil
125125
})
126-
return result, err
126+
return result.Strings(), err
127127
}
128128

129129
func (c compositeStore) GetChunkRefs(ctx context.Context, userID string, from, through model.Time, matchers ...*labels.Matcher) ([][]Chunk, []*Fetcher, error) {

pkg/chunk/composite_store_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,64 @@ func TestCompositeStore(t *testing.T) {
180180
})
181181
}
182182
}
183+
184+
type mockStoreLabel struct {
185+
mockStore
186+
values []string
187+
}
188+
189+
func (m mockStoreLabel) LabelValuesForMetricName(ctx context.Context, userID string, from, through model.Time, metricName string, labelName string) ([]string, error) {
190+
return m.values, nil
191+
}
192+
193+
func (m mockStoreLabel) LabelNamesForMetricName(ctx context.Context, userID string, from, through model.Time, metricName string) ([]string, error) {
194+
return m.values, nil
195+
}
196+
197+
func TestCompositeStoreLabels(t *testing.T) {
198+
t.Parallel()
199+
200+
cs := compositeStore{
201+
stores: []compositeStoreEntry{
202+
{model.TimeFromUnix(0), mockStore(1)},
203+
{model.TimeFromUnix(20), mockStoreLabel{mockStore(1), []string{"b", "c", "e"}}},
204+
{model.TimeFromUnix(40), mockStoreLabel{mockStore(1), []string{"a", "b", "c", "f"}}},
205+
},
206+
}
207+
208+
for i, tc := range []struct {
209+
from, through int64
210+
want []string
211+
}{
212+
{
213+
0, 10,
214+
nil,
215+
},
216+
{
217+
0, 30,
218+
[]string{"b", "c", "e"},
219+
},
220+
{
221+
0, 40,
222+
[]string{"a", "b", "c", "e", "f"},
223+
},
224+
} {
225+
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
226+
have, err := cs.LabelNamesForMetricName(context.Background(), "", model.TimeFromUnix(tc.from), model.TimeFromUnix(tc.through), "")
227+
if err != nil {
228+
t.Fatalf("err - %s", err)
229+
}
230+
if !reflect.DeepEqual(tc.want, have) {
231+
t.Fatalf("wrong label names - %s", test.Diff(tc.want, have))
232+
}
233+
have, err = cs.LabelValuesForMetricName(context.Background(), "", model.TimeFromUnix(tc.from), model.TimeFromUnix(tc.through), "", "")
234+
if err != nil {
235+
t.Fatalf("err - %s", err)
236+
}
237+
if !reflect.DeepEqual(tc.want, have) {
238+
t.Fatalf("wrong label values - %s", test.Diff(tc.want, have))
239+
}
240+
})
241+
}
242+
243+
}

pkg/chunk/series_store.go

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"context"
55
"fmt"
66
"net/http"
7-
"sort"
87

98
"github.com/go-kit/kit/log/level"
109
jsoniter "github.com/json-iterator/go"
@@ -406,23 +405,18 @@ func (c *seriesStore) lookupLabelNamesBySeries(ctx context.Context, from, throug
406405
return nil, err
407406
}
408407
level.Debug(log).Log("entries", len(entries))
409-
result := []string{model.MetricNameLabel}
410-
uniqueLabelNames := map[string]struct{}{model.MetricNameLabel: {}}
408+
409+
var result UniqueStrings
410+
result.Add(model.MetricNameLabel)
411411
for _, entry := range entries {
412412
lbs := []string{}
413413
err := jsoniter.ConfigFastest.Unmarshal(entry.Value, &lbs)
414414
if err != nil {
415415
return nil, err
416416
}
417-
for _, l := range lbs {
418-
if _, ok := uniqueLabelNames[l]; !ok {
419-
uniqueLabelNames[l] = struct{}{}
420-
result = append(result, l)
421-
}
422-
}
417+
result.Add(lbs...)
423418
}
424-
sort.Strings(result)
425-
return result, nil
419+
return result.Strings(), nil
426420
}
427421

428422
// Put implements ChunkStore

pkg/chunk/strings.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package chunk
22

3+
import "sort"
4+
35
func uniqueStrings(cs []string) []string {
46
if len(cs) == 0 {
57
return []string{}
@@ -57,3 +59,29 @@ func nWayIntersectStrings(sets [][]string) []string {
5759
return intersectStrings(left, right)
5860
}
5961
}
62+
63+
// UniqueStrings keeps a slice of unique strings.
64+
type UniqueStrings struct {
65+
values map[string]struct{}
66+
result []string
67+
}
68+
69+
// Add adds a new string, dropping duplicates.
70+
func (us *UniqueStrings) Add(strings ...string) {
71+
for _, s := range strings {
72+
if _, ok := us.values[s]; ok {
73+
continue
74+
}
75+
if us.values == nil {
76+
us.values = map[string]struct{}{}
77+
}
78+
us.values[s] = struct{}{}
79+
us.result = append(us.result, s)
80+
}
81+
}
82+
83+
// Strings returns the sorted sliced of unique strings.
84+
func (us UniqueStrings) Strings() []string {
85+
sort.Strings(us.result)
86+
return us.result
87+
}

0 commit comments

Comments
 (0)