Skip to content
2 changes: 1 addition & 1 deletion frac/processor/aggregator.go
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ func NewSourcedNodeIterator(sourced node.Sourced, ti tokenIndex, tids []uint32,

func (s *SourcedNodeIterator) ConsumeTokenSource(lid node.LID) (uint32, bool, error) {
for !s.lastID.IsNull() && s.lastID.Less(lid) {
s.lastID, s.lastSource = s.sourcedNode.NextSourced()
s.lastID, s.lastSource = s.sourcedNode.NextSourcedGeq(lid)
}

exists := !s.lastID.IsNull() && s.lastID == lid
Expand Down
12 changes: 12 additions & 0 deletions frac/processor/aggregator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,18 @@ func (m *MockNode) NextSourced() (node.LID, uint32) {
return first.LID, first.Source
}

// NextSourcedGeq drops every leading pair whose LID is Less than minLID, then pops the first
// remaining pair and returns its LID and source. When no pair is left it returns node.NullLID() and 0.
func (m *MockNode) NextSourcedGeq(minLID node.LID) (node.LID, uint32) {
	skip := 0
	for skip < len(m.Pairs) && m.Pairs[skip].LID.Less(minLID) {
		skip++
	}
	m.Pairs = m.Pairs[skip:]

	if len(m.Pairs) == 0 {
		return node.NullLID(), 0
	}

	head := m.Pairs[0]
	m.Pairs = m.Pairs[1:]
	return head.LID, head.Source
}

func TestTwoSourceAggregator(t *testing.T) {
r := require.New(t)

Expand Down
31 changes: 31 additions & 0 deletions frac/sealed/lids/iterator_asc.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/ozontech/seq-db/logger"
"github.com/ozontech/seq-db/node"
"github.com/ozontech/seq-db/util"
)

type IteratorAsc Cursor
Expand Down Expand Up @@ -72,3 +73,33 @@ func (it *IteratorAsc) Next() node.LID {
it.lids = it.lids[:i]
return node.NewLIDOrderAsc(lid)
}

// NextGeq returns the LID at the index reported by util.GallopSearchLeq for the unpacked value of
// nextID, consuming that LID and every LID after it in the current block. A block whose first
// remaining LID is already above the target is discarded without searching it. When no LIDs are left
// and tryNextBlock is false, node.NewLIDOrderAsc(0) is returned.
//
// NOTE(review): GallopSearchLeq presumably locates the largest element <= target, which would make this
// a "greater or equal" step in the ascending-order LID encoding — confirm against util and node.LID.
func (it *IteratorAsc) NextGeq(nextID node.LID) node.LID {
	target := nextID.Unpack()

	for {
		// refill until there is something to look at, or give up when no more blocks may be tried
		for len(it.lids) == 0 {
			if !it.tryNextBlock {
				return node.NewLIDOrderAsc(0)
			}

			it.loadNextLIDsBlock()
			it.lids, it.tryNextBlock = it.narrowLIDsRange(it.lids, it.tryNextBlock)
			it.counter.AddLIDsCount(len(it.lids))
		}

		// only search when the first remaining LID does not exceed the target
		if it.lids[0] <= target {
			if pos, ok := util.GallopSearchLeq(it.lids, target); ok {
				hit := it.lids[pos]
				it.lids = it.lids[:pos]
				return node.NewLIDOrderAsc(hit)
			}
		}

		// nothing usable in this block: drop it and move on to the next one
		it.lids = it.lids[:0]
	}
}
32 changes: 32 additions & 0 deletions frac/sealed/lids/iterator_desc.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/ozontech/seq-db/logger"
"github.com/ozontech/seq-db/node"
"github.com/ozontech/seq-db/util"
)

type IteratorDesc Cursor
Expand Down Expand Up @@ -72,3 +73,34 @@ func (it *IteratorDesc) Next() node.LID {
it.lids = it.lids[1:]
return node.NewLIDOrderDesc(lid)
}

// NextGeq returns the LID at the index reported by util.GallopSearchGeq for the unpacked value of
// nextID, consuming that LID and every LID before it in the current block. A block whose last
// remaining LID is below the target is discarded without searching it. When no LIDs are left and
// tryNextBlock is false, node.NewLIDOrderDesc(math.MaxUint32) is returned.
//
// NOTE(review): GallopSearchGeq presumably locates the first element >= target — confirm against util.
func (it *IteratorDesc) NextGeq(nextID node.LID) node.LID {
	target := nextID.Unpack()

	for {
		for len(it.lids) == 0 {
			if !it.tryNextBlock {
				return node.NewLIDOrderDesc(math.MaxUint32)
			}

			// the current chunk is used up but the tid may continue in the next block
			it.loadNextLIDsBlock()
			it.lids, it.tryNextBlock = it.narrowLIDsRange(it.lids, it.tryNextBlock)
			// account for the LIDs that were just loaded
			it.counter.AddLIDsCount(len(it.lids))
		}

		// only search when the last remaining LID reaches the target
		if tail := it.lids[len(it.lids)-1]; tail >= target {
			if pos, ok := util.GallopSearchGeq(it.lids, target); ok {
				hit := it.lids[pos]
				it.lids = it.lids[pos+1:]
				return node.NewLIDOrderDesc(hit)
			}
		}

		// nothing usable in this block: drop it and move on to the next one
		it.lids = it.lids[:0]
	}
}
51 changes: 51 additions & 0 deletions node/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ func newNodeStaticSize(size int) *staticAsc {
return &staticAsc{staticCursor: staticCursor{data: data}}
}

// newNodeStaticSizeFixedDelta builds a staticAsc node over the size values produced by
// GenerateFixedDelta(size, start, delta); the second result of the generator is not needed here.
func newNodeStaticSizeFixedDelta(size int, start int, delta int) *staticAsc {
	values, _ := GenerateFixedDelta(size, start, delta)
	cursor := staticCursor{data: values}
	return &staticAsc{staticCursor: cursor}
}

func Generate(n int) ([]uint32, uint32) {
v := make([]uint32, n)
last := uint32(1)
Expand All @@ -23,6 +28,16 @@ func Generate(n int) ([]uint32, uint32) {
return v, last
}

// GenerateFixedDelta returns n values forming the arithmetic progression start, start+step,
// start+2*step, ... (computed in uint32), together with the value that would follow the last element.
// For n == 0 the slice is empty and the second result is uint32(start).
func GenerateFixedDelta(n, start, step int) ([]uint32, uint32) {
	values := make([]uint32, n)
	next, delta := uint32(start), uint32(step)
	for i := range values {
		values[i] = next
		next += delta
	}
	return values, next
}

func BenchmarkNot(b *testing.B) {
sizes := []int{1000, 10_000, 1_000_000}

Expand Down Expand Up @@ -157,6 +172,42 @@ func BenchmarkOrTree(b *testing.B) {
}
}

// BenchmarkOrTreeNextGeq checks the performance of NextGeq vs Next when no skipping occurs and all
// nodes yield distinct values (no intersection between nodes). Eight static leaves are combined into a
// three-level tree of Or nodes and drained through readAllIntoGeq.
//
// NOTE(review): the tree is built once, outside the b.Loop() body; confirm that readAllIntoGeq leaves the
// nodes readable on later iterations, otherwise only the first iteration does real work.
func BenchmarkOrTreeNextGeq(b *testing.B) {
	sizes := []int{1000, 10_000, 1_000_000}
	// step is equal to the total number of leaf nodes, so that every leaf produces distinct values
	step := 8

	for _, s := range sizes {
		b.Run(fmt.Sprintf("size=%d", s), func(b *testing.B) {
			n1 := NewOr(
				newNodeStaticSizeFixedDelta(s, 1, step),
				newNodeStaticSizeFixedDelta(s, 5, step))
			n2 := NewOr(
				newNodeStaticSizeFixedDelta(s, 2, step),
				newNodeStaticSizeFixedDelta(s, 6, step))
			n3 := NewOr(
				newNodeStaticSizeFixedDelta(s, 3, step),
				newNodeStaticSizeFixedDelta(s, 8, step))
			n4 := NewOr(
				newNodeStaticSizeFixedDelta(s, 4, step),
				newNodeStaticSizeFixedDelta(s, 7, step))
			n12 := NewOr(n1, n2)
			n34 := NewOr(n3, n4)
			n := NewOr(n12, n34)
			res := make([]uint32, 0, s*8)

			for b.Loop() {
				res = readAllIntoGeq(n, res)
			}

			// expected value goes first; an unchanged capacity means res never had to grow
			assert.Equal(b, s*8, cap(res))
		})
	}
}

func BenchmarkComplex(b *testing.B) {
sizes := []int{1000, 10_000, 1_000_000}

Expand Down
24 changes: 24 additions & 0 deletions node/lid.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ func NewLIDOrderAsc(lid uint32) LID {
}
}

// NewLID builds a LID from a raw value, choosing the constructor by the reverse flag:
// NewLIDOrderAsc when reverse is true, NewLIDOrderDesc otherwise.
func NewLID(lid uint32, reverse bool) LID {
	if !reverse {
		return NewLIDOrderDesc(lid)
	}
	return NewLIDOrderAsc(lid)
}

// Less compares two values. It also does an implicit null check, since we store math.MaxUint32 for null values.
// Which means if we call x.Less(y), then we know for sure that x is not null. Therefore, this Less call can work
// as both "null check + less" combo.
Expand All @@ -61,6 +69,22 @@ func (c LID) Eq(other LID) bool {
return c.lid == other.lid
}

// Max returns whichever argument has the larger stored lid field (the packed value, not Unpack()).
// On a tie, right is returned.
func Max(left LID, right LID) LID {
	if right.lid >= left.lid {
		return right
	}
	return left
}

// Min returns whichever argument has the smaller stored lid field (the packed value, not Unpack()).
// On a tie, right is returned.
func Min(left LID, right LID) LID {
	if right.lid <= left.lid {
		return right
	}
	return left
}

// Unpack returns the stored lid XORed with the mask.
// NOTE(review): presumably this recovers the raw value originally passed to the constructor — confirm
// against NewLIDOrderAsc/NewLIDOrderDesc.
func (c LID) Unpack() uint32 {
return c.lid ^ c.mask
}
Expand Down
5 changes: 5 additions & 0 deletions node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@ import (
// Node yields LIDs one at a time through Next and NextGeq.
type Node interface {
fmt.Stringer // for testing
// Next returns the next LID.
Next() LID
// NextGeq returns the next LID that is greater than or equal (GEQ) to nextID. Currently, some nodes do
// not support it, so the caller must check the output and be ready to call it again if needed, just
// like when using Next. Therefore, nextID is more of a hint.
NextGeq(nextID LID) LID
}

// Sourced yields LIDs together with a uint32 source value.
type Sourced interface {
fmt.Stringer // for testing
// NextSourced returns the next LID and its source; aggregation needs the source.
NextSourced() (id LID, source uint32)
// NextSourcedGeq is the variant of NextSourced that takes a target LID.
// NOTE(review): presumably nextLID is a lower bound for the returned id, possibly only a hint —
// confirm against the implementations.
NextSourcedGeq(nextLID LID) (id LID, source uint32)
}
30 changes: 28 additions & 2 deletions node/node_and.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,39 @@ func (n *nodeAnd) readRight() {
n.rightID = n.right.Next()
}

// readLeftGeq advances the left child with NextGeq(nextID) and stores the result in n.leftID.
func (n *nodeAnd) readLeftGeq(nextID LID) {
n.leftID = n.left.NextGeq(nextID)
}

// readRightGeq advances the right child with NextGeq(nextID) and stores the result in n.rightID.
func (n *nodeAnd) readRightGeq(nextID LID) {
n.rightID = n.right.NextGeq(nextID)
}

// Next returns the next LID on which the left and right children agree, or NullLID() once either
// child has produced a null LID.
func (n *nodeAnd) Next() LID {
	for !n.leftID.IsNull() && !n.rightID.IsNull() && !n.leftID.Eq(n.rightID) {
		// Move the side that is behind towards the other side's current LID. Exactly one read per
		// iteration: advancing with both readLeft and readLeftGeq would step over a LID without ever
		// comparing it against rightID. The loops re-check the result because a child may not honour
		// the requested target exactly.
		for !n.rightID.IsNull() && n.leftID.Less(n.rightID) {
			n.readLeftGeq(n.rightID)
		}
		for !n.rightID.IsNull() && n.rightID.Less(n.leftID) {
			n.readRightGeq(n.leftID)
		}
	}
	if n.leftID.IsNull() || n.rightID.IsNull() {
		return NullLID()
	}
	// both sides point at the same LID: remember it and advance both children past it
	cur := n.leftID
	n.readLeft()
	n.readRight()
	return cur
}

func (n *nodeAnd) NextGeq(nextID LID) LID {
for !n.leftID.IsNull() && !n.rightID.IsNull() && !n.leftID.Eq(n.rightID) {
for !n.rightID.IsNull() && n.leftID.Less(n.rightID) {
n.readLeftGeq(Max(n.rightID, nextID))
}
for !n.rightID.IsNull() && n.rightID.Less(n.leftID) {
n.readRight()
n.readRightGeq(Max(n.leftID, nextID))
}
}
if n.leftID.IsNull() || n.rightID.IsNull() {
Expand Down
65 changes: 65 additions & 0 deletions node/node_and_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package node

import (
"math"
"math/rand/v2"
"testing"

"github.com/stretchr/testify/assert"
)

// TestNodeAnd_NextGeqAscending drives an And node over two static lists with a fixed sequence of
// NextGeq calls and checks each returned LID.
func TestNodeAnd_NextGeqAscending(t *testing.T) {
	left := NewStatic([]uint32{1, 2, 7, 10, 20, 25, 26, 30, 50, 80, 90, 100}, false)
	right := NewStatic([]uint32{1, 3, 4, 7, 9, 30, 40, 45, 60, 80, 110}, false)

	and := NewAnd(left, right)

	steps := []struct {
		geq      uint32
		want     uint32
		wantNull bool
	}{
		// Currently, nodes instantiate their state on creation, which will be fixed later.
		// Thus, the first LID returned is the first one shared by left and right.
		{geq: 7, want: 1},
		{geq: 7, want: 7},
		{geq: 50, want: 80},
		{geq: 50, wantNull: true},
	}

	for _, step := range steps {
		id := and.NextGeq(NewLIDOrderDesc(step.geq))
		if step.wantNull {
			assert.True(t, id.IsNull())
			continue
		}
		assert.Equal(t, step.want, id.Unpack())
	}
}

// TestNodeAnd_NextGeqCompatibility checks that repeatedly calling NextGeq with the "zero" LID for the
// given order (0, or math.MaxUint32 when reversed) yields the same sequence as calling Next.
func TestNodeAnd_NextGeqCompatibility(t *testing.T) {
	const count = 1000

	for _, rev := range []bool{true, false} {
		// two random non-decreasing sequences with steps in [0, 10)
		left := []uint32{rand.Uint32N(10)}
		right := []uint32{rand.Uint32N(10)}
		for len(left) < count {
			left = append(left, left[len(left)-1]+rand.Uint32N(10))
			right = append(right, right[len(right)-1]+rand.Uint32N(10))
		}

		plain := NewAnd(NewStatic(left, rev), NewStatic(right, rev))
		withGeq := NewAnd(NewStatic(left, rev), NewStatic(right, rev))

		lowest := uint32(0)
		if rev {
			lowest = math.MaxUint32
		}
		hint := NewLID(lowest, rev)

		for done := false; !done; {
			want := plain.Next()
			got := withGeq.NextGeq(hint)

			assert.Equal(t, want, got)

			done = want.IsNull()
		}
	}
}
4 changes: 4 additions & 0 deletions node/node_nand.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,7 @@ func (n *nodeNAnd) Next() LID {
}
return NullLID()
}

// NextGeq ignores nextID and simply delegates to Next, so the returned LID is not guaranteed to be
// greater than or equal to nextID; callers have to check the result themselves.
func (n *nodeNAnd) NextGeq(nextID LID) LID {
return n.Next()
}
Loading