diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index a01a6b8..6db0b49 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go: [ '1.15', 'stable' ] + go: [ '1.18', 'stable' ] name: Tests on Go ${{ matrix.go }} steps: - name: Checkout Repo diff --git a/Dockerfile b/Dockerfile index 2e315f1..e09da3c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.16-alpine3.13 AS build-go +FROM golang:1.18-alpine AS build-go ARG GIT_SSH_KEY ARG KNOWN_HOSTS_CONTENT diff --git a/README.md b/README.md index b4480ef..e6af323 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,27 @@ in that package. A standard Fibonacci heap providing the usual operations. Can be useful in executing Dijkstra or Prim's algorithms in the theoretically minimal time. Also useful as a general-purpose priority queue. The special thing about Fibonacci heaps versus other heap variants is the cheap decrease-key operation. This heap has a constant complexity for find minimum, insert and merge of two heaps, an amortized constant complexity for decrease key and O(log(n)) complexity for a deletion or dequeue minimum. In practice the constant factors are large, so Fibonacci heaps could be slower than Pairing heaps, depending on usage. Benchmarks - in the project subfolder. The heap has not been designed for thread-safety. +#### Binary and D-ary Heaps + +Generic, comparator-based heaps implemented with Go 1.18 generics. 
+ +- NewHeap[T](compare func(T, T) int) *Heap[T] +- NewDaryHeap[T](d int, compare func(T, T) int) *DaryHeap[T] +- NewHeapFromSlice[T](values []T, compare func(T, T) int) *Heap[T] +- NewDaryHeapFromSlice[T](d int, values []T, compare func(T, T) int) *DaryHeap[T] +- (h *Heap[T]) Peek() (value T, ok bool) +- (h *DaryHeap[T]) Peek() (value T, ok bool) +- (h *Heap[T]) Pop() (value T, ok bool) +- (h *DaryHeap[T]) Pop() (value T, ok bool) + +Comparator contract: compare(a, b) should return -1 if a < b, 0 if a == b, and 1 if a > b. If the comparator orders values in ascending order, the heap behaves as a min-heap. To build a max-heap, invert the comparator (e.g., return -compare(a, b)). + +Goroutine-safety: Both heaps are safe for concurrent use by multiple goroutines. Internally they use sync.RWMutex to guard state. The comparator must be pure/non-blocking and must not call back into the heap (reentrant use would deadlock). + +Zero allocations: Pop and Peek never allocate, and Push performs 0 allocations in steady state, i.e. once the backing slice has grown (via append) to the working-set size; after that, operations only swap elements in place. Push can still allocate while the slice is growing. Benchmarks use testing.B.ReportAllocs to validate 0 allocs/op. + +The D-ary heap is a generalization of the binary heap using a branching factor d. Indexing uses parent=(i-1)/d and children in [d*i+1, d*i+d]. + #### Range Tree Useful to determine if n-dimensional points fall within an n-dimensional range. @@ -149,7 +170,7 @@ interface and the most expensive operation in CPU profiling is the interface method which in turn calls into runtime.assertI2T. We need generics. #### Immutable B Tree -A btree based on two principles, immutability and concurrency. +A btree based on two principles, immutability and concurrency. Somewhat slow for single value lookups and puts, it is very fast for bulk operations. A persister can be injected to make this index persistent. @@ -185,8 +206,8 @@ operations are O(n) as you would expect. 
#### Simple Graph -A mutable, non-persistent undirected graph where parallel edges and self-loops are -not permitted. Operations to add an edge as well as retrieve the total number of +A mutable, non-persistent undirected graph where parallel edges and self-loops are +not permitted. Operations to add an edge as well as retrieve the total number of vertices/edges are O(1) while the operation to retrieve the vertices adjacent to a target is O(n). For more details see [wikipedia](https://en.wikipedia.org/wiki/Graph_(discrete_mathematics)#Simple_graph) diff --git a/go.mod b/go.mod index 3584b30..9b4bfa8 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,16 @@ module github.com/Workiva/go-datastructures -go 1.15 +go 1.18 require ( github.com/stretchr/testify v1.7.0 github.com/tinylib/msgp v1.1.5 ) + +require ( + github.com/davecgh/go-spew v1.1.0 // indirect + github.com/philhofer/fwd v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/stretchr/objx v0.1.0 // indirect + gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect +) diff --git a/heap/binary.go b/heap/binary.go new file mode 100644 index 0000000..497ef55 --- /dev/null +++ b/heap/binary.go @@ -0,0 +1,130 @@ +/* +MIT License + +Copyright (c) 2021 Florimond Husquinet + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
// Heap is a generic binary heap ordered by a caller-supplied comparator.
//
// The element that compares lowest sits at the root, so an ascending
// comparator gives a min-heap and an inverted comparator gives a max-heap.
// Every exported method acquires mu, so a *Heap can be shared between
// goroutines. The comparator must not call back into the heap, because the
// lock is held while it runs.
type Heap[T any] struct {
	mu      sync.RWMutex
	data    []T
	compare func(T, T) int
}

// NewHeap constructs an empty heap using the provided comparator.
// The comparator should return a negative value if a < b, 0 if a == b, and a
// positive value if a > b (only the sign is inspected).
// If compare orders values in ascending order, the heap behaves as a min-heap.
// To build a max-heap, invert the comparator (e.g., return -compare(a, b)).
func NewHeap[T any](compare func(T, T) int) *Heap[T] {
	return &Heap[T]{compare: compare}
}

// NewHeapFromSlice builds a heap from an initial slice using bottom-up
// heapify. The input is copied, so values is never reordered and later
// changes to it do not affect the heap.
func NewHeapFromSlice[T any](values []T, compare func(T, T) int) *Heap[T] {
	h := &Heap[T]{
		data:    append([]T(nil), values...),
		compare: compare,
	}
	// Leaves are already valid heaps; start at the last node that has a child.
	for i := len(h.data)/2 - 1; i >= 0; i-- {
		h.sinkDown(i)
	}
	return h
}

// Peek returns the top element without removing it.
// ok is false, and value is the zero value of T, when the heap is empty.
func (h *Heap[T]) Peek() (value T, ok bool) {
	h.mu.RLock()
	defer h.mu.RUnlock()
	if len(h.data) == 0 {
		return value, false
	}
	return h.data[0], true
}

// Len returns the number of elements currently stored in the heap.
func (h *Heap[T]) Len() int {
	h.mu.RLock()
	defer h.mu.RUnlock()
	return len(h.data)
}

// Push adds value to the heap.
func (h *Heap[T]) Push(value T) {
	h.mu.Lock()
	defer h.mu.Unlock()
	h.data = append(h.data, value)
	h.bubbleUp(len(h.data) - 1)
}

// Pop removes and returns the top element.
// ok is false, and value is the zero value of T, when the heap is empty.
func (h *Heap[T]) Pop() (value T, ok bool) {
	h.mu.Lock()
	defer h.mu.Unlock()
	n := len(h.data)
	if n == 0 {
		return value, false
	}
	top := h.data[0]
	last := n - 1
	h.data[0] = h.data[last]
	// Clear the vacated slot before shrinking: otherwise the backing array
	// keeps a stale copy of the element and, for pointer-bearing T, keeps it
	// reachable for the garbage collector.
	var zero T
	h.data[last] = zero
	h.data = h.data[:last]
	h.sinkDown(0)
	return top, true
}

// bubbleUp moves the element at index towards the root while it compares
// lower than its parent. Callers must hold the write lock.
func (h *Heap[T]) bubbleUp(index int) {
	for index > 0 {
		parent := (index - 1) / 2
		if h.compare(h.data[index], h.data[parent]) >= 0 {
			return
		}
		h.swap(index, parent)
		index = parent
	}
}

// sinkDown moves the element at index towards the leaves, swapping it with
// its lowest child until neither child compares lower. Callers must hold the
// write lock (or own the heap exclusively, as the constructors do).
func (h *Heap[T]) sinkDown(index int) {
	n := len(h.data)
	for {
		smallest := index
		left := 2*index + 1
		right := left + 1
		if left < n && h.compare(h.data[left], h.data[smallest]) < 0 {
			smallest = left
		}
		if right < n && h.compare(h.data[right], h.data[smallest]) < 0 {
			smallest = right
		}
		if smallest == index {
			return
		}
		h.swap(index, smallest)
		index = smallest
	}
}

// swap exchanges the elements at positions i and j.
func (h *Heap[T]) swap(i, j int) {
	h.data[i], h.data[j] = h.data[j], h.data[i]
}
b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + h.Push(i) + } + for i := 0; i < b.N; i++ { + h.Pop() + } + }) +} + +func TestHeapifyAndPeek(t *testing.T) { + cmp := func(a, b int) int { + if a < b { + return -1 + } + if a > b { + return 1 + } + return 0 + } + values := []int{5, 3, 9, 1, 4, 8, 2} + h := NewHeapFromSlice(values, cmp) + peek, ok := h.Peek() + assert.True(t, ok) + assert.Equal(t, 1, peek) + prev := -1 << 31 + for { + v, ok := h.Pop() + if !ok { + break + } + assert.GreaterOrEqual(t, v, prev) + prev = v + } +} +func BenchmarkBinaryVsDary2_Mixed(b *testing.B) { + cmp := func(a, b int) int { + if a < b { + return -1 + } + if a > b { + return 1 + } + return 0 + } + // 50/50 push/pop random workload. + // Track heapSize instead of calling Len() in the hot loop to avoid lock overhead + // and to prevent Pop on an empty heap. + b.Run("binary-mixed-50-50", func(b *testing.B) { + rng := rand.New(rand.NewSource(1)) + h := NewHeap[int](cmp) + heapSize := 0 + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if rng.Intn(2) == 0 { + h.Push(i) + heapSize++ + } else if heapSize > 0 { + _, ok := h.Pop() + if ok { + heapSize-- + } + } else { + h.Push(i) + heapSize++ + } + } + }) + b.Run("dary2-mixed-50-50", func(b *testing.B) { + rng := rand.New(rand.NewSource(1)) + h := NewDaryHeap[int](2, cmp) + heapSize := 0 + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if rng.Intn(2) == 0 { + h.Push(i) + heapSize++ + } else if heapSize > 0 { + _, ok := h.Pop() + if ok { + heapSize-- + } + } else { + h.Push(i) + heapSize++ + } + } + }) +} + +func BenchmarkBinaryVsDary2_Heapify(b *testing.B) { + cmp := func(a, b int) int { + if a < b { + return -1 + } + if a > b { + return 1 + } + return 0 + } + // Build from slice then pop all + const size = 10000 + values := make([]int, size) + for i := 0; i < size; i++ { + values[i] = size - i + } + b.Run("binary-heapify-then-pop", func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for i 
:= 0; i < b.N; i++ { + h := NewHeapFromSlice(values, cmp) + for j := 0; j < size; j++ { + h.Pop() + } + } + }) + b.Run("dary2-heapify-then-pop", func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + h := NewDaryHeapFromSlice(2, values, cmp) + for j := 0; j < size; j++ { + h.Pop() + } + } + }) +} + +func BenchmarkBinaryVsDary2_Bursts(b *testing.B) { + cmp := func(a, b int) int { + if a < b { + return -1 + } + if a > b { + return 1 + } + return 0 + } + const burst = 64 + b.Run("binary-bursts", func(b *testing.B) { + h := NewHeap[int](cmp) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i += burst { + for j := 0; j < burst; j++ { + h.Push(i + j) + } + for j := 0; j < burst; j++ { + h.Pop() + } + } + }) + b.Run("dary2-bursts", func(b *testing.B) { + h := NewDaryHeap[int](2, cmp) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i += burst { + for j := 0; j < burst; j++ { + h.Push(i + j) + } + for j := 0; j < burst; j++ { + h.Pop() + } + } + }) +} diff --git a/heap/dary.go b/heap/dary.go new file mode 100644 index 0000000..f40bf7b --- /dev/null +++ b/heap/dary.go @@ -0,0 +1,140 @@ +/* +MIT License + +Copyright (c) 2021 Florimond Husquinet + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
// DaryHeap is a generic d-ary heap ordered by a caller-supplied comparator.
//
// It generalizes the binary heap: the node at index i has its parent at
// (i-1)/d and its children at d*i+1 .. d*i+d. Every exported method acquires
// mu, so a *DaryHeap can be shared between goroutines. The comparator must
// not call back into the heap, because the lock is held while it runs.
type DaryHeap[T any] struct {
	mu      sync.RWMutex
	d       int
	data    []T
	compare func(T, T) int
}

// NewDaryHeap constructs an empty d-ary heap using the provided comparator.
// The comparator should return a negative value if a < b, 0 if a == b, and a
// positive value if a > b (only the sign is inspected).
// If compare orders values in ascending order, the heap behaves as a min-heap.
// To build a max-heap, invert the comparator (e.g., return -compare(a, b)).
//
// A branching factor d below 2 is treated as 2: d == 0 would divide by zero
// in the parent computation and a negative d would produce negative indices.
func NewDaryHeap[T any](d int, compare func(T, T) int) *DaryHeap[T] {
	if d < 2 {
		d = 2
	}
	return &DaryHeap[T]{
		d:       d,
		compare: compare,
	}
}

// NewDaryHeapFromSlice builds a d-ary heap from an initial slice using
// bottom-up heapify. The input is copied, so values is never reordered.
// A branching factor d below 2 is treated as 2, as in NewDaryHeap.
func NewDaryHeapFromSlice[T any](d int, values []T, compare func(T, T) int) *DaryHeap[T] {
	if d < 2 {
		d = 2
	}
	h := &DaryHeap[T]{
		d:       d,
		data:    append([]T(nil), values...),
		compare: compare,
	}
	// The last node that has a child is the parent of index n-1, i.e.
	// (n-2)/d. Everything after it is a leaf and needs no work.
	if n := len(h.data); n > 1 {
		for i := (n - 2) / d; i >= 0; i-- {
			h.sinkDown(i)
		}
	}
	return h
}

// Peek returns the top element without removing it.
// ok is false, and value is the zero value of T, when the heap is empty.
func (h *DaryHeap[T]) Peek() (value T, ok bool) {
	h.mu.RLock()
	defer h.mu.RUnlock()
	if len(h.data) == 0 {
		return value, false
	}
	return h.data[0], true
}

// Len returns the number of elements currently stored in the heap.
func (h *DaryHeap[T]) Len() int {
	h.mu.RLock()
	defer h.mu.RUnlock()
	return len(h.data)
}

// Push adds value to the heap.
func (h *DaryHeap[T]) Push(value T) {
	h.mu.Lock()
	defer h.mu.Unlock()
	h.data = append(h.data, value)
	h.bubbleUp(len(h.data) - 1)
}

// Pop removes and returns the top element.
// ok is false, and value is the zero value of T, when the heap is empty.
func (h *DaryHeap[T]) Pop() (value T, ok bool) {
	h.mu.Lock()
	defer h.mu.Unlock()
	n := len(h.data)
	if n == 0 {
		return value, false
	}
	top := h.data[0]
	last := n - 1
	h.data[0] = h.data[last]
	// Clear the vacated slot before shrinking: otherwise the backing array
	// keeps a stale copy of the element and, for pointer-bearing T, keeps it
	// reachable for the garbage collector.
	var zero T
	h.data[last] = zero
	h.data = h.data[:last]
	h.sinkDown(0)
	return top, true
}

// bubbleUp moves the element at index towards the root while it compares
// lower than its parent. Callers must hold the write lock.
func (h *DaryHeap[T]) bubbleUp(index int) {
	for index > 0 {
		parent := (index - 1) / h.d
		if h.compare(h.data[index], h.data[parent]) >= 0 {
			return
		}
		h.swap(index, parent)
		index = parent
	}
}

// sinkDown moves the element at index towards the leaves, swapping it with
// the lowest of its up-to-d children until no child compares lower. Callers
// must hold the write lock (or own the heap exclusively, as the constructors
// do).
func (h *DaryHeap[T]) sinkDown(index int) {
	n := len(h.data)
	for {
		first := h.d*index + 1
		if first >= n {
			return // index is a leaf
		}
		last := first + h.d
		if last > n {
			last = n
		}
		smallest := index
		for child := first; child < last; child++ {
			if h.compare(h.data[child], h.data[smallest]) < 0 {
				smallest = child
			}
		}
		if smallest == index {
			return
		}
		h.swap(index, smallest)
		index = smallest
	}
}

// swap exchanges the elements at positions i and j.
func (h *DaryHeap[T]) swap(i, j int) {
	h.data[i], h.data[j] = h.data[j], h.data[i]
}
if !ok { + break + } + assert.GreaterOrEqual(t, v, prev) + prev = v + } + } +} diff --git a/heap/fuzz_test.go b/heap/fuzz_test.go new file mode 100644 index 0000000..dd59bb7 --- /dev/null +++ b/heap/fuzz_test.go @@ -0,0 +1,126 @@ +package heap + +// This file contains fuzz/property tests. We keep them separate from deterministic +// unit tests and benchmarks to: +// (1) make it explicit they run under the fuzzing engine (go test -run Fuzz -fuzz=...) +// (2) avoid mixing fuzz-specific helpers and seeds with regular unit tests +// (3) simplify CI configuration where fuzzing may be opt-in or longer-running + +import ( + "math/rand" + "testing" +) + +// helper to convert bytes to ints with negatives +func bytesToInts(data []byte) []int { + res := make([]int, len(data)) + for i, b := range data { + res[i] = int(int8(b)) + } + return res +} + +// FuzzHeapProperties validates ordering for binary heap across random inputs. +func FuzzHeapProperties(f *testing.F) { + seeds := [][]byte{ + {}, + {1}, + {5, 4, 3, 2, 1}, + {0, 0, 0}, + {251, 10, 254, 7, 7, 3}, // negative via int8 + } + for _, s := range seeds { + f.Add(s) + } + + cmp := func(a, b int) int { + if a < b { + return -1 + } + if a > b { + return 1 + } + return 0 + } + + f.Fuzz(func(t *testing.T, data []byte) { + arr := bytesToInts(data) + rand.Shuffle(len(arr), func(i, j int) { arr[i], arr[j] = arr[j], arr[i] }) + h := NewHeap[int](cmp) + for _, v := range arr { + h.Push(v) + } + + if len(arr) == 0 { + if _, ok := h.Peek(); ok { + t.Fatalf("expected empty heap to have no peek") + } + } else { + min := arr[0] + for _, v := range arr[1:] { + if v < min { + min = v + } + } + if top, ok := h.Peek(); !ok || top != min { + t.Fatalf("peek mismatch: got %v %v, want %v true", top, ok, min) + } + } + + prevSet := false + var prev int + for { + v, ok := h.Pop() + if !ok { + break + } + if prevSet && v < prev { + t.Fatalf("heap order violated: %v < %v", v, prev) + } + prev = v + prevSet = true + } + }) +} + +// 
FuzzDaryHeapProperties validates ordering for several d across random inputs. +func FuzzDaryHeapProperties(f *testing.F) { + seeds := [][]byte{ + {}, {1}, {2, 1}, {3, 1, 2}, {255, 255, 0, 5}, + } + for _, s := range seeds { + f.Add(s) + } + + cmp := func(a, b int) int { + if a < b { + return -1 + } + if a > b { + return 1 + } + return 0 + } + + dVals := []int{2, 3, 4, 6, 8} + f.Fuzz(func(t *testing.T, data []byte) { + arr := bytesToInts(data) + rand.Shuffle(len(arr), func(i, j int) { arr[i], arr[j] = arr[j], arr[i] }) + for _, d := range dVals { + h := NewDaryHeapFromSlice[int](d, arr, cmp) + prevSet := false + var prev int + for { + v, ok := h.Pop() + if !ok { + break + } + if prevSet && v < prev { + t.Fatalf("d-ary(%d) heap order violated: %v < %v", d, v, prev) + } + prev = v + prevSet = true + } + } + }) +}