Skip to content

Commit ff9d962

Browse files
authored
Merge pull request #83 Optimize Vector.sum
Daily Perf Improver - Optimize Vector.sum with hardware-accelerated horizontal reduction
2 parents d9f1a40 + babc10c commit ff9d962

File tree

2 files changed

+49
-3
lines changed

2 files changed

+49
-3
lines changed

benchmarks/FsMath.Benchmarks/Vector.fs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,23 @@ type VectorBenchmarks() =
4646
let result = Vector.norm vector1
4747
GC.KeepAlive(result) // Prevents the result from being optimized away
4848

49+
[<Benchmark>]
member _.Sum() =
    // Benchmark body: a single Vector.sum call over vector1.
    // The value is handed straight to GC.KeepAlive so the computation
    // is not optimized away.
    GC.KeepAlive(Vector.sum vector1)
53+
54+
[<Benchmark>]
member _.Product() =
    // Benchmark body: a single Vector.product call over vector1.
    // The value is handed straight to GC.KeepAlive so the computation
    // is not optimized away.
    GC.KeepAlive(Vector.product vector1)
58+
59+
[<Benchmark>]
member _.Min() =
    // Benchmark body: a single Vector.min call over vector1.
    // The value is handed straight to GC.KeepAlive so the computation
    // is not optimized away.
    GC.KeepAlive(Vector.min vector1)
63+
64+
[<Benchmark>]
member _.Max() =
    // Benchmark body: a single Vector.max call over vector1.
    // The value is handed straight to GC.KeepAlive so the computation
    // is not optimized away.
    GC.KeepAlive(Vector.max vector1)
68+

src/FsMath/SpanMath.fs

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,36 @@ type SpanMath =
253253
/// Adds up every element of the span and returns the total.
/// Returns zero for an empty span.
/// When SIMD is hardware accelerated, the element type is supported by
/// Numerics.Vector<'T>, and the span holds at least one full vector, whole
/// vector-width blocks are accumulated lane-wise, reduced horizontally with
/// Numerics.Vector.Sum, and the leftover tail is added element by element.
/// Otherwise a plain scalar loop is used.
/// Note: the SIMD path adds elements in a different order than a strict
/// left-to-right loop, so floating-point totals may differ slightly in rounding.
static member inline sum<'T when 'T :> Numerics.INumber<'T>
                and 'T : (new: unit -> 'T)
                and 'T : struct
                and 'T :> ValueType>
    (v:ReadOnlySpan<'T>) : 'T =
    // IsSupported must be tested before Count: Vector<'T>.Count throws
    // NotSupportedException for element types without SIMD support
    // (e.g. decimal), which would otherwise break sum for those types.
    let useSimd =
        Numerics.Vector.IsHardwareAccelerated
        && Numerics.Vector<'T>.IsSupported
        && v.Length >= Numerics.Vector<'T>.Count

    if useSimd then
        let simdWidth = Numerics.Vector<'T>.Count
        let simdCount = v.Length / simdWidth
        // First index that is not covered by a full SIMD block.
        let ceiling = simdWidth * simdCount

        // SIMD accumulation: one running partial sum per lane.
        let mutable accVec = Numerics.Vector<'T>.Zero
        for i = 0 to simdCount - 1 do
            let srcIndex = i * simdWidth
            accVec <- accVec + Numerics.Vector<'T>(v.Slice(srcIndex, simdWidth))

        // Horizontal reduction of the per-lane partial sums.
        let mutable acc = Numerics.Vector.Sum(accVec)

        // Tail: elements that did not fill a whole vector.
        for i = ceiling to v.Length - 1 do
            acc <- acc + v.[i]

        acc
    else
        // Scalar fallback; also covers the empty span (the loop body never
        // runs and zero is returned).
        let mutable acc = LanguagePrimitives.GenericZero<'T>
        for i = 0 to v.Length - 1 do
            acc <- acc + v.[i]
        acc
260286

261287

262288
/// Computes the product of all elements in the vector.

0 commit comments

Comments
 (0)