Skip to content

Commit 1af172f

Browse files
authored
Add vectorized vany for 8 wide masks (#101)
* Add vectorized vany for 8 wide masks * Don't use avx vany if avx is not supported.
1 parent 7f982e0 commit 1af172f

File tree

1 file changed

+32
-2
lines changed

1 file changed

+32
-2
lines changed

src/llvm_intrin/masks.jl

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,14 +264,44 @@ end
264264
# Mask-free fallback for native scalar types: the "all lanes off" value is plain `false`.
@inline function zero_mask(::NativeTypes)
    return false
end
265265
# Mask-free fallback for native scalar types: the "all lanes on" value is plain `true`.
@inline function max_mask(::NativeTypes)
    return true
end
266266

267+
# Generated method: sign-extend the lanes of mask `m` into a `Vec{W,I}`.
# The emitted LLVM IR applies `sext` from `<W x i1>` to `<W x iN>` (N = bit width of `I`),
# so a set lane becomes an all-ones integer and a cleared lane becomes zero.
# NOTE(review): `truncate_mask!` is assumed to append the IR that defines `%mask.0`
# as a `<W x i1>` value from the raw mask integer — confirm against its definition.
@generated function sext(
    ::Type{Vec{W,I}},
    m::AbstractMask{W,U}
) where {W,I<:IntegerTypesHW,U<:Union{UInt8,UInt16,UInt32,UInt64}}
    bits = 8 * sizeof(I)

    # Assemble the LLVM IR body line by line.
    ir_lines = String[]
    truncate_mask!(ir_lines, '0', W, 0)
    push!(
        ir_lines,
        "%res = sext <$W x i1> %mask.0 to <$W x i$(bits)>\nret <$W x i$(bits)> %res"
    )
    ir = join(ir_lines, "\n")

    # Read the mask's first field (the raw unsigned integer) without a bounds check.
    raw_mask = Expr(:call, GlobalRef(Core, :getfield), :m, 1, false)
    intrinsic_call = Expr(
        :call,
        GlobalRef(Base, :llvmcall),
        ir,
        :(_Vec{$W,$I}),
        :(Tuple{$U}),
        raw_mask
    )

    # Wrap the raw `_Vec` result in `Vec` and force inlining of the generated body.
    return Expr(:block, Expr(:meta, :inline), Expr(:call, :Vec, intrinsic_call))
end
289+
290+
# Entry point for "is any lane of the mask set?".
# Forwards to `_vany` with a flag formed as (has AVX-512F) OR (lacks AVX); the
# `_vany(::Mask{8}, ::False)` method is therefore only reachable when AVX is
# available without AVX-512F.
# NOTE(review): assumes `has_feature` returns static `True`/`False` values so the
# choice is resolved by dispatch — confirm against its definition.
@inline function vany(m::AbstractMask)
    avx512f = has_feature(Val(:x86_64_avx512f))
    no_avx = !has_feature(Val(:x86_64_avx))
    return _vany(m, avx512f | (no_avx))
end
293+
# AVX path for 8-lane masks, selected when the second argument is `False`.
# The mask is sign-extended to eight Int32 lanes and reinterpreted as Float32, so each
# lane's sign bit mirrors its mask bit. `vtestz.ps.256` on (x, x) returns 1 only when
# no sign bit is set, hence a result of 0 means at least one lane is active.
@inline function _vany(m::Mask{8}, ::False)
    widened = sext(Vec{8, Int32}, m)
    x = reinterpret(Float32, widened)
    lanes = data(x)
    all_clear = ccall(
        "llvm.x86.avx.vtestz.ps.256",
        llvmcall,
        Int32,
        (_Vec{8, Float32}, _Vec{8, Float32}),
        lanes,
        lanes,
    )
    return all_clear == 0
end
267297
# For each (unsigned storage type U, lane count W) pair where W equals the bit count of U,
# define fast-path methods via `@eval` with `zero(U)` and `typemax(U)` spliced in as
# constants: "any lane set" is `u != zero(U)` and `vall` is `u == typemax(U)`.
# In this diff the `vany` method (marked `-`) is replaced by `_vany` (marked `+`), which
# takes an extra second argument of any type and ignores it.
for (U, W) in [(UInt8, 8), (UInt16, 16), (UInt32, 32), (UInt64, 64)]
268298
z = zero(U)
269299
tm = typemax(U)
270-
@eval @inline vany(m::AbstractMask{$W,$U}) = getfield(m, :u) != $z
300+
@eval @inline _vany(m::AbstractMask{$W,$U}, ::B) where B = getfield(m, :u) != $z
271301
@eval @inline vall(m::AbstractMask{$W,$U}) = getfield(m, :u) == $tm
272302
end
273303
# TODO: use vector reduction intrinsics
274-
@inline function vany(m::AbstractMask{W}) where {W}
304+
@inline function _vany(m::AbstractMask{W}, ::B) where {W, B}
275305
mm = getfield(max_mask(Val{W}()), :u)
276306
mu = getfield(m, :u)
277307
(mu & mm) !== zero(mu)

0 commit comments

Comments
 (0)