@@ -264,14 +264,44 @@ end
# Mask with no active lanes for a plain native scalar: a single `false`.
@inline function zero_mask(::NativeTypes)
    return false
end
# Mask with every lane active for a plain native scalar: a single `true`.
@inline function max_mask(::NativeTypes)
    return true
end
266266
"""
    sext(::Type{Vec{W,I}}, m::AbstractMask{W,U})

Sign-extend the `W` mask bits of `m` into a `Vec{W,I}`.

The generated LLVM IR sign-extends a `<W x i1>` value to `<W x iN>` with
`N = 8 * sizeof(I)`, so a set bit becomes a lane with all bits set and a clear bit
becomes a zero lane.

# Arguments
- `::Type{Vec{W,I}}`: the requested result vector type; `I` must be an `IntegerTypesHW`.
- `m`: mask whose first field (read with `getfield(m, 1, false)`) is the raw `U` bits.

# Returns
`Vec(...)` wrapping the `_Vec{W,I}` value produced by the `llvmcall`.
"""
@generated function sext(
    ::Type{Vec{W,I}},
    m::AbstractMask{W,U}
) where {W,I<:IntegerTypesHW,U<:Union{UInt8,UInt16,UInt32,UInt64}}
    bits = 8 * sizeof(I)
    instrs = String[]
    # '0' names the first (and only) llvmcall argument, `%0`; `truncate_mask!` is
    # expected to emit the IR that turns it into `%mask.0` of type `<W x i1>`
    # (assumption from how `%mask.0` is consumed below -- confirm against its definition).
    truncate_mask!(instrs, '0', W, 0)
    push!(
        instrs,
        " %res = sext <$W x i1> %mask.0 to <$W x i$(bits) >\n ret <$W x i$(bits) > %res"
    )
    # Read the raw mask bits by field index, with bounds checking disabled.
    maskbits = Expr(:call, GlobalRef(Core, :getfield), :m, 1, false)
    llvmc = Expr(
        :call,
        GlobalRef(Base, :llvmcall),
        join(instrs, " \n "),
        :(_Vec{$W,$I}),
        :(Tuple{$U}),
        maskbits
    )
    return Expr(:block, Expr(:meta, :inline), Expr(:call, :Vec, llvmc))
end
289+
"""
    vany(m::AbstractMask)

Forward `m` to `_vany`, together with a flag built from two CPU feature queries:
"has `x86_64_avx512f`" OR "does not have `x86_64_avx`".

The flag's type selects the `_vany` method; the `_vany(::Mask{8}, ::False)` method is
therefore only reachable when AVX is available and AVX512F is not.
"""
@inline function vany(m::AbstractMask)
    has_avx512f = has_feature(Val(:x86_64_avx512f))
    lacks_avx = !has_feature(Val(:x86_64_avx))
    return _vany(m, has_avx512f | lacks_avx)
end
"""
    _vany(m::Mask{8}, ::False)

Return `true` when at least one lane of the 8-lane mask `m` is set, using the AVX
`vtestz.ps.256` intrinsic.

Each mask bit is sign-extended to a 32-bit lane, so the lane's sign bit equals the mask
bit. The intrinsic, given the same vector for both operands, returns nonzero only when
every lane's sign bit is clear; a result of `0` therefore means some lane is set.
"""
@inline function _vany(m::Mask{8}, ::False)
    lanes = data(reinterpret(Float32, sext(Vec{8,Int32}, m)))
    # The intrinsic name must match LLVM's spelling exactly (no surrounding whitespace).
    all_clear = ccall(
        "llvm.x86.avx.vtestz.ps.256",
        llvmcall,
        Int32,
        (_Vec{8,Float32}, _Vec{8,Float32}),
        lanes,
        lanes
    )
    return all_clear == 0
end
# Generate bit-pattern methods for masks whose backing unsigned type `U` is exactly `W`
# bits wide, so no padding bits need to be masked off before comparing:
#   * `_vany(m, flag)` -- any lane set   <=> raw bits differ from zero (flag is ignored);
#   * `vall(m)`        -- all lanes set  <=> raw bits equal `typemax(U)`.
# No one-argument `vany(::AbstractMask{W,U})` method is generated here: it would be more
# specific than `vany(::AbstractMask)` and would bypass that method's CPU-feature dispatch.
for (U, W) in [(UInt8, 8), (UInt16, 16), (UInt32, 32), (UInt64, 64)]
    z = zero(U)
    tm = typemax(U)
    @eval @inline _vany(m::AbstractMask{$W,$U}, ::B) where {B} = getfield(m, :u) != $z
    @eval @inline vall(m::AbstractMask{$W,$U}) = getfield(m, :u) == $tm
end
# TODO: use vector reduction intrinsics
274- @inline function vany (m:: AbstractMask{W} ) where {W}
304+ @inline function _vany (m:: AbstractMask{W} , :: B ) where {W, B }
275305 mm = getfield (max_mask (Val {W} ()), :u )
276306 mu = getfield (m, :u )
277307 (mu & mm) != = zero (mu)
0 commit comments