From 80b145f6cbb9899b96ab0c50baf6c4018755b4a2 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Thu, 16 Oct 2025 15:39:54 -0500 Subject: [PATCH 1/9] NEO v25.35.35096 --- Project.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 7107e98d..57aea78a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "oneAPI" uuid = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" authors = ["Tim Besard "] -version = "2.4.0" +version = "2.4.1" [deps] AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" @@ -41,14 +41,14 @@ GPUCompiler = "1.5" GPUToolbox = "0.1, 0.2, 0.3, 1" KernelAbstractions = "0.9.1" LLVM = "6, 7, 8, 9" -NEO_jll = "=25.31.34666" +NEO_jll = "=25.35.35096" Preferences = "1" SPIRVIntrinsics = "0.2" SPIRV_LLVM_Translator_jll = "20" SpecialFunctions = "1.3, 2" StaticArrays = "1" julia = "1.10" -oneAPI_Level_Zero_Loader_jll = "1.22" +oneAPI_Level_Zero_Loader_jll = "1.24" oneAPI_Support_jll = "0.9.2" [extras] From d12615778f056521e0750223c703d0083cb97ec6 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Tue, 28 Oct 2025 09:36:41 -0500 Subject: [PATCH 2/9] Bump SPIRV_Tools --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 57aea78a..2c66f38d 100644 --- a/Project.toml +++ b/Project.toml @@ -44,7 +44,8 @@ LLVM = "6, 7, 8, 9" NEO_jll = "=25.35.35096" Preferences = "1" SPIRVIntrinsics = "0.2" -SPIRV_LLVM_Translator_jll = "20" +SPIRV_LLVM_Translator_jll = "21" +SPIRV_Tools_jll = "2025.4.0" SpecialFunctions = "1.3, 2" StaticArrays = "1" julia = "1.10" From eca38bfc8c8f35c2596276c27b76dfe0203290cf Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Tue, 28 Oct 2025 09:58:44 -0500 Subject: [PATCH 3/9] SPIRVIntrinsics 0.3 --- Project.toml | 2 +- src/array.jl | 4 ++-- src/compiler/execution.jl | 2 +- test/execution.jl | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index 2c66f38d..6d53bbba 100644 --- a/Project.toml +++ b/Project.toml @@ -43,7 +43,7 @@ KernelAbstractions = "0.9.1" LLVM = "6, 7, 8, 9" NEO_jll = "=25.35.35096" Preferences = "1" -SPIRVIntrinsics = "0.2" +SPIRVIntrinsics = "0.3" SPIRV_LLVM_Translator_jll = "21" SPIRV_Tools_jll = "2025.4.0" SpecialFunctions = "1.3, 2" diff --git a/src/array.jl b/src/array.jl index edc6b449..d576cdb7 100644 --- a/src/array.jl +++ b/src/array.jl @@ -279,8 +279,8 @@ end ## interop with GPU arrays -function Base.unsafe_convert(::Type{oneDeviceArray{T,N,AS.Global}}, a::oneArray{T,N}) where {T,N} - oneDeviceArray{T,N,AS.Global}(size(a), reinterpret(LLVMPtr{T,AS.Global}, pointer(a)), +function Base.unsafe_convert(::Type{oneDeviceArray{T,N,AS.CrossWorkgroup}}, a::oneArray{T,N}) where {T,N} + oneDeviceArray{T,N,AS.CrossWorkgroup}(size(a), reinterpret(LLVMPtr{T,AS.CrossWorkgroup}, pointer(a)), a.maxsize - a.offset*Base.elsize(a)) end diff --git a/src/compiler/execution.jl b/src/compiler/execution.jl index 7101eaae..6503a9b0 100644 --- a/src/compiler/execution.jl +++ b/src/compiler/execution.jl @@ -88,7 +88,7 @@ Adapt.adapt_storage(to::KernelAdaptor, p::ZePtr{T}) where {T} = reinterpret(Ptr{ # convert oneAPI host arrays to device arrays Adapt.adapt_storage(::KernelAdaptor, xs::oneArray{T,N}) where {T,N} = - Base.unsafe_convert(oneDeviceArray{T,N,AS.Global}, xs) + Base.unsafe_convert(oneDeviceArray{T,N,AS.CrossWorkgroup}, xs) # Base.RefValue isn't GPU compatible, so provide a compatible alternative. # TODO: port improvements from CUDA.jl diff --git a/test/execution.jl b/test/execution.jl index 596d0d8f..592c95ca 100644 --- a/test/execution.jl +++ b/test/execution.jl @@ -606,7 +606,7 @@ end # conversions from integers to pointers resulted in lost memory stores function kernel(ptr) - ptr = reinterpret(Core.LLVMPtr{Float32, AS.Global}, ptr) + ptr = reinterpret(Core.LLVMPtr{Float32, AS.CrossWorkgroup}, ptr) unsafe_store!(ptr, 42) return end From d3157272ae9964a4e1b21ec48b45eab94fd088aa Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Tue, 28 Oct 2025 10:08:31 -0500 Subject: [PATCH 4/9] SPIRVIntrinsics 0.4 --- Project.toml | 4 ++-- src/compiler/compilation.jl | 3 ++- src/oneAPI.jl | 1 + src/oneAPIKernels.jl | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 6d53bbba..0d78c2c4 100644 --- a/Project.toml +++ b/Project.toml @@ -37,13 +37,13 @@ Adapt = "4" CEnum = "0.4, 0.5" ExprTools = "0.1" GPUArrays = "11.2.1" -GPUCompiler = "1.5" +GPUCompiler = "1.6" GPUToolbox = "0.1, 0.2, 0.3, 1" KernelAbstractions = "0.9.1" LLVM = "6, 7, 8, 9" NEO_jll = "=25.35.35096" Preferences = "1" -SPIRVIntrinsics = "0.3" +SPIRVIntrinsics = "0.4" SPIRV_LLVM_Translator_jll = "21" SPIRV_Tools_jll = "2025.4.0" SpecialFunctions = "1.3, 2" diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl index 5fbcb9c9..36477995 100644 --- a/src/compiler/compilation.jl +++ b/src/compiler/compilation.jl @@ -6,7 +6,8 @@ const oneAPICompilerJob = CompilerJob{SPIRVCompilerTarget,oneAPICompilerParams} GPUCompiler.runtime_module(::oneAPICompilerJob) = oneAPI -GPUCompiler.method_table(::oneAPICompilerJob) = method_table +GPUCompiler.method_table_view(job::oneAPICompilerJob) = + GPUCompiler.StackedMethodTable(job.world, method_table, SPIRVIntrinsics.method_table) # filter out OpenCL built-ins # TODO: eagerly lower these using the translator API diff --git a/src/oneAPI.jl b/src/oneAPI.jl index b7f8b527..9e39fa9f 100644 --- a/src/oneAPI.jl +++ b/src/oneAPI.jl @@ -30,6 +30,7 @@ functional() = oneL0.functional[] import SPIRVIntrinsics SPIRVIntrinsics.@import_all SPIRVIntrinsics.@reexport_public +Base.Experimental.@MethodTable(method_table) include("device/runtime.jl") include("device/array.jl") include("device/quirks.jl") diff --git a/src/oneAPIKernels.jl b/src/oneAPIKernels.jl index 66729b57..54fcfe30 100644 --- a/src/oneAPIKernels.jl +++ b/src/oneAPIKernels.jl @@ -1,7 +1,7 @@ module oneAPIKernels using ..oneAPI -using ..oneAPI: @device_override +using ..oneAPI: @device_override, SPIRVIntrinsics, method_table import KernelAbstractions as KA From 4e198dd4ad4c5bc26962774fa4b7df647ab6a69e Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Tue, 28 Oct 2025 10:18:11 -0500 Subject: [PATCH 5/9] SPIRVIntrinsics 0.5 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0d78c2c4..8a6a2375 100644 --- a/Project.toml +++ b/Project.toml @@ -43,7 +43,7 @@ KernelAbstractions = "0.9.1" LLVM = "6, 7, 8, 9" NEO_jll = "=25.35.35096" Preferences = "1" -SPIRVIntrinsics = "0.4" +SPIRVIntrinsics = "0.5" SPIRV_LLVM_Translator_jll = "21" SPIRV_Tools_jll = "2025.4.0" SpecialFunctions = "1.3, 2" From eb2acb1819a0bcc9b75891928f0c2f8218338931 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Tue, 28 Oct 2025 13:14:11 -0500 Subject: [PATCH 6/9] opencl_builtins -> known_intrinsics --- src/compiler/compilation.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl index 36477995..aeb83f40 100644 --- a/src/compiler/compilation.jl +++ b/src/compiler/compilation.jl @@ -15,7 +15,8 @@ GPUCompiler.isintrinsic(job::oneAPICompilerJob, fn::String) = invoke(GPUCompiler.isintrinsic, Tuple{CompilerJob{SPIRVCompilerTarget}, typeof(fn)}, job, fn) || - in(fn, opencl_builtins) + in(fn, known_intrinsics) || + contains(fn, "__spirv_") function GPUCompiler.finish_module!(job::oneAPICompilerJob, mod::LLVM.Module, entry::LLVM.Function) From cc070411e3a64089ae9b44635fa0c38cc3d231e1 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Tue, 28 Oct 2025 14:07:56 -0500 Subject: [PATCH 7/9] Fix barrier --- README.md | 2 +- src/mapreduce.jl | 2 +- src/oneAPIKernels.jl | 2 +- test/device/intrinsics.jl | 4 ++-- test/execution.jl | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 8b87ccc6..f4ebdf60 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ translator](https://github.com/KhronosGroup/SPIRV-LLVM-Translator): ```julia julia> function kernel() - barrier() + barrier(0) return end diff --git a/src/mapreduce.jl b/src/mapreduce.jl index 7f3d2e55..fd1a2c71 100644 --- a/src/mapreduce.jl +++ b/src/mapreduce.jl @@ -16,7 +16,7 @@ # perform a reduction d = 1 while d < items - barrier() + barrier(0) index = 2 * d * (item-1) + 1 @inbounds if index <= items other_val = if index + d <= items diff --git a/src/oneAPIKernels.jl b/src/oneAPIKernels.jl index 54fcfe30..2fd144ad 100644 --- a/src/oneAPIKernels.jl +++ b/src/oneAPIKernels.jl @@ -161,7 +161,7 @@ end ## Synchronization and Printing @device_override @inline function KA.__synchronize() - barrier() + barrier(0) end @device_override @inline function KA.__print(args...) diff --git a/test/device/intrinsics.jl b/test/device/intrinsics.jl index 713d55ef..5e5605ef 100644 --- a/test/device/intrinsics.jl +++ b/test/device/intrinsics.jl @@ -226,7 +226,7 @@ end s[t] = d[t] s2[t] = 2*d[t] - barrier() + barrier(0) d[t] = s[tr] return @@ -252,7 +252,7 @@ end s[t] = d[t] s2[t] = d[t] - barrier() + barrier(0) d[t] = s[tr] return diff --git a/test/execution.jl b/test/execution.jl index 592c95ca..cd3db014 100644 --- a/test/execution.jl +++ b/test/execution.jl @@ -569,18 +569,18 @@ end r[tx] = r_[tx] - barrier() + barrier(0) for j=1:n if tx == 1 r[j] = r[j] / 2f0 end - barrier() + barrier(0) if tx > j && tx <= 4 r[tx] = r[tx] - 2f0*r[j] end - barrier() + barrier(0) end if bx == 1 From 5681011c51060ef855ac9014a0d9908cf0328476 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Thu, 30 Oct 2025 08:49:02 -0500 Subject: [PATCH 8/9] Fix checkbounds override for Int128 --- src/device/quirks.jl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/device/quirks.jl b/src/device/quirks.jl index ab532f40..837a03be 100644 --- a/src/device/quirks.jl +++ b/src/device/quirks.jl @@ -60,10 +60,9 @@ end # From Metal.jl to avoid widemul and Int128 @static if VERSION >= v"1.12.0-DEV.1736" # Partially reverts JuliaLang/julia PR #56750 - let BitInteger64 = Union{Int64, UInt64} - @device_override function Base.checkbounds(::Type{Bool}, v::StepRange{<:BitInteger64, <:BitInteger64}, i::BitInteger64) - @inline - return checkindex(Bool, eachindex(IndexLinear(), v), i) - end + const BitInteger64 = Union{Int64, UInt64} + @device_override function Base.checkbounds(::Type{Bool}, v::StepRange{<:BitInteger64, <:BitInteger64}, i::BitInteger64) + @inline + return checkindex(Bool, eachindex(IndexLinear(), v), i) end end From 88ee7ca9b2058ba6fddc242b530d0a4c80e3122b Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Fri, 31 Oct 2025 09:06:17 -0500 Subject: [PATCH 9/9] Add div(::Float32, ::Float32) override --- src/device/quirks.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/device/quirks.jl b/src/device/quirks.jl index 837a03be..987922a5 100644 --- a/src/device/quirks.jl +++ b/src/device/quirks.jl @@ -65,4 +65,8 @@ end @inline return checkindex(Bool, eachindex(IndexLinear(), v), i) end + + # Less accurate division for Float32 than Base Julia which relies on Float64 + # https://github.com/JuliaLang/julia/pull/49637 + @device_override Base.div(x::Float32, y::Float32) = trunc(x / y) end