From e628aa4ca079421f38e6fd7bea2998d85224cb7e Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 3 Nov 2025 17:36:30 +0100 Subject: [PATCH 1/3] Add test for accumulate issue --- test/test.jl | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/test/test.jl b/test/test.jl index 241deb5c8..8e3bd7136 100644 --- a/test/test.jl +++ b/test/test.jl @@ -371,5 +371,54 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk end end + # from https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652 + @kernel function unaliased_accumulate!(output, input, n) + i, j = @index(Global, NTuple) + + for k in j:n + output[i, j] += input[i, k] + end + end + + @kernel function unaliased_accumulate_local!(output, input, n) + i, j = @index(Global, NTuple) + + # Use local accumulator + sum_val = zero(eltype(output)) + for k in j:n + sum_val += input[i, k] + end + output[i, j] = sum_val + end + + @testset "unaliased accumulate" begin + backend = Backend() + N = 8 + M = 5 + + input = Float32[i + k for i in 1:M, k in 1:N] + + reference = zeros(Float32, M, N) + for i in 1:M + for j in 1:N + for k in j:N + reference[i, j] += input[i, k] + end + end + end + + # Allocate device arrays + input = adapt(backend, input) + output = KernelAbstractions.zeros(backend, Float32, M, N) + + # Perform accumulation + unaliased_accumulate!(backend)(output, input, N; ndrange = size(output)) + @test adapt(Array, output) == reference + + fill!(output, 0) + unaliased_accumulate_local!(backend)(output, input, N; ndrange = size(output)) + @test adapt(Array, output) == reference + end + return end From 653637bb82df53b25fe6fcf7e34df9018b0d91fa Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 3 Nov 2025 17:42:42 +0100 Subject: [PATCH 2/3] disable alias-scope due to miscompilations on 1.11 --- src/macros.jl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index 
86d5b0bcb..257e1cc49 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -90,7 +90,10 @@ function transform_gpu!(def, constargs, force_inbounds, unsafe_indices) pushfirst!(def[:args], :__ctx__) new_stmts = Expr[] body = MacroTools.flatten(def[:body]) - push!(new_stmts, Expr(:aliasscope)) + # On 1.11 and later having this aliasscope causes issues + # even with kernels that don't use `@Const` on arguments + # See https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652 + # push!(new_stmts, Expr(:aliasscope)) if !unsafe_indices push!(new_stmts, :(__active_lane__ = $__validindex(__ctx__))) end @@ -105,7 +108,7 @@ function transform_gpu!(def, constargs, force_inbounds, unsafe_indices) if force_inbounds push!(new_stmts, Expr(:inbounds, :pop)) end - push!(new_stmts, Expr(:popaliasscope)) + # push!(new_stmts, Expr(:popaliasscope)) push!(new_stmts, :(return nothing)) def[:body] = Expr( :let, @@ -132,7 +135,10 @@ function transform_cpu!(def, constargs, force_inbounds) pushfirst!(def[:args], :__ctx__) new_stmts = Expr[] body = MacroTools.flatten(def[:body]) - push!(new_stmts, Expr(:aliasscope)) + # On 1.11 and later having this aliasscope causes issues + # even with kernels that don't use `@Const` on arguments + # See https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652 + # push!(new_stmts, Expr(:aliasscope)) if force_inbounds push!(new_stmts, Expr(:inbounds, true)) end @@ -140,7 +146,7 @@ function transform_cpu!(def, constargs, force_inbounds) if force_inbounds push!(new_stmts, Expr(:inbounds, :pop)) end - push!(new_stmts, Expr(:popaliasscope)) + # push!(new_stmts, Expr(:popaliasscope)) push!(new_stmts, :(return nothing)) def[:body] = Expr( :let, From 218abb9cfd130e066b2b7117543b7ab0642ff54f Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 3 Nov 2025 18:03:08 +0100 Subject: [PATCH 3/3] use aliasscope when user uses Const --- src/macros.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index 
257e1cc49..696008727 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -87,13 +87,14 @@ function transform_gpu!(def, constargs, force_inbounds, unsafe_indices) push!(let_constargs, :($arg = $constify($arg))) end end + has_constargs = !isempty(let_constargs) pushfirst!(def[:args], :__ctx__) new_stmts = Expr[] body = MacroTools.flatten(def[:body]) # On 1.11 and later having this aliasscope causes issues # even with kernels that don't use `@Const` on arguments # See https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652 - # push!(new_stmts, Expr(:aliasscope)) + has_constargs && push!(new_stmts, Expr(:aliasscope)) if !unsafe_indices push!(new_stmts, :(__active_lane__ = $__validindex(__ctx__))) end @@ -108,7 +109,7 @@ function transform_gpu!(def, constargs, force_inbounds, unsafe_indices) if force_inbounds push!(new_stmts, Expr(:inbounds, :pop)) end - # push!(new_stmts, Expr(:popaliasscope)) + has_constargs && push!(new_stmts, Expr(:popaliasscope)) push!(new_stmts, :(return nothing)) def[:body] = Expr( :let, @@ -132,13 +133,14 @@ function transform_cpu!(def, constargs, force_inbounds) push!(let_constargs, :($arg = $constify($arg))) end end + has_constargs = !isempty(let_constargs) pushfirst!(def[:args], :__ctx__) new_stmts = Expr[] body = MacroTools.flatten(def[:body]) # On 1.11 and later having this aliasscope causes issues # even with kernels that don't use `@Const` on arguments # See https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652 - # push!(new_stmts, Expr(:aliasscope)) + has_constargs && push!(new_stmts, Expr(:aliasscope)) if force_inbounds push!(new_stmts, Expr(:inbounds, true)) end @@ -146,7 +148,7 @@ function transform_cpu!(def, constargs, force_inbounds) if force_inbounds push!(new_stmts, Expr(:inbounds, :pop)) end - # push!(new_stmts, Expr(:popaliasscope)) + has_constargs && push!(new_stmts, Expr(:popaliasscope)) push!(new_stmts, :(return nothing)) def[:body] = Expr( :let,