From dc0746d3c6cc40adf758c62e651843022ac69fcf Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 6 Oct 2025 12:28:27 -0300 Subject: [PATCH 01/26] Update Test.yml --- .github/workflows/Test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index c54588de..fbe12588 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -150,7 +150,7 @@ jobs: uses: julia-actions/julia-runtest@v1 if: runner.os != 'Windows' with: - test_args: '--quickfail --platform=pocl' + test_args: '--quickfail --platform=pocl --verbose' - name: Setup BusyBox if: runner.os == 'Windows' From 630c9ac80a054c0f154824fa7bb8250c1b407524 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 6 Oct 2025 18:31:54 -0300 Subject: [PATCH 02/26] Maybe it's a ram issue --- .github/workflows/Test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index fbe12588..f5dbdd11 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -150,7 +150,7 @@ jobs: uses: julia-actions/julia-runtest@v1 if: runner.os != 'Windows' with: - test_args: '--quickfail --platform=pocl --verbose' + test_args: '--quickfail --platform=pocl --verbose --jobs=2' - name: Setup BusyBox if: runner.os == 'Windows' From dbdf54be9622690df83908f36dbe7eeb820e5e63 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 6 Oct 2025 18:30:06 -0300 Subject: [PATCH 03/26] Maybe it's a virtualization issue --- .buildkite/pipeline.yml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 02476bb5..bbd4e885 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -21,3 +21,40 @@ steps: env: OCL_ICD_FILENAMES: "libnvidia-opencl.so.1" timeout_in_minutes: 60 + + - group: "macos" + key: "julia" + steps: + - label: "{{matrix.julia}} macos {{matrix.arch}}" + plugins: + - JuliaCI/julia#v1: + version: {{matrix.julia}} + - JuliaCI/julia-coverage#v1: + codecov: true + commands: | + julia --project -e ' + using Pkg + + println("--- :julia: Instantiating project") + Pkg.add("pocl_jll") + Pkg.add("InteractiveUtils") + Pkg.develop(path="lib/intrinsics") + + println("+++ :julia: Running tests") + using InteractiveUtils + InteractiveUtils.versioninfo() + Pkg.test(; coverage=true, test_args=`--platform=pocl --verbose --jobs=2`)' + agents: + queue: "juliaecosystem" + os: "macos" + arch: {{matrix.arch}} + if: build.message !~ /\[skip tests\]/ + timeout_in_minutes: 120 + matrix: + setup: + julia: + - "1.10" + - "1.12" + arch: + - "x86_64" + - "aarch64" From 5182b0e889f99b3f92ad8377a6e7fdce9c230d06 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Thu, 23 Oct 2025 13:01:58 -0300 Subject: [PATCH 04/26] Fewer tests --- .github/workflows/Test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index f5dbdd11..b57085ab 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -23,9 +23,9 @@ jobs: fail-fast: false matrix: version: ['1.10', '1.12'] - os: [ubuntu-24.04, ubuntu-24.04-arm, macOS-15, macOS-15-intel, windows-2025] + os: [ubuntu-24.04-arm, macOS-15, macOS-15-intel, windows-2025] arch: [x64, arm64] - pocl: [jll, local] + pocl: [jll] memory_backend: [usm, svm, buffer] exclude: # unsupported combinations From 2507586e6de0931b31f7b87edb231d8ab771a226 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Thu, 23 Oct 2025 13:23:03 -0300 Subject: [PATCH 05/26] fdsg --- .buildkite/pipeline.yml | 62 ++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index bbd4e885..9d2066c2 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -25,36 +25,36 @@ steps: - group: "macos" key: "julia" steps: - - label: "{{matrix.julia}} macos {{matrix.arch}}" - plugins: - - JuliaCI/julia#v1: - version: {{matrix.julia}} - - JuliaCI/julia-coverage#v1: - codecov: true - commands: | - julia --project -e ' - using Pkg + - label: "{{matrix.julia}} macos {{matrix.arch}}" + plugins: + - JuliaCI/julia#v1: + version: {{matrix.julia}} + - JuliaCI/julia-coverage#v1: + codecov: true + commands: | + julia --project -e ' + using Pkg - println("--- :julia: Instantiating project") - Pkg.add("pocl_jll") - Pkg.add("InteractiveUtils") - Pkg.develop(path="lib/intrinsics") + println("--- :julia: Instantiating project") + Pkg.add("pocl_jll") + Pkg.add("InteractiveUtils") + Pkg.develop(path="lib/intrinsics") - println("+++ :julia: Running tests") - using InteractiveUtils - InteractiveUtils.versioninfo() - Pkg.test(; coverage=true, test_args=`--platform=pocl --verbose --jobs=2`)' - agents: - queue: "juliaecosystem" - os: "macos" - arch: {{matrix.arch}} - if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 120 - matrix: - setup: - julia: - - "1.10" - - "1.12" - arch: - - "x86_64" - - "aarch64" + println("+++ :julia: Running tests") + using InteractiveUtils + InteractiveUtils.versioninfo() + Pkg.test(; coverage=true, test_args=`--platform=pocl --verbose --jobs=2`)' + agents: + queue: "juliaecosystem" + os: "macos" + arch: {{matrix.arch}} + if: build.message !~ /\[skip tests\]/ + timeout_in_minutes: 120 + matrix: + setup: + julia: + - "1.10" + - "1.12" + arch: + - "x86_64" + - "aarch64" From 0a06dade6bde89354badec364da91114829f1147 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Thu, 23 Oct 2025 13:24:08 -0300 Subject: [PATCH 06/26] jg --- .buildkite/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 9d2066c2..755c5324 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -28,7 +28,7 @@ steps: - label: "{{matrix.julia}} macos {{matrix.arch}}" plugins: - JuliaCI/julia#v1: - version: {{matrix.julia}} + version: "{{matrix.julia}}" - JuliaCI/julia-coverage#v1: codecov: true commands: | From abdc7ca8c94b0e18f7d13848b3c1b8e3e2a82e22 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Thu, 23 Oct 2025 13:24:58 -0300 Subject: [PATCH 07/26] yifri --- .buildkite/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 755c5324..47123461 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -47,7 +47,7 @@ steps: agents: queue: "juliaecosystem" os: "macos" - arch: {{matrix.arch}} + arch: "{{matrix.arch}}" if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 120 matrix: From 3fd52f744bec9a9d708aced0a17a153dbb598358 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Fri, 31 Oct 2025 15:05:47 -0300 Subject: [PATCH 08/26] Test --- .github/workflows/Test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index b57085ab..d68d4ac7 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -151,6 +151,8 @@ jobs: if: runner.os != 'Windows' with: test_args: '--quickfail --platform=pocl --verbose --jobs=2' + env: + POCL_WORK_GROUP_METHOD: cbs - name: Setup BusyBox if: runner.os == 'Windows' From fa0aba58813992cfc679bc7314a8f4fb64cfc319 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Fri, 31 Oct 2025 17:03:59 -0300 Subject: [PATCH 09/26] jutcdf --- test/runtests.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 6fd437f2..59210b4d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -46,7 +46,9 @@ do_verbose, _ = extract_flag!(ARGS, "--verbose") do_quickfail, _ = extract_flag!(ARGS, "--quickfail") include("setup.jl") # make sure everything is precompiled + @info "System information:\n" * sprint(io->OpenCL.versioninfo(io)) +@info read(`launchctl limit maxfiles`, String) @info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable." From c05588b7c54a8a451d023a473673180df0156d41 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Fri, 31 Oct 2025 17:06:17 -0300 Subject: [PATCH 10/26] htydc --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 59210b4d..1d71bd83 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -48,7 +48,7 @@ do_quickfail, _ = extract_flag!(ARGS, "--quickfail") include("setup.jl") # make sure everything is precompiled @info "System information:\n" * sprint(io->OpenCL.versioninfo(io)) -@info read(`launchctl limit maxfiles`, String) +Sys.isapple() && @info read(`launchctl limit maxfiles`, String) @info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable." From c446dff55b19e16ea736d79a5811f17caf711f31 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sat, 1 Nov 2025 20:09:43 -0300 Subject: [PATCH 11/26] tfeag --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index 1d71bd83..3e4f69b0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -49,6 +49,7 @@ include("setup.jl") # make sure everything is precompiled @info "System information:\n" * sprint(io->OpenCL.versioninfo(io)) Sys.isapple() && @info read(`launchctl limit maxfiles`, String) +Sys.isapple() && @info read(`ulimit -a`, String) @info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable." From 85e8cb6b343f74ea35b098bb606a1cbebfe30f40 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sat, 1 Nov 2025 20:18:07 -0300 Subject: [PATCH 12/26] netnhrs --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index 3e4f69b0..2402b123 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -50,6 +50,7 @@ include("setup.jl") # make sure everything is precompiled @info "System information:\n" * sprint(io->OpenCL.versioninfo(io)) Sys.isapple() && @info read(`launchctl limit maxfiles`, String) Sys.isapple() && @info read(`ulimit -a`, String) +Sys.isapple() && @info read(`df -h`, String) @info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable." From 6dccab3a60ec5287b9c083176367858b2dcb454d Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sat, 1 Nov 2025 20:31:39 -0300 Subject: [PATCH 13/26] uyf --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index 2402b123..a4aba831 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -51,6 +51,7 @@ include("setup.jl") # make sure everything is precompiled Sys.isapple() && @info read(`launchctl limit maxfiles`, String) Sys.isapple() && @info read(`ulimit -a`, String) Sys.isapple() && @info read(`df -h`, String) +Sys.isapple() && @info read(`vm_stat`, String) @info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable." From 5dfdb234417b77164a7a102d783fff683308b063 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sat, 1 Nov 2025 21:46:09 -0300 Subject: [PATCH 14/26] 8y7g8 --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index a4aba831..17f848ac 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -50,7 +50,7 @@ include("setup.jl") # make sure everything is precompiled @info "System information:\n" * sprint(io->OpenCL.versioninfo(io)) Sys.isapple() && @info read(`launchctl limit maxfiles`, String) Sys.isapple() && @info read(`ulimit -a`, String) -Sys.isapple() && @info read(`df -h`, String) +Sys.isapple() && @info read(`df -YIh`, String) Sys.isapple() && @info read(`vm_stat`, String) @info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable." From 7cdca80e04c13b4b3657b2f465d8a986b3b8c97e Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sat, 1 Nov 2025 22:03:07 -0300 Subject: [PATCH 15/26] hyrxdjutdyr --- test/intrinsics.jl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/intrinsics.jl b/test/intrinsics.jl index 69150e5e..1876af70 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -18,7 +18,7 @@ const simd_ns = [2, 3, 4, 8, 16] @testset "intrinsics" begin @testset "barrier" begin - +@warn "Barrier" @on_device barrier(OpenCL.LOCAL_MEM_FENCE) @on_device barrier(OpenCL.GLOBAL_MEM_FENCE) @on_device barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE) @@ -40,6 +40,7 @@ cl.memory_backend() isa cl.SVMBackend && @on_device work_group_barrier(OpenCL.LO end @testset "mem_fence" begin +@warn "mem_fence" @on_device mem_fence(OpenCL.LOCAL_MEM_FENCE) @on_device mem_fence(OpenCL.GLOBAL_MEM_FENCE) @@ -56,6 +57,7 @@ end end @testset "atomic_work_item_fence" begin +@warn "atomic_work_item_fence" @on_device atomic_work_item_fence(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_order_relaxed, OpenCL.memory_scope_work_item) @on_device atomic_work_item_fence(OpenCL.GLOBAL_MEM_FENCE, OpenCL.memory_order_acquire, OpenCL.memory_scope_work_group) @@ -67,6 +69,7 @@ cl.memory_backend() isa cl.SVMBackend && @on_device atomic_work_item_fence(OpenC end @testset "math" begin +@warn "math" @testset "unary - $T" for T in float_types @testset "$f" for f in [ @@ -104,6 +107,7 @@ end hypot, (^), ] + @warn "binary - $T, $f" x = rand(T) y = rand(T) broken = ispocl && T == Float16 && f == atan @@ -115,6 +119,7 @@ end @testset "$f" for f in [ fma, ] + @warn "ternary - $T, $f" x = rand(T) y = rand(T) z = rand(T) @@ -131,6 +136,8 @@ end OpenCL.rint, OpenCL.rsqrt, ] + @warn "OpenCL-specific unary - $T, $f" + x = rand(T) broken = ispocl && T == Float16 && !(f in [OpenCL.rint, OpenCL.rsqrt]) @test call_on_device(f, x) isa Real broken = broken # Just check it doesn't error @@ -149,6 +156,8 @@ end OpenCL.nextafter, OpenCL.powr, ] + @warn "OpenCL-specific binary - $T, $f" + x = rand(T) y = rand(T) broken = ispocl && T == Float16 && !(f in [OpenCL.maxmag, OpenCL.minmag]) @@ -162,6 +171,8 @@ end x = rand(T) y = rand(T) z = rand(T) + @warn "OpenCL-specific ternary - $T, $f" + @test call_on_device(OpenCL.mad, x, y, z) ≈ x * y + z end From dcbd9700953463b57b490168592d38af98e93da9 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sat, 1 Nov 2025 22:34:09 -0300 Subject: [PATCH 16/26] faevv --- test/intrinsics.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/test/intrinsics.jl b/test/intrinsics.jl index 1876af70..ff7d330e 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -18,23 +18,38 @@ const simd_ns = [2, 3, 4, 8, 16] @testset "intrinsics" begin @testset "barrier" begin -@warn "Barrier" +@warn "Barrier Local Mem fence" @on_device barrier(OpenCL.LOCAL_MEM_FENCE) +@warn "Barrier global Mem fence" @on_device barrier(OpenCL.GLOBAL_MEM_FENCE) +@warn "Barrier both Mem fence" @on_device barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE) +@warn "WorkGroup Barrier Local Mem fence" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE) +@warn "WorkGroup Barrier global Mem fence" @on_device work_group_barrier(OpenCL.GLOBAL_MEM_FENCE) +@warn "WorkGroup Barrier image Mem fence" @on_device work_group_barrier(OpenCL.IMAGE_MEM_FENCE) +@warn "Barrier" +@warn "WorkGroup Barrier L/G Mem fence" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE) +@warn "WorkGroup Barrier L/I Mem fence" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.IMAGE_MEM_FENCE) +@warn "WorkGroup Barrier L/G/I Mem fence" @on_device work_group_barrier(OpenCL.GLOBAL_MEM_FENCE | OpenCL.LOCAL_MEM_FENCE | OpenCL.IMAGE_MEM_FENCE) +@warn "Barrier" +@warn "WorkGroup Barrier Local Mem fence, work item scope" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_work_item) +@warn "WorkGroup Barrier Local Mem fence, workgroup scope" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_work_group) +@warn "WorkGroup Barrier Local Mem fence, device scope" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_device) +@warn "Skipped" cl.memory_backend() isa cl.SVMBackend && @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_all_svm_devices) +@warn "WorkGroup Barrier Local Mem fence, subgroup scope" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_sub_group) end @@ -171,7 +186,7 @@ end x = rand(T) y = rand(T) z = rand(T) - @warn "OpenCL-specific ternary - $T, $f" + @warn "OpenCL-specific ternary - $T" @test call_on_device(OpenCL.mad, x, y, z) ≈ x * y + z end From da8eb1020860fad17b71c224dbb03cfe7e9003a5 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sat, 1 Nov 2025 22:46:04 -0300 Subject: [PATCH 17/26] utf --- test/intrinsics.jl | 50 ++++++++++++++++------------------------------ 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/test/intrinsics.jl b/test/intrinsics.jl index ff7d330e..9efd9ded 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -18,39 +18,23 @@ const simd_ns = [2, 3, 4, 8, 16] @testset "intrinsics" begin @testset "barrier" begin -@warn "Barrier Local Mem fence" -@on_device barrier(OpenCL.LOCAL_MEM_FENCE) -@warn "Barrier global Mem fence" -@on_device barrier(OpenCL.GLOBAL_MEM_FENCE) -@warn "Barrier both Mem fence" -@on_device barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE) - -@warn "WorkGroup Barrier Local Mem fence" -@on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE) -@warn "WorkGroup Barrier global Mem fence" -@on_device work_group_barrier(OpenCL.GLOBAL_MEM_FENCE) -@warn "WorkGroup Barrier image Mem fence" -@on_device work_group_barrier(OpenCL.IMAGE_MEM_FENCE) - -@warn "Barrier" -@warn "WorkGroup Barrier L/G Mem fence" -@on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE) -@warn "WorkGroup Barrier L/I Mem fence" -@on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.IMAGE_MEM_FENCE) -@warn "WorkGroup Barrier L/G/I Mem fence" -@on_device work_group_barrier(OpenCL.GLOBAL_MEM_FENCE | OpenCL.LOCAL_MEM_FENCE | OpenCL.IMAGE_MEM_FENCE) - -@warn "Barrier" -@warn "WorkGroup Barrier Local Mem fence, work item scope" -@on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_work_item) -@warn "WorkGroup Barrier Local Mem fence, workgroup scope" -@on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_work_group) -@warn "WorkGroup Barrier Local Mem fence, device scope" -@on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_device) -@warn "Skipped" -cl.memory_backend() isa cl.SVMBackend && @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_all_svm_devices) -@warn "WorkGroup Barrier Local Mem fence, subgroup scope" -@on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_sub_group) +@time "Barrier Local Mem fence" @on_device barrier(OpenCL.LOCAL_MEM_FENCE) +@time "Barrier global Mem fence" @on_device barrier(OpenCL.GLOBAL_MEM_FENCE) +@time "Barrier both Mem fence" @on_device barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE) + +@time "WorkGroup Barrier Local Mem fence" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE) +@time "WorkGroup Barrier global Mem fence" @on_device work_group_barrier(OpenCL.GLOBAL_MEM_FENCE) +@time "WorkGroup Barrier image Mem fence" @on_device work_group_barrier(OpenCL.IMAGE_MEM_FENCE) + +@time "WorkGroup Barrier L/G Mem fence" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE) +@time "WorkGroup Barrier L/I Mem fence" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.IMAGE_MEM_FENCE) +@time "WorkGroup Barrier L/G/I Mem fence" @on_device work_group_barrier(OpenCL.GLOBAL_MEM_FENCE | OpenCL.LOCAL_MEM_FENCE | OpenCL.IMAGE_MEM_FENCE) + +@time "WorkGroup Barrier Local Mem fence, work item scope" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_work_item) +@time "WorkGroup Barrier Local Mem fence, workgroup scope" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_work_group) +@time "WorkGroup Barrier Local Mem fence, device scope" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_device) +@time "Skipped" cl.memory_backend() isa cl.SVMBackend && @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_all_svm_devices) +@time "WorkGroup Barrier Local Mem fence, subgroup scope" @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_sub_group) end From 75afde500c8684fe1a13f98d52ea71f8b5f14963 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:11:32 -0400 Subject: [PATCH 18/26] POCL 7.0 --- test/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index 57ae7ff9..a7103ec5 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -24,4 +24,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd" [compat] -pocl_jll = "7.0" +pocl_jll = "~7.0" From 6a49f8eb7c8b7a302d5a65171c42424492f0c46c Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:53:29 -0400 Subject: [PATCH 19/26] uiygf --- test/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index a7103ec5..57ae7ff9 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -24,4 +24,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd" [compat] -pocl_jll = "~7.0" +pocl_jll = "7.0" From 634e64fb6ff5a4335b590e07722fb308f1ddb84f Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 3 Nov 2025 15:31:29 -0400 Subject: [PATCH 20/26] Windows --- .github/workflows/Test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index d68d4ac7..d95b2c40 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -23,7 +23,7 @@ jobs: fail-fast: false matrix: version: ['1.10', '1.12'] - os: [ubuntu-24.04-arm, macOS-15, macOS-15-intel, windows-2025] + os: [ubuntu-24.04-arm, macOS-15, macOS-15-intel, windows-2022] arch: [x64, arm64] pocl: [jll] memory_backend: [usm, svm, buffer] @@ -31,7 +31,7 @@ jobs: # unsupported combinations - os: ubuntu-24.04 arch: arm64 - - os: windows-2025 + - os: windows-2022 arch: arm64 - os: ubuntu-24.04-arm arch: x64 @@ -45,7 +45,7 @@ jobs: pocl: local - os: macOS-15 pocl: local - - os: windows-2025 + - os: windows-2022 pocl: local # Remove these exclusions once macOS CI hangs are fixed - os: macOS-15-intel From bad16c2bfb996bcb08abcf2ce360b8333e18999a Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 24 Nov 2025 17:47:15 -0400 Subject: [PATCH 21/26] Test newer macos --- .github/workflows/Test.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index d95b2c40..9399d743 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -15,7 +15,7 @@ jobs: test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ matrix.memory_backend }} - PoCL ${{ matrix.pocl }} runs-on: ${{ matrix.os }} - timeout-minutes: 100 + timeout-minutes: 120 permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created actions: write contents: read @@ -23,7 +23,7 @@ jobs: fail-fast: false matrix: version: ['1.10', '1.12'] - os: [ubuntu-24.04-arm, macOS-15, macOS-15-intel, windows-2022] + os: [ubuntu-24.04-arm, macOS-26, macOS-15-intel, windows-2022] arch: [x64, arm64] pocl: [jll] memory_backend: [usm, svm, buffer] @@ -36,14 +36,14 @@ jobs: - os: ubuntu-24.04-arm arch: x64 # macOS 13 is Intel-only, while macOS 14+ only support Apple Silicon - - os: macOS-15 + - os: macOS-26 arch: x64 - os: macOS-15-intel arch: arm64 # we only test building PoCL on Linux - os: macOS-15-intel pocl: local - - os: macOS-15 + - os: macOS-26 pocl: local - os: windows-2022 pocl: local @@ -52,9 +52,9 @@ jobs: memory_backend: svm - os: macOS-15-intel memory_backend: buffer - - os: macOS-15 + - os: macOS-26 memory_backend: svm - - os: macOS-15 + - os: macOS-26 memory_backend: buffer steps: - name: Checkout OpenCL.jl From d14942ee7aa13de58403c3a2ba236c583d56e10b Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 25 Nov 2025 22:46:20 -0400 Subject: [PATCH 22/26] More macos --- .github/workflows/Test.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index 9399d743..87bc0b53 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -23,7 +23,7 @@ jobs: fail-fast: false matrix: version: ['1.10', '1.12'] - os: [ubuntu-24.04-arm, macOS-26, macOS-15-intel, windows-2022] + os: [ubuntu-24.04-arm, macOS-14, macOS-15, macOS-26, macOS-15-intel, windows-2022] arch: [x64, arm64] pocl: [jll] memory_backend: [usm, svm, buffer] @@ -36,6 +36,10 @@ jobs: - os: ubuntu-24.04-arm arch: x64 # macOS 13 is Intel-only, while macOS 14+ only support Apple Silicon + - os: macOS-14 + arch: x64 + - os: macOS-15 + arch: x64 - os: macOS-26 arch: x64 - os: macOS-15-intel @@ -43,6 +47,10 @@ jobs: # we only test building PoCL on Linux - os: macOS-15-intel pocl: local + - os: macOS-14 + pocl: local + - os: macOS-15 + pocl: local - os: macOS-26 pocl: local - os: windows-2022 @@ -52,6 +60,14 @@ jobs: memory_backend: svm - os: macOS-15-intel memory_backend: buffer + - os: macOS-14 + memory_backend: svm + - os: macOS-14 + memory_backend: buffer + - os: macOS-15 + memory_backend: svm + - os: macOS-15 + memory_backend: buffer - os: macOS-26 memory_backend: svm - os: macOS-26 From bd3c137aafb62918c2c299a04e24c31fba6aa96b Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Wed, 26 Nov 2025 13:59:08 -0400 Subject: [PATCH 23/26] sergev --- .buildkite/pipeline.yml | 2 +- .github/workflows/Test.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 47123461..91cf3583 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -32,7 +32,7 @@ steps: - JuliaCI/julia-coverage#v1: codecov: true commands: | - julia --project -e ' + POCL_DEBUG=err julia --project -e ' using Pkg println("--- :julia: Instantiating project") diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index 87bc0b53..77d1350a 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -15,7 +15,7 @@ jobs: test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ matrix.memory_backend }} - PoCL ${{ matrix.pocl }} runs-on: ${{ matrix.os }} - timeout-minutes: 120 + timeout-minutes: 180 permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created actions: write contents: read @@ -168,7 +168,7 @@ jobs: with: test_args: '--quickfail --platform=pocl --verbose --jobs=2' env: - POCL_WORK_GROUP_METHOD: cbs + POCL_DEBUG: err - name: Setup BusyBox if: runner.os == 'Windows' From a15d0754b4dbbb5c8dd695476d29bdda2bb8a59f Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 1 Dec 2025 16:31:38 -0400 Subject: [PATCH 24/26] jkfh --- test/runtests.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 17f848ac..96e8764f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -53,6 +53,10 @@ Sys.isapple() && @info read(`ulimit -a`, String) Sys.isapple() && @info read(`df -YIh`, String) Sys.isapple() && @info read(`vm_stat`, String) +if Sys.isapple() + run(`sysctl hw.optional`) +end + @info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable." # choose tests From a7fb1df4aba273297ab10b3fbe288d192e6fd359 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:14:43 -0400 Subject: [PATCH 25/26] Allow selecting queue --- lib/cl/event.jl | 24 ++++++++++++------------ lib/cl/kernel.jl | 12 ++++++------ lib/cl/memory/buffer.jl | 30 ++++++++++++++++++------------ lib/cl/memory/usm.jl | 3 ++- 4 files changed, 38 insertions(+), 31 deletions(-) diff --git a/lib/cl/event.jl b/lib/cl/event.jl index 42a914cf..ca1edf2a 100644 --- a/lib/cl/event.jl +++ b/lib/cl/event.jl @@ -158,46 +158,46 @@ function Base.wait(evts::Vector{AbstractEvent}) return evts end -function enqueue_marker_with_wait_list(wait_for::Vector{AbstractEvent}) +function enqueue_marker_with_wait_list(wait_for::Vector{AbstractEvent}; queue::CmdQueue = queue()) n_wait_events = cl_uint(length(wait_for)) wait_evt_ids = [evt.id for evt in wait_for] ret_evt = Ref{cl_event}() - clEnqueueMarkerWithWaitList(queue(), n_wait_events, + clEnqueueMarkerWithWaitList(queue, n_wait_events, isempty(wait_evt_ids) ? C_NULL : wait_evt_ids, ret_evt) @return_event ret_evt[] end -function enqueue_barrier_with_wait_list(wait_for::Vector{AbstractEvent}) +function enqueue_barrier_with_wait_list(wait_for::Vector{AbstractEvent}; queue::CmdQueue = queue()) n_wait_events = cl_uint(length(wait_for)) wait_evt_ids = [evt.id for evt in wait_for] ret_evt = Ref{cl_event}() - clEnqueueBarrierWithWaitList(queue(), n_wait_events, + clEnqueueBarrierWithWaitList(queue, n_wait_events, isempty(wait_evt_ids) ? C_NULL : wait_evt_ids, ret_evt) @return_event ret_evt[] end -function enqueue_marker() +function enqueue_marker(; queue::CmdQueue = queue()) evt = Ref{cl_event}() - clEnqueueMarker(queue(), evt) + clEnqueueMarker(queue, evt) @return_event evt[] end @deprecate enqueue_marker enqueue_marker_with_wait_list -function enqueue_wait_for_events(wait_for::Vector{T}) where {T<:AbstractEvent} +function enqueue_wait_for_events(wait_for::Vector{T}; queue::CmdQueue = queue()) where {T<:AbstractEvent} wait_evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for] GC.@preserve wait_for begin - clEnqueueWaitForEvents(queue(), length(wait_for), wait_evt_ids) + clEnqueueWaitForEvents(queue, length(wait_for), wait_evt_ids) end end -function enqueue_wait_for_events(wait_for::AbstractEvent) - enqueue_wait_for_events([wait_for]) +function enqueue_wait_for_events(wait_for::AbstractEvent; queue::CmdQueue = queue()) + enqueue_wait_for_events([wait_for]; queue) end -function enqueue_barrier() - clEnqueueBarrier(queue()) +function enqueue_barrier(; queue::CmdQueue = queue()) + clEnqueueBarrier(queue) return end @deprecate enqueue_barrier enqueue_barrier_with_wait_list diff --git a/lib/cl/kernel.jl b/lib/cl/kernel.jl index d0243130..a7a44b54 100644 --- a/lib/cl/kernel.jl +++ b/lib/cl/kernel.jl @@ -158,7 +158,7 @@ end function enqueue_kernel(k::Kernel, global_work_size, local_work_size=nothing; global_work_offset=nothing, wait_on::Vector{Event}=Event[], - rng_state=false, nargs=nothing) + rng_state=false, nargs=nothing, queue::CmdQueue = queue()) max_work_dim = device().max_work_item_dims work_dim = length(global_work_size) if work_dim > max_work_dim @@ -224,12 +224,12 @@ function enqueue_kernel(k::Kernel, global_work_size, local_work_size=nothing; end ret_event = Ref{cl_event}() - clEnqueueNDRangeKernel(queue(), k, work_dim, goffset, gsize, lsize, + clEnqueueNDRangeKernel(queue, k, work_dim, goffset, gsize, lsize, n_events, wait_event_ids, ret_event) return Event(ret_event[], retain=false) end -function enqueue_task(k::Kernel; wait_for=nothing) +function enqueue_task(k::Kernel; wait_for=nothing, queue::CmdQueue = queue()) n_evts = 0 evt_ids = C_NULL #TODO: this should be split out into its own function @@ -244,7 +244,7 @@ function enqueue_task(k::Kernel; wait_for=nothing) end end ret_event = Ref{cl_event}() - clEnqueueTask(queue(), k, n_evts, evt_ids, ret_event) + clEnqueueTask(queue, k, n_evts, evt_ids, ret_event) return ret_event[] end @@ -252,7 +252,7 @@ function call( k::Kernel, args...; global_size = (1,), local_size = nothing, global_work_offset = nothing, wait_on::Vector{Event} = Event[], indirect_memory::Vector{AbstractMemory} = AbstractMemory[], - rng_state=false, + rng_state=false, queue::CmdQueue = queue() ) set_args!(k, args...) if !isempty(indirect_memory) @@ -306,7 +306,7 @@ function call( clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(usm_pointers), usm_pointers) end end - enqueue_kernel(k, global_size, local_size; global_work_offset, wait_on, rng_state, nargs=length(args)) + enqueue_kernel(k, global_size, local_size; global_work_offset, wait_on, rng_state, nargs=length(args), queue) end # From `julia/base/reflection.jl`, adjusted to add specialization on `t`. diff --git a/lib/cl/memory/buffer.jl b/lib/cl/memory/buffer.jl index dfd198d8..b9e265af 100644 --- a/lib/cl/memory/buffer.jl +++ b/lib/cl/memory/buffer.jl @@ -84,12 +84,13 @@ end # reading from buffer to host array, return an event function enqueue_read(dst::Ptr, src::Buffer, src_off::Int, nbytes::Int; - blocking::Bool=false, wait_for::Vector{Event}=Event[]) + blocking::Bool=false, wait_for::Vector{Event}=Event[], + queue::CmdQueue = queue()) n_evts = length(wait_for) evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for] GC.@preserve wait_for begin ret_evt = Ref{cl_event}() - clEnqueueReadBuffer(queue(), src, blocking, src_off, nbytes, dst, + clEnqueueReadBuffer(queue, src, blocking, src_off, nbytes, dst, n_evts, evt_ids, ret_evt) @return_nanny_event(ret_evt[], dst) end @@ -99,12 +100,13 @@ enqueue_read(dst::Ptr, src::Buffer, nbytes; kwargs...) = # writing from host array to buffer, return an event function enqueue_write(dst::Buffer, dst_off::Int, src::Ptr, nbytes::Int; - blocking::Bool=false, wait_for::Vector{Event}=Event[]) + blocking::Bool=false, wait_for::Vector{Event}=Event[], + queue::CmdQueue = queue()) n_evts = length(wait_for) evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for] GC.@preserve wait_for begin ret_evt = Ref{cl_event}() - clEnqueueWriteBuffer(queue(), dst, blocking, dst_off, nbytes, src, + clEnqueueWriteBuffer(queue, dst, blocking, dst_off, nbytes, src, n_evts, evt_ids, ret_evt) @return_nanny_event(ret_evt[], dst) end @@ -115,12 +117,13 @@ enqueue_write(dst::Buffer, src::Ptr, nbytes; kwargs...) = # copying between two buffers, return an event function enqueue_copy(dst::Buffer, dst_off::Int, src::Buffer, src_off::Int, nbytes::Int; blocking::Bool=false, - wait_for::Vector{Event}=Event[]) + wait_for::Vector{Event}=Event[], + queue::CmdQueue = queue()) n_evts = length(wait_for) evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for] GC.@preserve wait_for begin ret_evt = Ref{cl_event}() - clEnqueueCopyBuffer(queue(), src, dst, src_off, dst_off, nbytes, + clEnqueueCopyBuffer(queue, src, dst, src_off, dst_off, nbytes, n_evts, evt_ids, ret_evt) @return_event ret_evt[] end @@ -130,7 +133,8 @@ enqueue_copy(dst::Buffer, src::Buffer, N; kwargs...) = # map a buffer into the host address space, returning a pointer and an event function enqueue_map(buf::Buffer, offset::Integer, nbytes::Int, flags=:rw; - blocking::Bool=false, wait_for::Vector{Event}=Event[]) + blocking::Bool=false, wait_for::Vector{Event}=Event[], + queue::CmdQueue = queue()) flags = if flags == :rw CL_MAP_READ | CL_MAP_WRITE elseif flags == :r @@ -146,7 +150,7 @@ function enqueue_map(buf::Buffer, offset::Integer, nbytes::Int, flags=:rw; evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for] GC.@preserve wait_for begin status = Ref{Cint}() - ptr = clEnqueueMapBuffer(queue(), buf, blocking, flags, offset, nbytes, + ptr = clEnqueueMapBuffer(queue, buf, blocking, flags, offset, nbytes, n_evts, evt_ids, ret_evt, status) if status[] != CL_SUCCESS throw(CLError(status[])) @@ -159,19 +163,21 @@ enqueue_map(buf::Buffer, nbytes::Int, flags=:rw; kwargs...) = enqueue_map(buf, 0, nbytes, flags; kwargs...) # unmap a buffer, return an event -function enqueue_unmap(buf::Buffer, ptr::Ptr; wait_for::Vector{Event}=Event[]) +function enqueue_unmap(buf::Buffer, ptr::Ptr; wait_for::Vector{Event}=Event[], + queue::CmdQueue = queue()) n_evts = length(wait_for) evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for] GC.@preserve wait_for begin ret_evt = Ref{cl_event}() - clEnqueueUnmapMemObject(queue(), buf, ptr, n_evts, evt_ids, ret_evt) + clEnqueueUnmapMemObject(queue, buf, ptr, n_evts, evt_ids, ret_evt) return Event(ret_evt[]) end end # fill a buffer with a pattern, returning an event function enqueue_fill(buf::Buffer, offset::Integer, pattern::T, N::Integer; - wait_for::Vector{Event}=Event[]) where {T} + wait_for::Vector{Event}=Event[], + queue::CmdQueue = queue()) where {T} nbytes = N * sizeof(T) nbytes_pattern = sizeof(T) @assert nbytes_pattern > 0 @@ -179,7 +185,7 @@ function enqueue_fill(buf::Buffer, offset::Integer, pattern::T, N::Integer; evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for] GC.@preserve begin ret_evt = Ref{cl_event}() - clEnqueueFillBuffer(queue(), buf, [pattern], + clEnqueueFillBuffer(queue, buf, [pattern], nbytes_pattern, offset, nbytes, n_evts, evt_ids, ret_evt) @return_event ret_evt[] diff --git a/lib/cl/memory/usm.jl b/lib/cl/memory/usm.jl index a12bc52f..a182cd35 100644 --- a/lib/cl/memory/usm.jl +++ b/lib/cl/memory/usm.jl @@ -176,6 +176,7 @@ end # fill a buffer with a pattern, returning an event function enqueue_usm_fill(ptr::Union{Ptr, CLPtr}, pattern::T, N::Integer; + queue::CmdQueue = queue(), wait_for::Vector{Event}=Event[]) where {T} nbytes = N * sizeof(T) pattern_size = sizeof(T) @@ -183,7 +184,7 @@ function enqueue_usm_fill(ptr::Union{Ptr, CLPtr}, pattern::T, N::Integer; evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for] GC.@preserve wait_for begin ret_evt = Ref{cl_event}() - clEnqueueMemFillINTEL(queue(), ptr, Ref(pattern), + clEnqueueMemFillINTEL(queue, ptr, Ref(pattern), pattern_size, nbytes, n_evts, evt_ids, ret_evt) @return_event ret_evt[] From d77e639906146bef20281ca51ac8926719e9eb1f Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:14:47 -0400 Subject: [PATCH 26/26] gfb --- test/setup.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/setup.jl b/test/setup.jl index 90337d36..1d5062e0 100644 --- a/test/setup.jl +++ b/test/setup.jl @@ -155,9 +155,13 @@ macro on_device(ex...) $code return end - - @opencl $(kwargs...) $kernel() - cl.finish(cl.queue()) + @info "Pre-queue" + queue = cl.queue() + @info "Post-queue $(queue)" + @opencl queue $(kwargs...) $kernel() + @info "Post-launch" + cl.finish(queue) + @info "Post-sync" end end) end