Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,40 @@ steps:
env:
OCL_ICD_FILENAMES: "libnvidia-opencl.so.1"
timeout_in_minutes: 60

- group: "macos"
key: "julia"
steps:
- label: "{{matrix.julia}} macos {{matrix.arch}}"
plugins:
- JuliaCI/julia#v1:
version: "{{matrix.julia}}"
- JuliaCI/julia-coverage#v1:
codecov: true
commands: |
POCL_DEBUG=err julia --project -e '
using Pkg

println("--- :julia: Instantiating project")
Pkg.add("pocl_jll")
Pkg.add("InteractiveUtils")
Pkg.develop(path="lib/intrinsics")

println("+++ :julia: Running tests")
using InteractiveUtils
InteractiveUtils.versioninfo()
Pkg.test(; coverage=true, test_args=`--platform=pocl --verbose --jobs=2`)'
agents:
queue: "juliaecosystem"
os: "macos"
arch: "{{matrix.arch}}"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 120
matrix:
setup:
julia:
- "1.10"
- "1.12"
arch:
- "x86_64"
- "aarch64"
30 changes: 24 additions & 6 deletions .github/workflows/Test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,47 +15,63 @@ jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ matrix.memory_backend }} - PoCL ${{ matrix.pocl }}
runs-on: ${{ matrix.os }}
timeout-minutes: 100
timeout-minutes: 180
permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created
actions: write
contents: read
strategy:
fail-fast: false
matrix:
version: ['1.10', '1.12']
os: [ubuntu-24.04, ubuntu-24.04-arm, macOS-15, macOS-15-intel, windows-2025]
os: [ubuntu-24.04-arm, macOS-14, macOS-15, macOS-26, macOS-15-intel, windows-2022]
arch: [x64, arm64]
pocl: [jll, local]
pocl: [jll]
memory_backend: [usm, svm, buffer]
exclude:
# unsupported combinations
- os: ubuntu-24.04
arch: arm64
- os: windows-2025
- os: windows-2022
arch: arm64
- os: ubuntu-24.04-arm
arch: x64
# macOS 13 is Intel-only, while macOS 14+ only support Apple Silicon
- os: macOS-14
arch: x64
- os: macOS-15
arch: x64
- os: macOS-26
arch: x64
- os: macOS-15-intel
arch: arm64
# we only test building PoCL on Linux
- os: macOS-15-intel
pocl: local
- os: macOS-14
pocl: local
- os: macOS-15
pocl: local
- os: windows-2025
- os: macOS-26
pocl: local
- os: windows-2022
pocl: local
# Remove these exclusions once macOS CI hangs are fixed
- os: macOS-15-intel
memory_backend: svm
- os: macOS-15-intel
memory_backend: buffer
- os: macOS-14
memory_backend: svm
- os: macOS-14
memory_backend: buffer
- os: macOS-15
memory_backend: svm
- os: macOS-15
memory_backend: buffer
- os: macOS-26
memory_backend: svm
- os: macOS-26
memory_backend: buffer
steps:
- name: Checkout OpenCL.jl
uses: actions/checkout@v5
Expand Down Expand Up @@ -150,7 +166,9 @@ jobs:
uses: julia-actions/julia-runtest@v1
if: runner.os != 'Windows'
with:
test_args: '--quickfail --platform=pocl'
test_args: '--quickfail --platform=pocl --verbose --jobs=2'
env:
POCL_DEBUG: err

- name: Setup BusyBox
if: runner.os == 'Windows'
Expand Down
24 changes: 12 additions & 12 deletions lib/cl/event.jl
Original file line number Diff line number Diff line change
Expand Up @@ -158,46 +158,46 @@ function Base.wait(evts::Vector{AbstractEvent})
return evts
end

function enqueue_marker_with_wait_list(wait_for::Vector{AbstractEvent})
function enqueue_marker_with_wait_list(wait_for::Vector{AbstractEvent}; queue::CmdQueue = queue())
n_wait_events = cl_uint(length(wait_for))
wait_evt_ids = [evt.id for evt in wait_for]
ret_evt = Ref{cl_event}()
clEnqueueMarkerWithWaitList(queue(), n_wait_events,
clEnqueueMarkerWithWaitList(queue, n_wait_events,
isempty(wait_evt_ids) ? C_NULL : wait_evt_ids,
ret_evt)
@return_event ret_evt[]
end

function enqueue_barrier_with_wait_list(wait_for::Vector{AbstractEvent})
function enqueue_barrier_with_wait_list(wait_for::Vector{AbstractEvent}; queue::CmdQueue = queue())
n_wait_events = cl_uint(length(wait_for))
wait_evt_ids = [evt.id for evt in wait_for]
ret_evt = Ref{cl_event}()
clEnqueueBarrierWithWaitList(queue(), n_wait_events,
clEnqueueBarrierWithWaitList(queue, n_wait_events,
isempty(wait_evt_ids) ? C_NULL : wait_evt_ids,
ret_evt)
@return_event ret_evt[]
end

function enqueue_marker()
function enqueue_marker(; queue::CmdQueue = queue())
evt = Ref{cl_event}()
clEnqueueMarker(queue(), evt)
clEnqueueMarker(queue, evt)
@return_event evt[]
end
@deprecate enqueue_marker enqueue_marker_with_wait_list

function enqueue_wait_for_events(wait_for::Vector{T}) where {T<:AbstractEvent}
function enqueue_wait_for_events(wait_for::Vector{T}; queue::CmdQueue = queue()) where {T<:AbstractEvent}
wait_evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
GC.@preserve wait_for begin
clEnqueueWaitForEvents(queue(), length(wait_for), wait_evt_ids)
clEnqueueWaitForEvents(queue, length(wait_for), wait_evt_ids)
end
end

function enqueue_wait_for_events(wait_for::AbstractEvent)
enqueue_wait_for_events([wait_for])
function enqueue_wait_for_events(wait_for::AbstractEvent; queue::CmdQueue = queue())
enqueue_wait_for_events([wait_for]; queue)
end

function enqueue_barrier()
clEnqueueBarrier(queue())
function enqueue_barrier(; queue::CmdQueue = queue())
clEnqueueBarrier(queue)
return
end
@deprecate enqueue_barrier enqueue_barrier_with_wait_list
Expand Down
12 changes: 6 additions & 6 deletions lib/cl/kernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ end

function enqueue_kernel(k::Kernel, global_work_size, local_work_size=nothing;
global_work_offset=nothing, wait_on::Vector{Event}=Event[],
rng_state=false, nargs=nothing)
rng_state=false, nargs=nothing, queue::CmdQueue = queue())
max_work_dim = device().max_work_item_dims
work_dim = length(global_work_size)
if work_dim > max_work_dim
Expand Down Expand Up @@ -224,12 +224,12 @@ function enqueue_kernel(k::Kernel, global_work_size, local_work_size=nothing;
end

ret_event = Ref{cl_event}()
clEnqueueNDRangeKernel(queue(), k, work_dim, goffset, gsize, lsize,
clEnqueueNDRangeKernel(queue, k, work_dim, goffset, gsize, lsize,
n_events, wait_event_ids, ret_event)
return Event(ret_event[], retain=false)
end

function enqueue_task(k::Kernel; wait_for=nothing)
function enqueue_task(k::Kernel; wait_for=nothing, queue::CmdQueue = queue())
n_evts = 0
evt_ids = C_NULL
#TODO: this should be split out into its own function
Expand All @@ -244,15 +244,15 @@ function enqueue_task(k::Kernel; wait_for=nothing)
end
end
ret_event = Ref{cl_event}()
clEnqueueTask(queue(), k, n_evts, evt_ids, ret_event)
clEnqueueTask(queue, k, n_evts, evt_ids, ret_event)
return ret_event[]
end

function call(
k::Kernel, args...; global_size = (1,), local_size = nothing,
global_work_offset = nothing, wait_on::Vector{Event} = Event[],
indirect_memory::Vector{AbstractMemory} = AbstractMemory[],
rng_state=false,
rng_state=false, queue::CmdQueue = queue()
)
set_args!(k, args...)
if !isempty(indirect_memory)
Expand Down Expand Up @@ -306,7 +306,7 @@ function call(
clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(usm_pointers), usm_pointers)
end
end
enqueue_kernel(k, global_size, local_size; global_work_offset, wait_on, rng_state, nargs=length(args))
enqueue_kernel(k, global_size, local_size; global_work_offset, wait_on, rng_state, nargs=length(args), queue)
end

# From `julia/base/reflection.jl`, adjusted to add specialization on `t`.
Expand Down
30 changes: 18 additions & 12 deletions lib/cl/memory/buffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@ end

# reading from buffer to host array, return an event
function enqueue_read(dst::Ptr, src::Buffer, src_off::Int, nbytes::Int;
blocking::Bool=false, wait_for::Vector{Event}=Event[])
blocking::Bool=false, wait_for::Vector{Event}=Event[],
queue::CmdQueue = queue())
n_evts = length(wait_for)
evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
GC.@preserve wait_for begin
ret_evt = Ref{cl_event}()
clEnqueueReadBuffer(queue(), src, blocking, src_off, nbytes, dst,
clEnqueueReadBuffer(queue, src, blocking, src_off, nbytes, dst,
n_evts, evt_ids, ret_evt)
@return_nanny_event(ret_evt[], dst)
end
Expand All @@ -99,12 +100,13 @@ enqueue_read(dst::Ptr, src::Buffer, nbytes; kwargs...) =

# writing from host array to buffer, return an event
function enqueue_write(dst::Buffer, dst_off::Int, src::Ptr, nbytes::Int;
blocking::Bool=false, wait_for::Vector{Event}=Event[])
blocking::Bool=false, wait_for::Vector{Event}=Event[],
queue::CmdQueue = queue())
n_evts = length(wait_for)
evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
GC.@preserve wait_for begin
ret_evt = Ref{cl_event}()
clEnqueueWriteBuffer(queue(), dst, blocking, dst_off, nbytes, src,
clEnqueueWriteBuffer(queue, dst, blocking, dst_off, nbytes, src,
n_evts, evt_ids, ret_evt)
@return_nanny_event(ret_evt[], dst)
end
Expand All @@ -115,12 +117,13 @@ enqueue_write(dst::Buffer, src::Ptr, nbytes; kwargs...) =
# copying between two buffers, return an event
function enqueue_copy(dst::Buffer, dst_off::Int, src::Buffer, src_off::Int,
nbytes::Int; blocking::Bool=false,
wait_for::Vector{Event}=Event[])
wait_for::Vector{Event}=Event[],
queue::CmdQueue = queue())
n_evts = length(wait_for)
evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
GC.@preserve wait_for begin
ret_evt = Ref{cl_event}()
clEnqueueCopyBuffer(queue(), src, dst, src_off, dst_off, nbytes,
clEnqueueCopyBuffer(queue, src, dst, src_off, dst_off, nbytes,
n_evts, evt_ids, ret_evt)
@return_event ret_evt[]
end
Expand All @@ -130,7 +133,8 @@ enqueue_copy(dst::Buffer, src::Buffer, N; kwargs...) =

# map a buffer into the host address space, returning a pointer and an event
function enqueue_map(buf::Buffer, offset::Integer, nbytes::Int, flags=:rw;
blocking::Bool=false, wait_for::Vector{Event}=Event[])
blocking::Bool=false, wait_for::Vector{Event}=Event[],
queue::CmdQueue = queue())
flags = if flags == :rw
CL_MAP_READ | CL_MAP_WRITE
elseif flags == :r
Expand All @@ -146,7 +150,7 @@ function enqueue_map(buf::Buffer, offset::Integer, nbytes::Int, flags=:rw;
evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
GC.@preserve wait_for begin
status = Ref{Cint}()
ptr = clEnqueueMapBuffer(queue(), buf, blocking, flags, offset, nbytes,
ptr = clEnqueueMapBuffer(queue, buf, blocking, flags, offset, nbytes,
n_evts, evt_ids, ret_evt, status)
if status[] != CL_SUCCESS
throw(CLError(status[]))
Expand All @@ -159,27 +163,29 @@ enqueue_map(buf::Buffer, nbytes::Int, flags=:rw; kwargs...) =
enqueue_map(buf, 0, nbytes, flags; kwargs...)

# unmap a buffer, return an event
function enqueue_unmap(buf::Buffer, ptr::Ptr; wait_for::Vector{Event}=Event[])
function enqueue_unmap(buf::Buffer, ptr::Ptr; wait_for::Vector{Event}=Event[],
queue::CmdQueue = queue())
n_evts = length(wait_for)
evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
GC.@preserve wait_for begin
ret_evt = Ref{cl_event}()
clEnqueueUnmapMemObject(queue(), buf, ptr, n_evts, evt_ids, ret_evt)
clEnqueueUnmapMemObject(queue, buf, ptr, n_evts, evt_ids, ret_evt)
return Event(ret_evt[])
end
end

# fill a buffer with a pattern, returning an event
function enqueue_fill(buf::Buffer, offset::Integer, pattern::T, N::Integer;
wait_for::Vector{Event}=Event[]) where {T}
wait_for::Vector{Event}=Event[],
queue::CmdQueue = queue()) where {T}
nbytes = N * sizeof(T)
nbytes_pattern = sizeof(T)
@assert nbytes_pattern > 0
n_evts = length(wait_for)
evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
GC.@preserve begin
ret_evt = Ref{cl_event}()
clEnqueueFillBuffer(queue(), buf, [pattern],
clEnqueueFillBuffer(queue, buf, [pattern],
nbytes_pattern, offset, nbytes,
n_evts, evt_ids, ret_evt)
@return_event ret_evt[]
Expand Down
3 changes: 2 additions & 1 deletion lib/cl/memory/usm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,15 @@ end

# fill a buffer with a pattern, returning an event
function enqueue_usm_fill(ptr::Union{Ptr, CLPtr}, pattern::T, N::Integer;
queue::CmdQueue = queue(),
wait_for::Vector{Event}=Event[]) where {T}
nbytes = N * sizeof(T)
pattern_size = sizeof(T)
n_evts = length(wait_for)
evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
GC.@preserve wait_for begin
ret_evt = Ref{cl_event}()
clEnqueueMemFillINTEL(queue(), ptr, Ref(pattern),
clEnqueueMemFillINTEL(queue, ptr, Ref(pattern),
pattern_size, nbytes,
n_evts, evt_ids, ret_evt)
@return_event ret_evt[]
Expand Down
Loading
Loading