Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions benchmark/benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@ using Random

if !haskey(ENV, "KA_BACKEND")
const BACKEND = CPU()
const Ts = (Float32, Float64)
else
backend = ENV["KA_BACKEND"]
if backend == "CPU"
const BACKEND = CPU()
const Ts = (Float32, Float64)
elseif backend == "CUDA"
using CUDA
const BACKEND = CUDABackend()
const Ts = (Float16, Float32, Float64)
else
error("Backend $backend not recognized")
end
Expand All @@ -31,7 +34,7 @@ end
SUITE["saxpy"] = BenchmarkGroup()

let static = BenchmarkGroup()
for T in (Float16, Float32, Float64)
for T in Ts
dtype = BenchmarkGroup()
for N in (64, 256, 512, 1024, 2048, 4096, 16384, 32768, 65536, 262144, 1048576)
dtype[N] = @benchmarkable begin
Expand All @@ -49,7 +52,7 @@ let static = BenchmarkGroup()
end

let default = BenchmarkGroup()
for T in (Float16, Float32, Float64)
for T in Ts
dtype = BenchmarkGroup()
for N in (64, 256, 512, 1024, 2048, 4096, 16384, 32768, 65536, 262144, 1048576)
dtype[N] = @benchmarkable begin
Expand Down
7 changes: 7 additions & 0 deletions src/pocl/compiler/compilation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ end
supports_fp16 = "cl_khr_fp16" in dev.extensions
supports_fp64 = "cl_khr_fp64" in dev.extensions

if !supports_fp64
@warn_once "Device does not support double precision floating point operations" dev
end
if !supports_fp16
@warn_once "Device does not support half precision floating point operations" dev
end

# create GPUCompiler objects
target = SPIRVCompilerTarget(; supports_fp16, supports_fp64, version = v"1.2", kwargs...)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we assert here?

params = OpenCLCompilerParams()
Expand Down
Loading