diff --git a/libclc/ptx-nvidiacl/libspirv/images/image.cl b/libclc/ptx-nvidiacl/libspirv/images/image.cl index a82dfbcf46f43..062ed02252eec 100644 --- a/libclc/ptx-nvidiacl/libspirv/images/image.cl +++ b/libclc/ptx-nvidiacl/libspirv/images/image.cl @@ -58,74 +58,74 @@ int __clc__sampled_image3d_unpack_sampler(__ocl_sampled_image3d_ro_t) __asm( // NVVM helpers struct out_16 -__nvvm_suld_1d_v4i16_trap_s(long, int) __asm("llvm.nvvm.suld.1d.v4i16.trap"); +__nvvm_suld_1d_v4i16_trap_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_trap_s"); struct out_16 __nvvm_suld_2d_v4i16_trap_s(long, int, - int) __asm("llvm.nvvm.suld.2d.v4i16.trap"); + int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_trap"); struct out_16 __nvvm_suld_3d_v4i16_trap_s(long, int, int, - int) __asm("llvm.nvvm.suld.3d.v4i16.trap"); + int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_trap"); struct out_32 -__nvvm_suld_1d_v4i32_trap_s(long, int) __asm("llvm.nvvm.suld.1d.v4i32.trap"); +__nvvm_suld_1d_v4i32_trap_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_trap"); struct out_32 __nvvm_suld_2d_v4i32_trap_s(long, int, - int) __asm("llvm.nvvm.suld.2d.v4i32.trap"); + int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_trap"); struct out_32 __nvvm_suld_3d_v4i32_trap_s(long, int, int, - int) __asm("llvm.nvvm.suld.3d.v4i32.trap"); + int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_trap"); struct out_16 -__nvvm_suld_1d_v4i16_clamp_s(long, int) __asm("llvm.nvvm.suld.1d.v4i16.clamp"); +__nvvm_suld_1d_v4i16_clamp_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_clamp"); struct out_16 __nvvm_suld_2d_v4i16_clamp_s(long, int, - int) __asm("llvm.nvvm.suld.2d.v4i16.clamp"); + int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_clamp"); struct out_16 __nvvm_suld_3d_v4i16_clamp_s(long, int, int, - int) __asm("llvm.nvvm.suld.3d.v4i16.clamp"); + int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_clamp"); struct out_32 -__nvvm_suld_1d_v4i32_clamp_s(long, int) __asm("llvm.nvvm.suld.1d.v4i32.clamp"); +__nvvm_suld_1d_v4i32_clamp_s(long, int) 
__asm("__clc_llvm_nvvm_suld_1d_v4i32_clamp"); struct out_32 __nvvm_suld_2d_v4i32_clamp_s(long, int, - int) __asm("llvm.nvvm.suld.2d.v4i32.clamp"); + int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_clamp"); struct out_32 __nvvm_suld_3d_v4i32_clamp_s(long, int, int, - int) __asm("llvm.nvvm.suld.3d.v4i32.clamp"); + int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_clamp"); struct out_16 -__nvvm_suld_1d_v4i16_zero_s(long, int) __asm("llvm.nvvm.suld.1d.v4i16.zero"); +__nvvm_suld_1d_v4i16_zero_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_zero"); struct out_16 __nvvm_suld_2d_v4i16_zero_s(long, int, - int) __asm("llvm.nvvm.suld.2d.v4i16.zero"); + int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_zero"); struct out_16 __nvvm_suld_3d_v4i16_zero_s(long, int, int, - int) __asm("llvm.nvvm.suld.3d.v4i16.zero"); + int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_zero"); struct out_32 -__nvvm_suld_1d_v4i32_zero_s(long, int) __asm("llvm.nvvm.suld.1d.v4i32.zero"); +__nvvm_suld_1d_v4i32_zero_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_zero"); struct out_32 __nvvm_suld_2d_v4i32_zero_s(long, int, - int) __asm("llvm.nvvm.suld.2d.v4i32.zero"); + int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_zero"); struct out_32 __nvvm_suld_3d_v4i32_zero_s(long, int, int, - int) __asm("llvm.nvvm.suld.3d.v4i32.zero"); + int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_zero"); struct out_16 __nvvm_suld_1d_v4i16_clamp(read_only image1d_t, - int) __asm("llvm.nvvm.suld.1d.v4i16.clamp"); + int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_clamp"); struct out_16 __nvvm_suld_2d_v4i16_clamp(read_only image2d_t, int, - int) __asm("llvm.nvvm.suld.2d.v4i16.clamp"); + int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_clamp"); struct out_16 __nvvm_suld_3d_v4i16_clamp(read_only image3d_t, int, int, - int) __asm("llvm.nvvm.suld.3d.v4i16.clamp"); + int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_clamp"); struct out_32 __nvvm_suld_1d_v4i32_clamp(read_only image1d_t, - int) __asm("llvm.nvvm.suld.1d.v4i32.clamp"); + int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_clamp"); struct 
out_32 __nvvm_suld_2d_v4i32_clamp(read_only image2d_t, int, -                           int) __asm("llvm.nvvm.suld.2d.v4i32.clamp"); +                           int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_clamp"); struct out_32 __nvvm_suld_3d_v4i32_clamp(read_only image3d_t, int, int, -                           int) __asm("llvm.nvvm.suld.3d.v4i32.clamp"); +                           int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_clamp"); void __nvvm_sust_1d_v4i16_clamp(write_only image1d_t, int, short, short, short, short) __asm("llvm.nvvm.sust.b.1d.v4i16.clamp"); diff --git a/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll b/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll index f02b8b5371c7f..de92b9e0b84c0 100644 --- a/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll +++ b/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll @@ -32,4 +32,131 @@ define i32 @__clc__sampler_extract_addressing_mode_prop(i32 %sampl) nounwind alw entry: %0 = lshr i32 %sampl, 2 ret i32 %0 -} \ No newline at end of file +} + +; We need wrappers around intrinsics as intrinsics are not allowed to return named structs +declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.trap(i64, i32) +define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_1d_v4i16_trap_s(i64 %img, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.trap(i64 %img, i32 %x); + ret {i16,i16,i16,i16} %0 +} + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.trap(i64, i32, i32) +define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_2d_v4i16_trap(i64 %img, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.trap(i64 %img, i32 %x, i32 %y); + ret {i16,i16,i16,i16} %0 +} + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.trap(i64, i32, i32, i32) +define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_3d_v4i16_trap(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.trap(i64 %img, i32 %x, i32 %y, i32 %z); + ret {i16,i16,i16,i16} %0 +} + +declare {i16,i16,i16,i16}
@llvm.nvvm.suld.1d.v4i16.clamp(i64, i32) +define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_1d_v4i16_clamp(i64 %img, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.clamp(i64 %img, i32 %x); + ret {i16,i16,i16,i16} %0 +} + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.clamp(i64, i32, i32) +define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_2d_v4i16_clamp(i64 %img, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.clamp(i64 %img, i32 %x, i32 %y); + ret {i16,i16,i16,i16} %0 +} + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.clamp(i64, i32, i32, i32) +define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_3d_v4i16_clamp(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.clamp(i64 %img, i32 %x, i32 %y, i32 %z); + ret {i16,i16,i16,i16} %0 +} + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.zero(i64, i32) +define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_1d_v4i16_zero(i64 %img, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.zero(i64 %img, i32 %x); + ret {i16,i16,i16,i16} %0 +} + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.zero(i64, i32, i32) +define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_2d_v4i16_zero(i64 %img, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.zero(i64 %img, i32 %x, i32 %y); + ret {i16,i16,i16,i16} %0 +} + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.zero(i64, i32, i32, i32) +define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_3d_v4i16_zero(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.zero(i64 %img, i32 %x, i32 %y, i32 %z); + ret {i16,i16,i16,i16} %0 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.trap(i64, i32) +define {i32,i32,i32,i32} 
@__clc_llvm_nvvm_suld_1d_v4i32_trap(i64 %img, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.trap(i64 %img, i32 %x); + ret {i32,i32,i32,i32} %0 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.trap(i64, i32, i32) +define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_2d_v4i32_trap(i64 %img, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.trap(i64 %img, i32 %x, i32 %y); + ret {i32,i32,i32,i32} %0 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.trap(i64, i32, i32, i32) +define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_3d_v4i32_trap(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.trap(i64 %img, i32 %x, i32 %y, i32 %z); + ret {i32,i32,i32,i32} %0 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.clamp(i64, i32) +define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_1d_v4i32_clamp(i64 %img, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.clamp(i64 %img, i32 %x); + ret {i32,i32,i32,i32} %0 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.clamp(i64, i32, i32) +define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_2d_v4i32_clamp(i64 %img, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.clamp(i64 %img, i32 %x, i32 %y); + ret {i32,i32,i32,i32} %0 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.clamp(i64, i32, i32, i32) +define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_3d_v4i32_clamp(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.clamp(i64 %img, i32 %x, i32 %y, i32 %z); + ret {i32,i32,i32,i32} %0 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.zero(i64, i32) +define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_1d_v4i32_zero(i64 %img, i32 %x) nounwind alwaysinline { +entry: + %0 = 
tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.zero(i64 %img, i32 %x); + ret {i32,i32,i32,i32} %0 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.zero(i64, i32, i32) +define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_2d_v4i32_zero(i64 %img, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.zero(i64 %img, i32 %x, i32 %y); + ret {i32,i32,i32,i32} %0 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.zero(i64, i32, i32, i32) +define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_3d_v4i32_zero(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.zero(i64 %img, i32 %x, i32 %y, i32 %z); + ret {i32,i32,i32,i32} %0 +}