Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 24 additions & 24 deletions libclc/ptx-nvidiacl/libspirv/images/image.cl
Original file line number Diff line number Diff line change
Expand Up @@ -58,74 +58,74 @@ int __clc__sampled_image3d_unpack_sampler(__ocl_sampled_image3d_ro_t) __asm(

// NVVM helpers
struct out_16
__nvvm_suld_1d_v4i16_trap_s(long, int) __asm("llvm.nvvm.suld.1d.v4i16.trap");
__nvvm_suld_1d_v4i16_trap_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_trap_s");
struct out_16
__nvvm_suld_2d_v4i16_trap_s(long, int,
int) __asm("llvm.nvvm.suld.2d.v4i16.trap");
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_trap");
struct out_16
__nvvm_suld_3d_v4i16_trap_s(long, int, int,
int) __asm("llvm.nvvm.suld.3d.v4i16.trap");
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_trap");
struct out_32
__nvvm_suld_1d_v4i32_trap_s(long, int) __asm("llvm.nvvm.suld.1d.v4i32.trap");
__nvvm_suld_1d_v4i32_trap_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_trap");
struct out_32
__nvvm_suld_2d_v4i32_trap_s(long, int,
int) __asm("llvm.nvvm.suld.2d.v4i32.trap");
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_trap");
struct out_32
__nvvm_suld_3d_v4i32_trap_s(long, int, int,
int) __asm("llvm.nvvm.suld.3d.v4i32.trap");
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_trap");

struct out_16
__nvvm_suld_1d_v4i16_clamp_s(long, int) __asm("llvm.nvvm.suld.1d.v4i16.clamp");
__nvvm_suld_1d_v4i16_clamp_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_clamp");
struct out_16
__nvvm_suld_2d_v4i16_clamp_s(long, int,
int) __asm("llvm.nvvm.suld.2d.v4i16.clamp");
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_clamp");
struct out_16
__nvvm_suld_3d_v4i16_clamp_s(long, int, int,
int) __asm("llvm.nvvm.suld.3d.v4i16.clamp");
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_clamp");
struct out_32
__nvvm_suld_1d_v4i32_clamp_s(long, int) __asm("llvm.nvvm.suld.1d.v4i32.clamp");
__nvvm_suld_1d_v4i32_clamp_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_clamp");
struct out_32
__nvvm_suld_2d_v4i32_clamp_s(long, int,
int) __asm("llvm.nvvm.suld.2d.v4i32.clamp");
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_clamp");
struct out_32
__nvvm_suld_3d_v4i32_clamp_s(long, int, int,
int) __asm("llvm.nvvm.suld.3d.v4i32.clamp");
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_clamp");

struct out_16
__nvvm_suld_1d_v4i16_zero_s(long, int) __asm("llvm.nvvm.suld.1d.v4i16.zero");
__nvvm_suld_1d_v4i16_zero_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_zero");
struct out_16
__nvvm_suld_2d_v4i16_zero_s(long, int,
int) __asm("llvm.nvvm.suld.2d.v4i16.zero");
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_zero");
struct out_16
__nvvm_suld_3d_v4i16_zero_s(long, int, int,
int) __asm("llvm.nvvm.suld.3d.v4i16.zero");
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_zero");
struct out_32
__nvvm_suld_1d_v4i32_zero_s(long, int) __asm("llvm.nvvm.suld.1d.v4i32.zero");
__nvvm_suld_1d_v4i32_zero_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_zero");
struct out_32
__nvvm_suld_2d_v4i32_zero_s(long, int,
int) __asm("llvm.nvvm.suld.2d.v4i32.zero");
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_zero");
struct out_32
__nvvm_suld_3d_v4i32_zero_s(long, int, int,
int) __asm("llvm.nvvm.suld.3d.v4i32.zero");
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_zero");

struct out_16
__nvvm_suld_1d_v4i16_clamp(read_only image1d_t,
int) __asm("llvm.nvvm.suld.1d.v4i16.clamp");
int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_clamp");
struct out_16
__nvvm_suld_2d_v4i16_clamp(read_only image2d_t, int,
int) __asm("llvm.nvvm.suld.2d.v4i16.clamp");
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_clamp");
struct out_16
__nvvm_suld_3d_v4i16_clamp(read_only image3d_t, int, int,
int) __asm("llvm.nvvm.suld.3d.v4i16.clamp");
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_clamp");
struct out_32
__nvvm_suld_1d_v4i32_clamp(read_only image1d_t,
int) __asm("llvm.nvvm.suld.1d.v4i32.clamp");
int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_clamp");
struct out_32
__nvvm_suld_2d_v4i32_clamp(read_only image2d_t, int,
int) __asm("llvm.nvvm.suld.2d.v4i32.clamp");
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_clamp");
struct out_32
__nvvm_suld_3d_v4i32_clamp(read_only image3d_t, int, int,
int) __asm("llvm.nvvm.suld.3d.v4i32.clamp");
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_clamp");

void __nvvm_sust_1d_v4i16_clamp(write_only image1d_t, int, short, short, short,
short) __asm("llvm.nvvm.sust.b.1d.v4i16.clamp");
Expand Down
129 changes: 128 additions & 1 deletion libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,131 @@ define i32 @__clc__sampler_extract_addressing_mode_prop(i32 %sampl) nounwind alw
entry:
%0 = lshr i32 %sampl, 2
ret i32 %0
}
}

; We need wrappers around these intrinsics, as intrinsics are not allowed to
; return named structs.
; Always-inline forwarder around llvm.nvvm.suld.1d.v4i16.trap.
; Passes %img and %x through unchanged and returns the literal struct of four
; i16 values produced by the intrinsic.
; NOTE(review): unlike the sibling wrappers in this file, this symbol ends in
; _s; the asm label that binds to it must spell exactly the same name.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.trap(i64, i32)
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_1d_v4i16_trap_s(i64 %img, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.trap(i64 %img, i32 %x)
  ret {i16,i16,i16,i16} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.2d.v4i16.trap.
; Passes %img, %x and %y through unchanged and returns the literal struct of
; four i16 values produced by the intrinsic.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.trap(i64, i32, i32)
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_2d_v4i16_trap(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.trap(i64 %img, i32 %x, i32 %y)
  ret {i16,i16,i16,i16} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.3d.v4i16.trap.
; Passes %img, %x, %y and %z through unchanged and returns the literal struct
; of four i16 values produced by the intrinsic.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.trap(i64, i32, i32, i32)
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_3d_v4i16_trap(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.trap(i64 %img, i32 %x, i32 %y, i32 %z)
  ret {i16,i16,i16,i16} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.1d.v4i16.clamp.
; Passes %img and %x through unchanged and returns the literal struct of four
; i16 values produced by the intrinsic.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.clamp(i64, i32)
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_1d_v4i16_clamp(i64 %img, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.clamp(i64 %img, i32 %x)
  ret {i16,i16,i16,i16} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.2d.v4i16.clamp.
; Passes %img, %x and %y through unchanged and returns the literal struct of
; four i16 values produced by the intrinsic.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.clamp(i64, i32, i32)
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_2d_v4i16_clamp(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.clamp(i64 %img, i32 %x, i32 %y)
  ret {i16,i16,i16,i16} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.3d.v4i16.clamp.
; Passes %img, %x, %y and %z through unchanged and returns the literal struct
; of four i16 values produced by the intrinsic.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.clamp(i64, i32, i32, i32)
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_3d_v4i16_clamp(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.clamp(i64 %img, i32 %x, i32 %y, i32 %z)
  ret {i16,i16,i16,i16} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.1d.v4i16.zero.
; Passes %img and %x through unchanged and returns the literal struct of four
; i16 values produced by the intrinsic.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.zero(i64, i32)
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_1d_v4i16_zero(i64 %img, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.zero(i64 %img, i32 %x)
  ret {i16,i16,i16,i16} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.2d.v4i16.zero.
; Passes %img, %x and %y through unchanged and returns the literal struct of
; four i16 values produced by the intrinsic.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.zero(i64, i32, i32)
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_2d_v4i16_zero(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.zero(i64 %img, i32 %x, i32 %y)
  ret {i16,i16,i16,i16} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.3d.v4i16.zero.
; Passes %img, %x, %y and %z through unchanged and returns the literal struct
; of four i16 values produced by the intrinsic.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.zero(i64, i32, i32, i32)
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_3d_v4i16_zero(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.zero(i64 %img, i32 %x, i32 %y, i32 %z)
  ret {i16,i16,i16,i16} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.1d.v4i32.trap.
; Passes %img and %x through unchanged and returns the literal struct of four
; i32 values produced by the intrinsic.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.trap(i64, i32)
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_1d_v4i32_trap(i64 %img, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.trap(i64 %img, i32 %x)
  ret {i32,i32,i32,i32} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.2d.v4i32.trap.
; Passes %img, %x and %y through unchanged and returns the literal struct of
; four i32 values produced by the intrinsic.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.trap(i64, i32, i32)
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_2d_v4i32_trap(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.trap(i64 %img, i32 %x, i32 %y)
  ret {i32,i32,i32,i32} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.3d.v4i32.trap.
; Passes %img, %x, %y and %z through unchanged and returns the literal struct
; of four i32 values produced by the intrinsic.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.trap(i64, i32, i32, i32)
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_3d_v4i32_trap(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.trap(i64 %img, i32 %x, i32 %y, i32 %z)
  ret {i32,i32,i32,i32} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.1d.v4i32.clamp.
; Passes %img and %x through unchanged and returns the literal struct of four
; i32 values produced by the intrinsic.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.clamp(i64, i32)
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_1d_v4i32_clamp(i64 %img, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.clamp(i64 %img, i32 %x)
  ret {i32,i32,i32,i32} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.2d.v4i32.clamp.
; Passes %img, %x and %y through unchanged and returns the literal struct of
; four i32 values produced by the intrinsic.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.clamp(i64, i32, i32)
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_2d_v4i32_clamp(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.clamp(i64 %img, i32 %x, i32 %y)
  ret {i32,i32,i32,i32} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.3d.v4i32.clamp.
; Passes %img, %x, %y and %z through unchanged and returns the literal struct
; of four i32 values produced by the intrinsic.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.clamp(i64, i32, i32, i32)
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_3d_v4i32_clamp(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.clamp(i64 %img, i32 %x, i32 %y, i32 %z)
  ret {i32,i32,i32,i32} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.1d.v4i32.zero.
; Passes %img and %x through unchanged and returns the literal struct of four
; i32 values produced by the intrinsic.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.zero(i64, i32)
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_1d_v4i32_zero(i64 %img, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.zero(i64 %img, i32 %x)
  ret {i32,i32,i32,i32} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.2d.v4i32.zero.
; Passes %img, %x and %y through unchanged and returns the literal struct of
; four i32 values produced by the intrinsic.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.zero(i64, i32, i32)
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_2d_v4i32_zero(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.zero(i64 %img, i32 %x, i32 %y)
  ret {i32,i32,i32,i32} %texel
}

; Always-inline forwarder around llvm.nvvm.suld.3d.v4i32.zero.
; Passes %img, %x, %y and %z through unchanged and returns the literal struct
; of four i32 values produced by the intrinsic.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.zero(i64, i32, i32, i32)
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_3d_v4i32_zero(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.zero(i64 %img, i32 %x, i32 %y, i32 %z)
  ret {i32,i32,i32,i32} %texel
}