This repository was archived by the owner on Mar 28, 2023. It is now read-only.
Merged
Changes from 3 commits
6 changes: 3 additions & 3 deletions SYCL/SubGroup/helper.hpp
@@ -154,7 +154,7 @@ void exit_if_not_equal_vec(vec<T, N> val, vec<T, N> ref, const char *name) {
}

bool core_sg_supported(const device &Device) {
return (Device.has_extension("cl_khr_subgroups") ||
Device.get_info<info::device::version>().find(" 2.1") !=
string_class::npos);
if (Device.has_extension("cl_khr_subgroups"))
return true;
return Device.get_info<info::device::version>() >= "2.1";
}
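For context, a minimal standalone model of the new check, assuming the version string is a bare "<major>.<minor>" value (the exact format is backend-dependent, so treat this as an illustration rather than the runtime's contract):

#include <string>

// Standalone model of core_sg_supported: sub-groups are available either
// through the cl_khr_subgroups extension or on OpenCL 2.1+ devices. Note
// that the comparison is lexicographic, so it is only meaningful when the
// version string is a bare "<major>.<minor>" value.
bool core_sg_supported_model(bool has_khr_subgroups,
                             const std::string &version) {
  if (has_khr_subgroups)
    return true;
  return version >= "2.1";
}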
51 changes: 49 additions & 2 deletions SYCL/SubGroup/reduce.hpp
@@ -88,7 +88,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
check_op<sycl_subgr<SpecializationKernelName, class KernelName_bPPlfvdGShi>,
T>(Queue, T(0), ONEAPI::maximum<T>(), true, G, L);

#if __cplusplus >= 201402L
// Transparent operator functors.
check_op<sycl_subgr<SpecializationKernelName,
class KernelName_fkOyLRYirfMnvBcnbRFy>,
T>(Queue, T(L), ONEAPI::plus<>(), false, G, L);
@@ -107,5 +107,52 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
check_op<
sycl_subgr<SpecializationKernelName, class KernelName_BaCGaWDMFeMFqvotbk>,
T>(Queue, T(0), ONEAPI::maximum<>(), true, G, L);
#endif
}
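The #if __cplusplus >= 201402L guard above is replaced with a plain comment because the transparent (empty-angle-bracket) functors it protected are a C++14 feature the suite now assumes unconditionally. A quick compile-time sketch of the typed/transparent distinction, using the std:: counterparts:

#include <functional>

// std::plus<int> fixes the operand type up front; std::plus<> (C++14,
// "transparent") deduces operand types at each call site. The same
// distinction applies to ONEAPI::plus<T> vs ONEAPI::plus<> above.
static_assert(std::plus<int>{}(2, 3) == 5, "typed functor");
static_assert(std::plus<>{}(2, 3) == 5, "transparent functor");

int main() { return 0; }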

template <typename SpecializationKernelName, typename T>
void check_mul(queue &Queue, size_t G = 256, size_t L = 4) {
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulF>, T>(
Queue, T(G), ONEAPI::multiplies<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulT>, T>(
Queue, T(1), ONEAPI::multiplies<T>(), true, G, L);

// Transparent operator functors.
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulFV>, T>(
Queue, T(G), ONEAPI::multiplies<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulTV>, T>(
Queue, T(1), ONEAPI::multiplies<>(), true, G, L);
}

template <typename SpecializationKernelName, typename T>
void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) {
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORF>, T>(
Queue, T(G), ONEAPI::bit_or<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORT>, T>(
Queue, T(0), ONEAPI::bit_or<T>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORF>, T>(
Queue, T(G), ONEAPI::bit_xor<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORT>, T>(
Queue, T(0), ONEAPI::bit_xor<T>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDF>, T>(
Queue, T(G), ONEAPI::bit_and<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDT>, T>(
Queue, ~T(0), ONEAPI::bit_and<T>(), true, G, L);

// Transparent operator functors.
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORFV>, T>(
Queue, T(G), ONEAPI::bit_or<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORTV>, T>(
Queue, T(0), ONEAPI::bit_or<>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORFV>, T>(
Queue, T(G), ONEAPI::bit_xor<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORTV>, T>(
Queue, T(0), ONEAPI::bit_xor<>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDFV>, T>(
Queue, T(G), ONEAPI::bit_and<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDTV>, T>(
Queue, ~T(0), ONEAPI::bit_and<>(), true, G, L);
}
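The expected values passed for the true cases line up with the identity elements of the bitwise operations (x | 0 == x, x ^ 0 == x, x & ~0 == x), which is why bit_and uses ~T(0) where the others use T(0). A compile-time sanity sketch:

// Identity elements exercised by check_bit_ops: combining a value with
// the identity of each operation leaves it unchanged.
static_assert((0xA5u | 0u) == 0xA5u, "0 is the identity for bit_or");
static_assert((0xA5u ^ 0u) == 0xA5u, "0 is the identity for bit_xor");
static_assert((0xA5u & ~0u) == 0xA5u, "~0 is the identity for bit_and");

int main() { return 0; }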
39 changes: 39 additions & 0 deletions SYCL/SubGroup/reduce_spirv13.cpp
@@ -0,0 +1,39 @@
// UNSUPPORTED: cpu
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA

// UNSUPPORTED: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// This test verifies correct operation of the SPIR-V 1.3 reduce algorithm
// used with the MUL, bitwise OR, XOR and AND operations.

#include "reduce.hpp"

int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device())) {
std::cout << "Skipping test\n";
return 0;
}

check_mul<class MulA, int>(Queue);
check_mul<class MulB, unsigned int>(Queue);
check_mul<class MulC, long>(Queue);
check_mul<class MulD, unsigned long>(Queue);
check_mul<class MulE, float>(Queue);

check_bit_ops<class A, int>(Queue);
check_bit_ops<class B, unsigned int>(Queue);
check_bit_ops<class C, unsigned>(Queue);
check_bit_ops<class D, long>(Queue);
check_bit_ops<class E, unsigned long>(Queue);
check_bit_ops<class F, long long>(Queue);
check_bit_ops<class G, unsigned long long>(Queue);
std::cout << "Test passed." << std::endl;
return 0;
}
20 changes: 20 additions & 0 deletions SYCL/SubGroup/reduce_spirv13_fp16.cpp
@@ -0,0 +1,20 @@
// UNSUPPORTED: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// This test verifies correct operation of the SPIR-V 1.3 reduce algorithm
// used with the MUL operation.

#include "reduce.hpp"

int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device())) {
std::cout << "Skipping test\n";
return 0;
}
check_mul<class MulHalf, cl::sycl::half>(Queue);
std::cout << "Test passed." << std::endl;
return 0;
}
27 changes: 27 additions & 0 deletions SYCL/SubGroup/reduce_spirv13_fp64.cpp
@@ -0,0 +1,27 @@
// UNSUPPORTED: cpu
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA

// UNSUPPORTED: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// This test verifies correct operation of the SPIR-V 1.3 reduce algorithm
// used with the MUL operation.

#include "reduce.hpp"

int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device())) {
std::cout << "Skipping test\n";
return 0;
}
check_mul<class MulDouble, double>(Queue);
std::cout << "Test passed." << std::endl;
return 0;
}
50 changes: 48 additions & 2 deletions SYCL/SubGroup/scan.hpp
@@ -115,7 +115,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<T>(), true, G, L);
}

#if __cplusplus >= 201402L
// Transparent operator functors.
check_op<sycl_subgr<SpecializationKernelName, class KernelName_TPWS>, T>(
Queue, T(L), ONEAPI::plus<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_hWZv>, T>(
@@ -150,5 +150,51 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
T>(Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<>(), true, G,
L);
}
#endif
}

template <typename SpecializationKernelName, typename T>
void check_mul(queue &Queue, size_t G = 256, size_t L = 4) {
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulF>, T>(
Queue, T(L), ONEAPI::multiplies<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulT>, T>(
Queue, T(1), ONEAPI::multiplies<T>(), true, G, L);

// Transparent operator functors.
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulFV>, T>(
Queue, T(L), ONEAPI::multiplies<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulTV>, T>(
Queue, T(1), ONEAPI::multiplies<>(), true, G, L);
}

template <typename SpecializationKernelName, typename T>
void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) {
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORF>, T>(
Queue, T(L), ONEAPI::bit_or<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORT>, T>(
Queue, T(0), ONEAPI::bit_or<T>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORF>, T>(
Queue, T(L), ONEAPI::bit_xor<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORT>, T>(
Queue, T(0), ONEAPI::bit_xor<T>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDF>, T>(
Queue, T(L), ONEAPI::bit_and<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDT>, T>(
Queue, ~T(0), ONEAPI::bit_and<T>(), true, G, L);

// Transparent operator functors.
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORFV>, T>(
Queue, T(L), ONEAPI::bit_or<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORTV>, T>(
Queue, T(0), ONEAPI::bit_or<>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORFV>, T>(
Queue, T(L), ONEAPI::bit_xor<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORTV>, T>(
Queue, T(0), ONEAPI::bit_xor<>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDFV>, T>(
Queue, T(L), ONEAPI::bit_and<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDTV>, T>(
Queue, ~T(0), ONEAPI::bit_and<>(), true, G, L);
}
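For reference, a host-side sketch of what the two scan flavors compute, written with C++14 standard algorithms; how check_op's boolean argument maps onto exclusive versus inclusive is internal to this header and not visible in the hunk, so that mapping is an assumption here:

#include <cassert>
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> in{1, 2, 3, 4};

  // Inclusive scan with multiplies: out[i] = in[0] * ... * in[i].
  std::vector<int> inc(in.size());
  std::partial_sum(in.begin(), in.end(), inc.begin(), std::multiplies<int>());
  assert((inc == std::vector<int>{1, 2, 6, 24}));

  // Exclusive scan: out[i] = identity * in[0] * ... * in[i-1].
  std::vector<int> exc(in.size());
  int acc = 1; // identity element for multiplication
  for (std::size_t i = 0; i < in.size(); ++i) {
    exc[i] = acc;
    acc *= in[i];
  }
  assert((exc == std::vector<int>{1, 1, 2, 6}));
  return 0;
}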
39 changes: 39 additions & 0 deletions SYCL/SubGroup/scan_spirv13.cpp
@@ -0,0 +1,39 @@
// UNSUPPORTED: cpu
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA

// UNSUPPORTED: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// This test verifies correct operation of the SPIR-V 1.3 exclusive_scan() and
// inclusive_scan() algorithms used with the MUL, bitwise OR, XOR and AND
// operations.

#include "scan.hpp"

int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device())) {
std::cout << "Skipping test\n";
return 0;
}
check_mul<class MulA, int>(Queue);
check_mul<class MulB, unsigned int>(Queue);
check_mul<class MulC, long>(Queue);
check_mul<class MulD, unsigned long>(Queue);
check_mul<class MulE, float>(Queue);

check_bit_ops<class A, int>(Queue);
check_bit_ops<class B, unsigned int>(Queue);
check_bit_ops<class C, unsigned>(Queue);
check_bit_ops<class D, long>(Queue);
check_bit_ops<class E, unsigned long>(Queue);
check_bit_ops<class F, long long>(Queue);
check_bit_ops<class G, unsigned long long>(Queue);
std::cout << "Test passed." << std::endl;
return 0;
}
20 changes: 20 additions & 0 deletions SYCL/SubGroup/scan_spirv13_fp16.cpp
@@ -0,0 +1,20 @@
// UNSUPPORTED: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// This test verifies correct operation of the SPIR-V 1.3 exclusive_scan() and
// inclusive_scan() algorithms used with the MUL operation.

#include "scan.hpp"

int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device())) {
std::cout << "Skipping test\n";
return 0;
}
check_mul<class MulHalf, cl::sycl::half>(Queue);
std::cout << "Test passed." << std::endl;
return 0;
}
27 changes: 27 additions & 0 deletions SYCL/SubGroup/scan_spirv13_fp64.cpp
@@ -0,0 +1,27 @@
// UNSUPPORTED: cpu
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA

// UNSUPPORTED: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// This test verifies correct operation of the SPIR-V 1.3 exclusive_scan() and
// inclusive_scan() algorithms used with the MUL operation.

#include "scan.hpp"

int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device())) {
std::cout << "Skipping test\n";
return 0;
}
check_mul<class MulDouble, double>(Queue);
std::cout << "Test passed." << std::endl;
return 0;
}