147 changes: 147 additions & 0 deletions SYCL/GroupAlgorithm/exclusive_scan_sycl2020.cpp
@@ -0,0 +1,147 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3.
// That requires either adding a switch to clang (-spirv-max-version=1.3) or
// unconditionally raising the SPIR-V version from 1.1 to 1.3 in the SPIR-V
// translator. Using operations specific to SPIR-V 1.3 and higher while
// -spirv-max-version=1.1 is set by default causes assert/check failures in
// the SPIR-V translator.
// RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -I . -o \
// %t13.out

#include "support.h"
#include <CL/sycl.hpp>
#include <algorithm>
#include <array>
#include <cassert>
#include <iostream>
#include <limits>
#include <numeric>
#include <vector>
using namespace sycl;

template <class SpecializationKernelName, int TestNumber>
class exclusive_scan_kernel;

template <typename SpecializationKernelName, typename InputContainer,
typename OutputContainer, class BinaryOperation>
void test(queue q, InputContainer input, OutputContainer output,
BinaryOperation binary_op,
typename OutputContainer::value_type identity) {
typedef typename InputContainer::value_type InputT;
typedef typename OutputContainer::value_type OutputT;
typedef class exclusive_scan_kernel<SpecializationKernelName, 0> kernel_name0;
typedef class exclusive_scan_kernel<SpecializationKernelName, 1> kernel_name1;
typedef class exclusive_scan_kernel<SpecializationKernelName, 2> kernel_name2;
typedef class exclusive_scan_kernel<SpecializationKernelName, 3> kernel_name3;
OutputT init = 42;
size_t N = input.size();
size_t G = 64;
std::vector<OutputT> expected(N);
{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name0>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
int lid = it.get_local_id(0);
out[lid] = exclusive_scan_over_group(g, in[lid], binary_op);
});
});
}
std::exclusive_scan(input.begin(), input.begin() + G, expected.begin(),
identity, binary_op);
assert(std::equal(output.begin(), output.begin() + G, expected.begin()));

{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name1>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
int lid = it.get_local_id(0);
out[lid] = exclusive_scan_over_group(g, in[lid], init, binary_op);
});
});
}
std::exclusive_scan(input.begin(), input.begin() + G, expected.begin(), init,
binary_op);
assert(std::equal(output.begin(), output.begin() + G, expected.begin()));

{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name2>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
joint_exclusive_scan(g, in.get_pointer(), in.get_pointer() + N,
out.get_pointer(), binary_op);
});
});
}
std::exclusive_scan(input.begin(), input.begin() + N, expected.begin(),
identity, binary_op);
assert(std::equal(output.begin(), output.begin() + N, expected.begin()));

{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name3>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
joint_exclusive_scan(g, in.get_pointer(), in.get_pointer() + N,
out.get_pointer(), init, binary_op);
});
});
}
std::exclusive_scan(input.begin(), input.begin() + N, expected.begin(), init,
binary_op);
assert(std::equal(output.begin(), output.begin() + N, expected.begin()));
}

int main() {
queue q;
if (!isSupportedDevice(q.get_device())) {
std::cout << "Skipping test\n";
return 0;
}

constexpr int N = 128;
std::array<int, N> input;
std::array<int, N> output;
std::iota(input.begin(), input.end(), 0);
std::fill(output.begin(), output.end(), 0);

test<class KernelNamePlusV>(q, input, output, sycl::plus<>(), 0);
test<class KernelNameMinimumV>(q, input, output, sycl::minimum<>(),
std::numeric_limits<int>::max());
test<class KernelNameMaximumV>(q, input, output, sycl::maximum<>(),
std::numeric_limits<int>::lowest());

test<class KernelNamePlusI>(q, input, output, sycl::plus<int>(), 0);
test<class KernelNameMinimumI>(q, input, output, sycl::minimum<int>(),
std::numeric_limits<int>::max());
test<class KernelNameMaximumI>(q, input, output, sycl::maximum<int>(),
std::numeric_limits<int>::lowest());

#ifdef SPIRV_1_3
test<class KernelName_VzAPutpBRRJrQPB>(q, input, output,
sycl::multiplies<int>(), 1);
test<class KernelName_UXdGbr>(q, input, output, sycl::bit_or<int>(), 0);
test<class KernelName_saYaodNyJknrPW>(q, input, output, sycl::bit_xor<int>(),
0);
test<class KernelName_GPcuAlvAOjrDyP>(q, input, output, sycl::bit_and<int>(),
~0);
#endif // SPIRV_1_3

std::cout << "Test passed." << std::endl;
}
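
For context, the host reference these kernels are checked against is std::exclusive_scan: element i of the result combines the initial value with input elements [0, i), excluding in[i] itself. A minimal host-only sketch of those semantics (not part of the test; assumes C++17 <numeric>):

#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> in{1, 2, 3, 4};
  std::vector<int> out(in.size());
  // Exclusive scan with init 0 and addition: out[i] sums in[0..i),
  // so out becomes {0, 1, 3, 6}.
  std::exclusive_scan(in.begin(), in.end(), out.begin(), 0, std::plus<int>());
  for (int v : out)
    std::cout << v << ' '; // prints: 0 1 3 6
  std::cout << '\n';
}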
148 changes: 148 additions & 0 deletions SYCL/GroupAlgorithm/inclusive_scan_sycl2020.cpp
@@ -0,0 +1,148 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3.
// That requires either adding a switch to clang (-spirv-max-version=1.3) or
// unconditionally raising the SPIR-V version from 1.1 to 1.3 in the SPIR-V
// translator. Using operations specific to SPIR-V 1.3 and higher while
// -spirv-max-version=1.1 is set by default causes assert/check failures in
// the SPIR-V translator.
// RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -I . -o \
// %t13.out

#include "support.h"
#include <CL/sycl.hpp>
#include <algorithm>
#include <array>
#include <cassert>
#include <iostream>
#include <limits>
#include <numeric>
#include <vector>
using namespace sycl;

template <class SpecializationKernelName, int TestNumber>
class inclusive_scan_kernel;

template <typename SpecializationKernelName, typename InputContainer,
typename OutputContainer, class BinaryOperation>
void test(queue q, InputContainer input, OutputContainer output,
BinaryOperation binary_op,
typename OutputContainer::value_type identity) {
typedef typename InputContainer::value_type InputT;
typedef typename OutputContainer::value_type OutputT;
typedef class inclusive_scan_kernel<SpecializationKernelName, 0> kernel_name0;
typedef class inclusive_scan_kernel<SpecializationKernelName, 1> kernel_name1;
typedef class inclusive_scan_kernel<SpecializationKernelName, 2> kernel_name2;
typedef class inclusive_scan_kernel<SpecializationKernelName, 3> kernel_name3;
OutputT init = 42;
size_t N = input.size();
size_t G = 64;
std::vector<OutputT> expected(N);
{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name0>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
int lid = it.get_local_id(0);
out[lid] = inclusive_scan_over_group(g, in[lid], binary_op);
});
});
}
std::inclusive_scan(input.begin(), input.begin() + G, expected.begin(),
binary_op, identity);
assert(std::equal(output.begin(), output.begin() + G, expected.begin()));

{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name1>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
int lid = it.get_local_id(0);
out[lid] = inclusive_scan_over_group(g, in[lid], binary_op, init);
});
});
}
std::inclusive_scan(input.begin(), input.begin() + G, expected.begin(),
binary_op, init);
assert(std::equal(output.begin(), output.begin() + G, expected.begin()));

{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name2>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
joint_inclusive_scan(g, in.get_pointer(), in.get_pointer() + N,
out.get_pointer(), binary_op);
});
});
}
std::inclusive_scan(input.begin(), input.begin() + N, expected.begin(),
binary_op, identity);
assert(std::equal(output.begin(), output.begin() + N, expected.begin()));

{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name3>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
joint_inclusive_scan(g, in.get_pointer(), in.get_pointer() + N,
out.get_pointer(), binary_op, init);
});
});
}
std::inclusive_scan(input.begin(), input.begin() + N, expected.begin(),
binary_op, init);
assert(std::equal(output.begin(), output.begin() + N, expected.begin()));
}

int main() {
queue q;
if (!isSupportedDevice(q.get_device())) {
std::cout << "Skipping test\n";
return 0;
}

constexpr int N = 128;
std::array<int, N> input;
std::array<int, N> output;
std::iota(input.begin(), input.end(), 0);
std::fill(output.begin(), output.end(), 0);

test<class KernelNamePlusV>(q, input, output, sycl::plus<>(), 0);
test<class KernelNameMinimumV>(q, input, output, sycl::minimum<>(),
std::numeric_limits<int>::max());
test<class KernelNameMaximumV>(q, input, output, sycl::maximum<>(),
std::numeric_limits<int>::lowest());

test<class KernelNamePlusI>(q, input, output, sycl::plus<int>(), 0);
test<class KernelNameMinimumI>(q, input, output, sycl::minimum<int>(),
std::numeric_limits<int>::max());
test<class KernelNameMaximumI>(q, input, output, sycl::maximum<int>(),
std::numeric_limits<int>::lowest());

#ifdef SPIRV_1_3
test<class KernelName_zMyjxUrBgeUGoxmDwhvJ>(q, input, output,
sycl::multiplies<int>(), 1);
test<class KernelName_SljjtroxNRaAXoVnT>(q, input, output,
sycl::bit_or<int>(), 0);
test<class KernelName_yXIZfjwjxQGiPeQAnc>(q, input, output,
sycl::bit_xor<int>(), 0);
test<class KernelName_xGnAnMYHvqekCk>(q, input, output, sycl::bit_and<int>(),
~0);
#endif // SPIRV_1_3

std::cout << "Test passed." << std::endl;
}
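
One detail worth noting in the calls above: std::inclusive_scan takes the binary operation before the optional init value, the reverse of std::exclusive_scan's (init, op) order, and the SYCL group functions mirror that asymmetry. A minimal host-only sketch of the reference semantics (not part of the test; assumes C++17 <numeric>):

#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> in{1, 2, 3, 4};
  std::vector<int> out(in.size());
  // Inclusive scan: out[i] includes in[i], so out becomes {1, 3, 6, 10}.
  // Note the argument order: the operation comes before the init value.
  std::inclusive_scan(in.begin(), in.end(), out.begin(), std::plus<int>(), 0);
  for (int v : out)
    std::cout << v << ' '; // prints: 1 3 6 10
  std::cout << '\n';
}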
100 changes: 100 additions & 0 deletions SYCL/GroupAlgorithm/reduce_sycl2020.cpp
@@ -0,0 +1,100 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3.
// That requires either adding a switch to clang (-spirv-max-version=1.3) or
// unconditionally raising the SPIR-V version from 1.1 to 1.3 in the SPIR-V
// translator. Using operations specific to SPIR-V 1.3 and higher while
// -spirv-max-version=1.1 is set by default causes assert/check failures in
// the SPIR-V translator.
// RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -I . -o \
// %t13.out

#include "support.h"
#include <CL/sycl.hpp>
#include <algorithm>
#include <array>
#include <cassert>
#include <iostream>
#include <limits>
#include <numeric>
using namespace sycl;

template <typename SpecializationKernelName, typename InputContainer,
typename OutputContainer, class BinaryOperation>
void test(queue q, InputContainer input, OutputContainer output,
BinaryOperation binary_op,
typename OutputContainer::value_type identity) {
typedef typename InputContainer::value_type InputT;
typedef typename OutputContainer::value_type OutputT;
OutputT init = 42;
size_t N = input.size();
size_t G = 64;
{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());

q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<SpecializationKernelName>(
nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
int lid = it.get_local_id(0);
out[0] = reduce_over_group(g, in[lid], binary_op);
out[1] = reduce_over_group(g, in[lid], init, binary_op);
out[2] = joint_reduce(g, in.get_pointer(), in.get_pointer() + N,
binary_op);
out[3] = joint_reduce(g, in.get_pointer(), in.get_pointer() + N,
init, binary_op);
});
});
}
// std::reduce is not implemented yet, so use std::accumulate instead
assert(output[0] == std::accumulate(input.begin(), input.begin() + G,
identity, binary_op));
assert(output[1] ==
std::accumulate(input.begin(), input.begin() + G, init, binary_op));
assert(output[2] ==
std::accumulate(input.begin(), input.end(), identity, binary_op));
assert(output[3] ==
std::accumulate(input.begin(), input.end(), init, binary_op));
}

int main() {
queue q;
if (!isSupportedDevice(q.get_device())) {
std::cout << "Skipping test\n";
return 0;
}

constexpr int N = 128;
std::array<int, N> input;
std::array<int, 4> output;
std::iota(input.begin(), input.end(), 0);
std::fill(output.begin(), output.end(), 0);

test<class KernelNamePlusV>(q, input, output, sycl::plus<>(), 0);
test<class KernelNameMinimumV>(q, input, output, sycl::minimum<>(),
std::numeric_limits<int>::max());
test<class KernelNameMaximumV>(q, input, output, sycl::maximum<>(),
std::numeric_limits<int>::lowest());

test<class KernelNamePlusI>(q, input, output, sycl::plus<int>(), 0);
test<class KernelNameMinimumI>(q, input, output, sycl::minimum<int>(),
std::numeric_limits<int>::max());
test<class KernelNameMaximumI>(q, input, output, sycl::maximum<int>(),
std::numeric_limits<int>::lowest());

#ifdef SPIRV_1_3
test<class KernelName_WonwuUVPUPOTKRKIBtT>(q, input, output,

[inline review] Reviewer: Please fix kernel name here as well. Author: Fixed.

sycl::multiplies<int>(), 1);
test<class KernelName_qYBaJDZTMGkdIwD>(q, input, output, sycl::bit_or<int>(),
0);
test<class KernelName_eLSFt>(q, input, output, sycl::bit_xor<int>(), 0);
test<class KernelName_uFhJnxSVhNAiFPTG>(q, input, output,
sycl::bit_and<int>(), ~0);
#endif // SPIRV_1_3

std::cout << "Test passed." << std::endl;
}
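
As the in-code comment notes, the host library used for verification does not provide std::reduce yet, so the expected values are computed with std::accumulate. For the associative, commutative operations exercised here the two agree; a minimal host-only sketch of that reference computation (not part of the test):

#include <algorithm>
#include <functional>
#include <iostream>
#include <limits>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> in{3, 1, 4, 1, 5};
  // Fold the range left-to-right starting from the operation's identity,
  // exactly as the test builds its expected reduction values.
  int sum = std::accumulate(in.begin(), in.end(), 0, std::plus<int>());
  int mn = std::accumulate(in.begin(), in.end(),
                           std::numeric_limits<int>::max(),
                           [](int a, int b) { return std::min(a, b); });
  std::cout << sum << ' ' << mn << '\n'; // prints: 14 1
}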