This repository was archived by the owner on Mar 28, 2023. It is now read-only.
forked from llvm/llvm-test-suite
-
Notifications
You must be signed in to change notification settings - Fork 130
[SYCL] Add a test for group sorting algorithm #438
Merged
Merged
Changes from 2 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
ebb2c57
add a test for sorting
andreyfe1 bc185e8
add more cases
andreyfe1 0632bcd
minor fixes
andreyfe1 60a73f9
run clang-format
andreyfe1 deb5dc1
fixed the test according to the implementation
andreyfe1 4829ebc
add multidimensional groups
andreyfe1 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,361 @@ | ||
| // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t.out | ||
| // RUN: %CPU_RUN_PLACEHOLDER %t.out | ||
| // RUN: %GPU_RUN_PLACEHOLDER %t.out | ||
| // RUN: %ACC_RUN_PLACEHOLDER %t.out | ||
| // | ||
| // Missinsg __spirv_GroupIAdd, __spirv_GroupSMin and __spirv_GroupSMax on AMD: | ||
| // XFAIL: rocm_amd | ||
|
|
||
| // TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3. | ||
| // That requires either adding a switch to clang (-spirv-max-version=1.3) or | ||
| // raising the spirv version from 1.1. to 1.3 for spirv translator | ||
| // unconditionally. Using operators specific for spirv 1.3 and higher with | ||
| // -spirv-max-version=1.1 being set by default causes assert/check fails | ||
| // in spirv translator. | ||
| // RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -I . -o \ | ||
| %t13.out | ||
|
|
||
| #include "support.h" | ||
| #include <CL/sycl.hpp> | ||
|
|
||
| #include <algorithm> | ||
| #include <iostream> | ||
| #include <random> | ||
| #include <vector> | ||
|
|
||
| namespace my_sycl = sycl::ext::oneapi; | ||
|
|
||
| auto async_handler_ = [](sycl::exception_list ex_list) { | ||
| for (auto &ex : ex_list) { | ||
| try { | ||
| std::rethrow_exception(ex); | ||
| } catch (sycl::exception &ex) { | ||
| std::cerr << ex.what() << std::endl; | ||
| std::exit(EXIT_FAILURE); | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| constexpr uint32_t items_per_work_item = 4; | ||
|
|
||
| struct CustomType { | ||
| int x; | ||
| }; | ||
|
|
||
| struct CustomFunctor { | ||
| bool operator()(const CustomType &lhs, const CustomType &rhs) const { | ||
| return lhs.x < rhs.x; | ||
| } | ||
| }; | ||
|
|
||
| // we need it since using std::abs leads to compilation error | ||
| template <typename T> T my_abs(T x) { return x >= 0 ? x : -x; } | ||
|
|
||
| template <typename T> bool check(T lhs, T rhs, float epsilon) { | ||
| return my_abs(lhs - rhs) > epsilon; | ||
| } | ||
| bool check(CustomType lhs, CustomType rhs, float epsilon) { | ||
| return my_abs(lhs.x - rhs.x) > epsilon; | ||
| } | ||
|
|
||
| template <typename T> | ||
| bool verify(T *expected, T *got, std::size_t n, float epsilon) { | ||
| for (std::size_t i = 0; i < n; ++i) { | ||
| if (check(expected[i], got[i], epsilon)) { | ||
| return false; | ||
| } | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| // forward declared classes to name kernels | ||
| template <typename... Args> class sort_over_group_kernel_name; | ||
| template <typename... Args> class joint_sort_kernel_name; | ||
| template <typename... Args> class custom_sorter_kernel_name; | ||
|
|
||
| // custom sorter | ||
| template <typename Compare> struct bubble_sorter { | ||
| Compare comp; | ||
| size_t idx; | ||
|
|
||
| template <typename Group, typename Ptr> | ||
| void operator()(Group g, Ptr begin, Ptr end) { | ||
| size_t n = end - begin; | ||
| if (idx == 0) | ||
| for (size_t i = 0; i < n; ++i) | ||
| for (size_t j = i + 1; j < n; ++j) | ||
| if (comp(begin[j], begin[i])) | ||
| std::swap(begin[i], begin[j]); | ||
| } | ||
| }; | ||
|
|
||
| template <typename T, typename Compare> | ||
| int test_sort_over_group(sycl::queue &q, std::size_t local, | ||
| sycl::buffer<T> &bufI1, Compare comp, int test_case) { | ||
| auto n = bufI1.size(); | ||
| if (n > local) | ||
| return -1; | ||
|
|
||
| sycl::range<1> local_range(local); | ||
|
|
||
| std::size_t local_memory_size = | ||
| my_sycl::experimental::default_sorter<>::memory_required<T>( | ||
| sycl::memory_scope::work_group, local_range); | ||
|
|
||
| if (local_memory_size > | ||
| q.get_device().template get_info<sycl::info::device::local_mem_size>()) | ||
| std::cout << "local_memory_size = " << local_memory_size << ", available = " | ||
| << q.get_device() | ||
| .template get_info<sycl::info::device::local_mem_size>() | ||
| << std::endl; | ||
| q.submit([&](sycl::handler &h) { | ||
| auto aI1 = sycl::accessor(bufI1, h); | ||
| sycl::accessor<std::uint8_t, 1, sycl::access_mode::read_write, | ||
| sycl::access::target::local> | ||
| scratch({local_memory_size}, h); | ||
|
|
||
| h.parallel_for<sort_over_group_kernel_name<T, Compare>>( | ||
| sycl::nd_range<1>(local_range, local_range), [=](sycl::nd_item<1> id) { | ||
| scratch[0] = std::uint8_t{}; | ||
| auto local_id = id.get_local_id(); | ||
| switch (test_case) { | ||
| case 0: | ||
| if constexpr (std::is_same_v<Compare, std::less<T>> && | ||
| !std::is_same_v<T, CustomType>) | ||
| aI1[local_id] = my_sycl::sort_over_group( | ||
| my_sycl::experimental::group_with_scratchpad( | ||
| id.get_group(), | ||
| sycl::span{&scratch[0], local_memory_size}), | ||
| aI1[local_id]); | ||
| break; | ||
| case 1: | ||
| aI1[local_id] = my_sycl::sort_over_group( | ||
| my_sycl::experimental::group_with_scratchpad( | ||
| id.get_group(), | ||
| sycl::span{&scratch[0], local_memory_size}), | ||
| aI1[local_id], comp); | ||
| break; | ||
| case 2: | ||
| aI1[local_id] = my_sycl::sort_over_group( | ||
| id.get_group(), aI1[local_id], | ||
| my_sycl::experimental::default_sorter<Compare>( | ||
| sycl::span{&scratch[0], local_memory_size})); | ||
| break; | ||
| } | ||
| }); | ||
| }).wait_and_throw(); | ||
| return 1; | ||
| } | ||
|
|
||
| template <typename T, typename Compare> | ||
| int test_joint_sort(sycl::queue &q, std::size_t n_items, std::size_t local, | ||
| sycl::buffer<T> &bufI1, Compare comp, int test_case) { | ||
| auto n = bufI1.size(); | ||
| auto n_groups = (n - 1) / n_items + 1; | ||
|
|
||
| std::size_t local_memory_size = | ||
| my_sycl::experimental::default_sorter<>::memory_required<T>( | ||
| sycl::memory_scope::work_group, n); | ||
| if (local_memory_size > | ||
| q.get_device().template get_info<sycl::info::device::local_mem_size>()) | ||
| std::cout << "local_memory_size = " << local_memory_size << ", available = " | ||
| << q.get_device() | ||
| .template get_info<sycl::info::device::local_mem_size>() | ||
| << std::endl; | ||
| q.submit([&](sycl::handler &h) { | ||
| auto aI1 = sycl::accessor(bufI1, h); | ||
| sycl::accessor<std::uint8_t, 1, sycl::access_mode::read_write, | ||
| sycl::access::target::local> | ||
| scratch({local_memory_size}, h); | ||
|
|
||
| h.parallel_for<joint_sort_kernel_name<T, Compare>>( | ||
| sycl::nd_range<1>{{n_groups * local}, {local}}, | ||
| [=](sycl::nd_item<1> id) { | ||
| auto group_id = id.get_group(0); | ||
| auto ptr_keys = &aI1[group_id * n_items]; | ||
| // auto ptr = aI.get_pointer() + group_id * n_items; | ||
andreyfe1 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| scratch[0] = std::uint8_t{}; | ||
| switch (test_case) { | ||
| case 0: | ||
| if constexpr (std::is_same_v<Compare, std::less<T>> && | ||
| !std::is_same_v<T, CustomType>) | ||
| my_sycl::joint_sort( | ||
| my_sycl::experimental::group_with_scratchpad( | ||
| id.get_group(), | ||
| sycl::span{&scratch[0], local_memory_size}), | ||
| ptr_keys, | ||
| ptr_keys + sycl::min(n_items, n - group_id * n_items)); | ||
| break; | ||
| case 1: | ||
| my_sycl::joint_sort( | ||
| my_sycl::experimental::group_with_scratchpad( | ||
| id.get_group(), | ||
| sycl::span{&scratch[0], local_memory_size}), | ||
| ptr_keys, | ||
| ptr_keys + sycl::min(n_items, n - group_id * n_items), comp); | ||
| break; | ||
| case 2: | ||
| my_sycl::joint_sort( | ||
| id.get_group(), ptr_keys, | ||
| ptr_keys + sycl::min(n_items, n - group_id * n_items), | ||
| my_sycl::experimental::default_sorter<Compare>( | ||
| sycl::span{&scratch[0], local_memory_size})); | ||
| break; | ||
| } | ||
| }); | ||
| }).wait_and_throw(); | ||
| return n_groups; | ||
| } | ||
|
|
||
| template <typename T, typename Compare> | ||
| int test_custom_sorter(sycl::queue &q, sycl::buffer<T> &bufI1, Compare comp) { | ||
| std::size_t local = 256; | ||
| auto n = bufI1.size(); | ||
| if (n > local) | ||
| return -1; | ||
| local = std::min(local, n); | ||
|
|
||
| q.submit([&](sycl::handler &h) { | ||
| auto aI1 = sycl::accessor(bufI1, h); | ||
|
|
||
| h.parallel_for<custom_sorter_kernel_name<T, Compare>>( | ||
| sycl::nd_range<1>({local}, {local}), [=](sycl::nd_item<1> id) { | ||
| auto ptr = aI1.get_pointer(); | ||
|
|
||
| my_sycl::joint_sort(id.get_group(), ptr, ptr + n, | ||
| bubble_sorter<Compare>{comp, id.get_local_id()}); | ||
| }); | ||
| }).wait_and_throw(); | ||
| return 1; | ||
| } | ||
|
|
||
| template <typename T, typename Compare> | ||
| void run_sort(sycl::queue &q, std::vector<T> &in, std::size_t size, | ||
| Compare comp, int test_case, int sort_case) { | ||
| std::vector<T> in2(in.begin(), in.begin() + size); | ||
| std::vector<T> expected(in.begin(), in.begin() + size); | ||
| std::size_t local = | ||
| q.get_device() | ||
| .template get_info<sycl::info::device::max_work_group_size>(); | ||
| local = std::min(local, size); | ||
| auto n_items = items_per_work_item * local; | ||
|
|
||
| int n_groups = 1; | ||
| { // scope to destruct buffers | ||
| sycl::buffer<T> bufKeys(in2.data(), size); | ||
| { | ||
| switch (sort_case) { | ||
| case 0: | ||
| n_groups = test_sort_over_group(q, local, bufKeys, comp, test_case); | ||
| break; | ||
| case 1: | ||
| n_groups = test_joint_sort(q, n_items, local, bufKeys, comp, test_case); | ||
| break; | ||
| case 2: | ||
| n_groups = test_custom_sorter(q, bufKeys, comp); | ||
| break; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // check results | ||
| for (int i_group = 0; i_group < n_groups; ++i_group) { | ||
| std::sort(expected.begin() + i_group * n_items, | ||
| expected.begin() + std::min((i_group + 1) * n_items, size), comp); | ||
| } | ||
| if (n_groups != -1 && | ||
| (test_case != 0 || | ||
| test_case == 0 && std::is_same_v<Compare, std::less<T>> && | ||
| !std::is_same_v<T, CustomType>)&&!verify(expected.data(), in2.data(), | ||
| size, 0.001f)) { | ||
| std::cerr << "Verification failed \n"; | ||
| exit(1); | ||
| } | ||
| } | ||
|
|
||
| template <typename T> struct test_sort_cases { | ||
| template <typename Generator, typename Compare> | ||
| void operator()(sycl::queue &q, std::size_t dataSize, Compare comp, | ||
| Generator generate) { | ||
| std::vector<T> stationaryData(dataSize); | ||
| // fill data | ||
| for (std::size_t i = 0; i < dataSize; ++i) | ||
| stationaryData[i] = generate(i); | ||
|
|
||
| // run test | ||
| for (int test_case = 0; test_case < 3; ++test_case) { | ||
| for (int sort_case = 0; sort_case < 3; ++sort_case) { | ||
| run_sort(q, stationaryData, dataSize, comp, test_case, sort_case); | ||
| } | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| void test_custom_type(sycl::queue &q, std::size_t dataSize) { | ||
| std::vector<CustomType> stationaryData(dataSize, CustomType{0}); | ||
| // fill data | ||
| for (std::size_t i = 0; i < dataSize; ++i) | ||
| stationaryData[i] = CustomType{int(i)}; | ||
|
|
||
| // run test | ||
| for (int test_case = 0; test_case < 1; ++test_case) { | ||
| for (int sort_case = 0; sort_case < 3; ++sort_case) { | ||
| run_sort(q, stationaryData, dataSize, CustomFunctor{}, test_case, | ||
| sort_case); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| template <typename T, typename Compare> | ||
| void test_sort_by_comp(sycl::queue &q, std::size_t dataSize) { | ||
| std::default_random_engine generator; | ||
| std::normal_distribution<float> distribution((10.0), (2.0)); | ||
|
|
||
| T max_size = std::numeric_limits<T>::max(); | ||
| std::size_t to_fill = dataSize; | ||
| if (dataSize > max_size) | ||
| to_fill = max_size; | ||
|
|
||
| // reversed order | ||
| test_sort_cases<T>()(q, to_fill, Compare{}, | ||
| [to_fill](std::size_t i) { return T(to_fill - i - 1); }); | ||
| // filled by 1 | ||
| test_sort_cases<T>()(q, dataSize, Compare{}, | ||
| [](std::size_t) { return T(1); }); | ||
| // random distribution | ||
| test_sort_cases<T>()(q, dataSize, Compare{}, | ||
| [&distribution, &generator](std::size_t) { | ||
| return T(distribution(generator)); | ||
| }); | ||
| } | ||
|
|
||
| template <typename T> | ||
| void test_sort_by_type(sycl::queue &q, std::size_t dataSize) { | ||
| test_sort_by_comp<T, std::less<T>>(q, dataSize); | ||
| test_sort_by_comp<T, std::greater<T>>(q, dataSize); | ||
| } | ||
|
|
||
| int main(int argc, char *argv[]) { | ||
| sycl::queue q(sycl::default_selector{}, async_handler_); | ||
| if (!isSupportedDevice(q.get_device())) { | ||
| std::cout << "Skipping test\n"; | ||
| return 0; | ||
| } | ||
|
|
||
| std::vector<int> sizes{1, 2, 64, 256, 1024, 2048, 4096}; | ||
|
|
||
| for (int i = 0; i < sizes.size(); ++i) { | ||
| test_sort_by_type<std::int8_t>(q, sizes[i]); | ||
| test_sort_by_type<std::uint16_t>(q, sizes[i]); | ||
| test_sort_by_type<std::int32_t>(q, sizes[i]); | ||
| test_sort_by_type<std::uint32_t>(q, sizes[i]); | ||
| test_sort_by_type<float>(q, sizes[i]); | ||
| test_sort_by_type<sycl::half>(q, sizes[i]); | ||
| test_sort_by_type<double>(q, sizes[i]); | ||
| test_sort_by_type<std::size_t>(q, sizes[i]); | ||
|
|
||
| test_custom_type(q, sizes[i]); | ||
| } | ||
| std::cout << "Test passed." << std::endl; | ||
| } | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.