Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libclc/ptx-nvidiacl/libspirv/group/collectives.cl
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ __CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, double, -DBL_MAX)
result = OP(sg_x, scratch[sg_id - 1]); \
} \
} \
__spirv_ControlBarrier(Workgroup, 0, 0); \
return result; \
}

Expand Down
64 changes: 64 additions & 0 deletions sycl/test/on-device/back_to_back_collectives.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

#include <CL/sycl.hpp>
#include <numeric>
#include <vector>
using namespace cl::sycl;
using namespace cl::sycl::ONEAPI;

// Regression test for back-to-back work-group collectives.
//
// A single work-group repeatedly calls reduce, exclusive_scan and
// inclusive_scan (all with plus<>) on the sequence 0, 1, ..., N-1 and stores
// the results. The host then recomputes the expected values and compares.
//
// Returns 0 when every result matches (or when the test is skipped on the
// host device) and 1 when any result is wrong, so the test runner sees the
// failure through the process exit code.
int main() {
  queue q;
  if (q.get_device().is_host()) {
    std::cout << "Skipping test\n";
    return 0;
  }

  // Use max work-group size to maximize chance of race
  int N = q.get_device().get_info<info::device::max_work_group_size>();

  // Input[i] == i; the three result vectors start zeroed so stale data cannot
  // accidentally look like a correct answer.
  std::vector<int> Input(N), Sum(N), EScan(N), IScan(N);
  std::iota(Input.begin(), Input.end(), 0);
  std::fill(Sum.begin(), Sum.end(), 0);
  std::fill(EScan.begin(), EScan.end(), 0);
  std::fill(IScan.begin(), IScan.end(), 0);

  // The buffers live only inside this scope; the host vectors are read again
  // only after the scope has closed.
  {
    buffer<int> InputBuf(Input.data(), N);
    buffer<int> SumBuf(Sum.data(), N);
    buffer<int> EScanBuf(EScan.data(), N);
    buffer<int> IScanBuf(IScan.data(), N);
    q.submit([&](handler &h) {
      auto Input = InputBuf.get_access<access::mode::read>(h);
      auto Sum = SumBuf.get_access<access::mode::write>(h);
      auto EScan = EScanBuf.get_access<access::mode::write>(h);
      auto IScan = IScanBuf.get_access<access::mode::write>(h);
      // One work-group of N work-items: global size == local size == N.
      h.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
        size_t i = it.get_global_id(0);
        auto g = it.get_group();
        // Loop to increase number of back-to-back calls
        for (int r = 0; r < 10; ++r) {
          Sum[i] = reduce(g, Input[i], plus<>());
          EScan[i] = exclusive_scan(g, Input[i], plus<>());
          IScan[i] = inclusive_scan(g, Input[i], plus<>());
        }
      });
    });
  }

  // Recompute the prefix sums on the host. Before adding i, `sum` is the
  // exclusive prefix sum at i; after adding i, it is the inclusive one.
  int sum = 0;
  bool passed = true;
  for (int i = 0; i < N; ++i) {
    passed &= (sum == EScan[i]);
    sum += i;
    passed &= (sum == IScan[i]);
  }
  // After the loop above `sum` is the total of all inputs, which every
  // work-item must have received from reduce().
  for (int i = 0; i < N; ++i) {
    passed &= (sum == Sum[i]);
  }

  // Report the actual outcome; previously `passed` was computed but ignored,
  // so the test could never fail.
  if (!passed) {
    std::cout << "Test failed." << std::endl;
    return 1;
  }
  std::cout << "Test passed." << std::endl;
  return 0;
}