diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 74b9edc20d..129eba7543 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -23,6 +23,7 @@ SYCL/DeviceCodeSplit @AlexeySachkov @Fznamznon # Device library SYCL/DeviceLib @vzakhari +SYCL/DeviceLib/ITTAnnotations @vzakhari @MrSidims @AGindinson # dot_product API SYCL/DotProduct @rdeodhar diff --git a/SYCL/DeviceLib/ITTAnnotations/atomic.cpp b/SYCL/DeviceLib/ITTAnnotations/atomic.cpp new file mode 100644 index 0000000000..290c710e73 --- /dev/null +++ b/SYCL/DeviceLib/ITTAnnotations/atomic.cpp @@ -0,0 +1,48 @@ +// UNSUPPORTED: cuda || hip + +// RUN: %clangxx -fsycl -fsycl-instrument-device-code %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fsycl-instrument-device-code %s -o %t.cpu.out \ +// RUN: -fsycl-targets=spir64_x86_64-unknown-unknown +// RUN: %CPU_RUN_PLACEHOLDER %t.cpu.out + +#include "CL/sycl.hpp" + +using namespace sycl; + +int main() { + queue q{}; + + int source = 42; + int target = 0; + { + buffer source_buf(&source, 1); + buffer target_buf(&target, 1); + + // Ensure that a simple kernel gets run when instrumented with + // ITT start/finish annotations and ITT atomic start/finish annotations. + q.submit([&](handler &cgh) { + auto source_acc = + source_buf.template get_access(cgh); + auto target_acc = + target_buf.template get_access(cgh); + cgh.single_task([=]() { + auto source_atomic = + ext::oneapi::atomic_ref( + source_acc[0]); + // Store source value into target + target_acc[0] = source_atomic.load(); + // Nullify source + source_atomic.store(0); + }); + }); + } + + return 0; +} diff --git a/SYCL/DeviceLib/ITTAnnotations/barrier.cpp b/SYCL/DeviceLib/ITTAnnotations/barrier.cpp new file mode 100644 index 0000000000..a671224e01 --- /dev/null +++ b/SYCL/DeviceLib/ITTAnnotations/barrier.cpp @@ -0,0 +1,45 @@ +// UNSUPPORTED: cuda || hip + +// RUN: %clangxx -fsycl -fsycl-instrument-device-code %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fsycl-instrument-device-code %s -o %t.cpu.out \ +// RUN: -fsycl-targets=spir64_x86_64-unknown-unknown +// RUN: %CPU_RUN_PLACEHOLDER %t.cpu.out + +#include "CL/sycl.hpp" +#include + +using namespace sycl; + +int main() { + queue q{}; + + std::vector data_vec(/*size*/ 10, /*value*/ 0); + { + range<1> num_items(data_vec.size()); + buffer buf(data_vec.data(), num_items); + range<1> local_range(2); + + // Ensure that a simple kernel gets run when instrumented with + // ITT start/finish annotations and ITT wg_barrier/wi_resume annotations. + q.submit([&](handler &cgh) { + auto acc = buf.get_access(cgh); + accessor + local_acc(local_range, cgh); + cgh.parallel_for( + nd_range<1>(num_items, local_range), [=](nd_item<1> item) { + size_t idx = item.get_global_linear_id(); + int pos = idx & 1; + int opp = pos ^ 1; + local_acc[pos] = acc[idx]; + item.barrier(access::fence_space::local_space); + acc[idx] = local_acc[opp]; + }); + }); + } + + return 0; +}