diff --git a/SYCL/ESIMD/api/simd_mask.cpp b/SYCL/ESIMD/api/simd_mask.cpp new file mode 100644 index 0000000000..b4593921de --- /dev/null +++ b/SYCL/ESIMD/api/simd_mask.cpp @@ -0,0 +1,317 @@ +//==---------------- simd_mask.cpp - DPC++ ESIMD simd_mask API test -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu +// UNSUPPORTED: cuda +// RUN: %clangxx -fsycl-unnamed-lambda -fsycl -I%S/.. %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// Smoke test for simd_mask API functionality. + +#include "esimd_test_utils.hpp" + +#include +#include +#include +#include +#include +#include + +using namespace sycl::ext::intel::experimental::esimd; +using namespace cl::sycl; + +template using value_type = typename simd_mask::value_type; + +template static inline constexpr value_type Error = 0; +template static inline constexpr value_type Pass = 1; + +// Slow mask storage function independent of simd_mask::copy_to (memory) and +// simd_mask::value_type. +template +static SYCL_ESIMD_FUNCTION void store(value_type *Ptr, simd_mask M) { + value_type Arr[N]; + M.copy_to(Arr); + + for (auto I = 0; I < N; ++I) { + Ptr[I] = Arr[I] ? 1 : 0; + } +} + +// Slow mask storage function independent of simd_mask::copy_from (memory) and +// simd_mask::value_type. +template +static SYCL_ESIMD_FUNCTION simd_mask load(value_type *Ptr) { + value_type Arr[N]; + for (auto I = 0; I < N; ++I) { + Arr[I] = Ptr[I] ? 1 : 0; + } + simd_mask M(std::move(Arr)); + return M; +} + +// Apply F to each element of M and write result to Res. +template +static SYCL_ESIMD_FUNCTION void +check_mask(const simd_mask &M, typename simd_mask::value_type *Res, + PerElemF F) { + for (auto I = 0; I < N; ++I) { + value_type Val = F(M[I]) ? Pass : Error; + Res[I] = Val; + } +} + +// Slow check if M1 and M2 are equal and write result to Res. +template +static SYCL_ESIMD_FUNCTION void +check_masks_equal(const simd_mask &M1, const simd_mask &M2, + typename simd_mask::value_type *Res) { + for (auto I = 0; I < N; ++I) { + value_type Val = ((M1[I] == 0) == (M2[I] == 0)) ? Pass : Error; + Res[I] = Val; + } +} + +// Represents a generic test case. Each test case has two optional inputs - +// In and InvIn, and one mandatory output - Res. Each input and output element +// matches the simd_mask value type, there is one data element in each per +// NDRange element. InvIn is a logical inversion of In for easier validation of +// operations. +template struct sub_test { + using value_type = typename simd_mask::value_type; + + // Used to automatically free USM memory allocated for input/output. + struct usm_deleter { + queue Q; + + void operator()(value_type *Ptr) { + if (Ptr) { + sycl::free(Ptr, Q); + } + } + }; + + queue Q; + using ptr_type = std::unique_ptr; + ptr_type In; + ptr_type InvIn; + ptr_type Res; + size_t Size = N * 7; + + sub_test(queue Q, bool Need2Inputs = false) : Q(Q) { + In = ptr_type{nullptr, usm_deleter{Q}}; + InvIn = ptr_type{nullptr, usm_deleter{Q}}; + Res = ptr_type{nullptr, usm_deleter{Q}}; + init(Need2Inputs); + } + + void init(bool Need2Inputs) { + device Dev = Q.get_device(); + context Ctx = Q.get_context(); + const auto Sz = Size * sizeof(value_type); + In.reset(static_cast(malloc_shared(Sz, Dev, Ctx))); + if (Need2Inputs) + InvIn.reset(static_cast(malloc_shared(Sz, Dev, Ctx))); + Res.reset(static_cast(malloc_shared(Sz, Dev, Ctx))); + if (!In || (Need2Inputs && !InvIn) || !Res) { + throw sycl::exception(std::error_code{}, "malloc_shared failed"); + } + for (unsigned I = 0; I < Size; I += N) { + unsigned J = 0; + + for (; J < N / 2; ++J) { + auto Ind = I + J; + In.get()[Ind] = 1; + if (Need2Inputs) + InvIn.get()[Ind] = 0; + Res.get()[Ind] = Error; + } + for (; J < N; ++J) { + auto Ind = I + J; + In.get()[Ind] = 0; + if (Need2Inputs) + InvIn.get()[Ind] = 1; + Res.get()[Ind] = Error; + } + } + } + + // The main test function which submits the test kernel F. + template bool run(const char *Name, FuncType F) { + std::cout << " Running " << Name << " API test, N=" << N << "...\n"; + + // Submit the kernel. + try { + cl::sycl::range<1> R{Size / N}; + auto E = Q.submit([&](handler &CGH) { CGH.parallel_for(R, F); }); + E.wait(); + } catch (sycl::exception &Exc) { + std::cout << " *** ERROR. SYCL exception caught: << " << Exc.what() + << "\n"; + return false; + } + // Verify results - basically see if there are no non-zeros in the 'Res' + // array. + int ErrCnt = 0; + + for (auto I = 0; I < Size; ++I) { + if (Res.get()[I] == Error) { + if (++ErrCnt < 10) { + std::cout << " failed at index " << I << "\n"; + } + } + } + if (ErrCnt > 0) { + std::cout << " pass rate: " + << ((float)(Size - ErrCnt) / (float)Size) * 100.0f << "% (" + << (Size - ErrCnt) << "/" << Size << ")\n"; + } + std::cout << (ErrCnt > 0 ? " FAILED\n" : " Passed\n"); + return ErrCnt == 0; + } +}; + +// Defines actual test cases. +template struct simd_mask_api_test { + using value_type = typename simd_mask::value_type; + + bool run(queue Q) { + bool Passed = true; + // Tests for constructors and operators ! []. + { + sub_test Test(Q); + value_type *In = Test.In.get(); + value_type *Res = Test.Res.get(); + Passed &= Test.run( + "broadcast constructor, operator[]", [=](id<1> Id) SYCL_ESIMD_KERNEL { + auto Off = Id * N; + simd_mask M0 = load(In + Off); // 1..1,0...0 + simd_mask M1(M0[0]); + check_mask(M1, Res + Off, [](value_type V) { return V != 0; }); + }); + } + { + sub_test Test(Q); + value_type *Res = Test.Res.get(); + Passed &= + Test.run("default constructor", [=](id<1> Id) SYCL_ESIMD_KERNEL { + auto Off = Id * N; + simd_mask M0; + // TODO FIXME Shorter version not work due to a BE bug +#define WORKAROUND_BE_BUG +#ifdef WORKAROUND_BE_BUG + for (auto I = 0; I < N; ++I) { + if (M0[I] == 0) { + Res[Off + I] = Pass; + } + // else write Error, but its already there + } +#else + check_mask(M0, Res + Off, [](value_type V) { return (V == 0); }); +#endif // WORKAROUND_BE_BUG +#undef WORKAROUND_BE_BUG + }); + } + { + sub_test Test(Q, true /*need InInv*/); + value_type *In = Test.In.get(); + value_type *InInv = Test.InvIn.get(); + value_type *Res = Test.Res.get(); + Passed &= Test.run("operator!", [=](id<1> Id) SYCL_ESIMD_KERNEL { + auto Off = Id * N; + simd_mask M0 = load(In + Off); // 1..1,0...0 + simd_mask M1 = !M0; + simd_mask M2 = load(InInv + Off); // 0..0,1...1 + check_masks_equal(M1, M2, Res + Off); + }); + } + + // Tests for binary and assignment operators. + +#define RUN_TEST(Op, Gold) \ + { \ + sub_test Test(Q, true /*need InInv*/); \ + value_type *In = Test.In.get(); \ + value_type *InInv = Test.InvIn.get(); \ + value_type *Res = Test.Res.get(); \ + Passed &= Test.run("operator " #Op, [=](id<1> Id) SYCL_ESIMD_KERNEL { \ + auto Off = Id * N; \ + simd_mask M0 = load(In + Off); /* 1..1,0...0 */ \ + simd_mask M1 = load(InInv + Off); /* 0..0,1...1 */ \ + simd_mask M2 = M0 Op M1; \ + simd_mask MGold((value_type)Gold); \ + check_masks_equal(M2, MGold, Res + Off); \ + }); \ + } + + RUN_TEST(&&, 0); + RUN_TEST(||, 1); + RUN_TEST(&, 0); + RUN_TEST(|, 1); + RUN_TEST(^, 1); + RUN_TEST(==, 0); + RUN_TEST(!=, 1); + RUN_TEST(&=, 0); + RUN_TEST(|=, 1); + RUN_TEST(^=, 1); +#undef RUN_TEST + + // Tests for APIs that access memory. + if constexpr (N == 8 || N == 32) { + { + sub_test Test(Q); + value_type *In = Test.In.get(); + value_type *Res = Test.Res.get(); + Passed &= Test.run("load constructor", [=](id<1> Id) SYCL_ESIMD_KERNEL { + auto Off = Id * N; + simd_mask M0 = load(In + Off); + simd_mask M1(In + Off); + check_masks_equal(M0, M1, Res + Off); + }); + } + { + sub_test Test(Q); + value_type *In = Test.In.get(); + value_type *Res = Test.Res.get(); + Passed &= Test.run("copy_from", [=](id<1> Id) SYCL_ESIMD_KERNEL { + auto Off = Id * N; + simd_mask M0 = load(In + Off); + simd_mask M1; + M1.copy_from(In + Off); + check_masks_equal(M0, M1, Res + Off); + }); + } + { + sub_test Test(Q, true /*need InInv*/); + value_type *In = Test.In.get(); + value_type *InInv = Test.InvIn.get(); + value_type *Res = Test.Res.get(); + Passed &= Test.run("copy_to", [=](id<1> Id) SYCL_ESIMD_KERNEL { + auto Off = Id * N; + simd_mask M0 = load(In + Off); + M0.copy_to(InInv + Off); + simd_mask M1 = load(InInv + Off); + check_masks_equal(M0, M1, Res + Off); + }); + } + } + return Passed; + } +}; + +int main(int argc, char **argv) { + queue Q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()); + auto Dev = Q.get_device(); + std::cout << "Running on " << Dev.get_info() << "\n"; + bool Passed = true; + // Run tests for different mask size, including the one exceeding the h/w flag + // register width and being not multiple of such. + Passed &= simd_mask_api_test<8>().run(Q); + Passed &= simd_mask_api_test<32>().run(Q); + Passed &= simd_mask_api_test<67>().run(Q); + std::cout << (Passed ? "Test Passed\n" : "Test FAILED\n"); + return Passed ? 0 : 1; +}