diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_intrin.hpp index 124f69e41e771..4fb011c2b5421 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_intrin.hpp @@ -19,7 +19,6 @@ #include #define __SEIEED sycl::ext::intel::experimental::esimd::detail -#define __SEIEE sycl::ext::intel::experimental::esimd // \brief __esimd_rdregion: region access intrinsic. // @@ -125,14 +124,14 @@ template __esimd_wrregion(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, uint16_t Offset, - __SEIEE::mask_type_t Mask = 1); + __SEIEED::simd_mask_impl_t Mask = 1); template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_wrindirect(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, __SEIEED::vector_type_t Offset, - __SEIEE::mask_type_t Mask = 1); + __SEIEED::simd_mask_impl_t Mask = 1); __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -286,7 +285,7 @@ template __esimd_wrregion(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, uint16_t Offset, - __SEIEE::mask_type_t Mask) { + __SEIEED::simd_mask_impl_t Mask) { uint16_t EltOffset = Offset / sizeof(T); assert(Offset % sizeof(T) == 0); @@ -310,7 +309,7 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_wrindirect(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, __SEIEED::vector_type_t Offset, - __SEIEE::mask_type_t Mask) { + __SEIEED::simd_mask_impl_t Mask) { __SEIEED::vector_type_t Result = OldVal; for (int i = 0; i < M; ++i) { if (Mask[i]) { @@ -325,5 +324,4 @@ __esimd_wrindirect(__SEIEED::vector_type_t OldVal, #endif // __SYCL_DEVICE_ONLY__ -#undef __SEIEE #undef __SEIEED diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_memory_intrin.hpp index f667c6733e916..91473a091dbe2 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_memory_intrin.hpp @@ -85,7 +85,7 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_flat_read(__SEIEED::vector_type_t addrs, int ElemsPerAddr = NumBlk, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_impl_t pred = 1); // flat_write does flat-address scatter template addrs, __SEIEED::vector_type_t vals, - int ElemsPerAddr = NumBlk, __SEIEED::vector_type_t pred = 1); + int ElemsPerAddr = NumBlk, __SEIEED::simd_mask_impl_t pred = 1); // flat_block_read reads a block of data from one flat address template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __esimd_flat_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_impl_t pred = 1); // flat_write does flat-address scatter template addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_impl_t pred = 1); // Low-level surface-based gather. Collects elements located at given offsets in // a surface and returns them as a single \ref simd object. Element can be @@ -205,7 +205,7 @@ template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void -__esimd_surf_write(__SEIEED::vector_type_t pred, int16_t scale, +__esimd_surf_write(__SEIEED::simd_mask_impl_t pred, int16_t scale, SurfIndAliasTy surf_ind, uint32_t global_offset, __SEIEED::vector_type_t elem_offsets, __SEIEED::vector_type_t vals) @@ -229,7 +229,7 @@ template <__SEIEE::EsimdAtomicOpType Op, typename Ty, int N, __SEIEE::CacheHint L3H = __SEIEE::CacheHint::None> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_flat_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_impl_t pred); template <__SEIEE::EsimdAtomicOpType Op, typename Ty, int N, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None, @@ -237,7 +237,7 @@ template <__SEIEE::EsimdAtomicOpType Op, typename Ty, int N, SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_flat_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_impl_t pred); template <__SEIEE::EsimdAtomicOpType Op, typename Ty, int N, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None, @@ -246,7 +246,7 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_flat_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_impl_t pred); // esimd_barrier, generic group barrier SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_barrier(); @@ -262,14 +262,14 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_slm_fence(uint8_t cntl); template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_read(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_impl_t pred = 1); // slm_write does SLM scatter template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_slm_write(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_impl_t pred = 1); // slm_block_read reads a block of data from SLM template @@ -286,33 +286,33 @@ template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_impl_t pred = 1); // slm_write4 does SLM scatter4 template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_slm_write4(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_impl_t pred = 1); // slm_atomic: SLM atomic template <__SEIEE::EsimdAtomicOpType Op, typename Ty, int N> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_impl_t pred); template <__SEIEE::EsimdAtomicOpType Op, typename Ty, int N> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_impl_t pred); template <__SEIEE::EsimdAtomicOpType Op, typename Ty, int N> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_impl_t pred); // Media block load // @@ -418,9 +418,9 @@ template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, - uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, + __SEIEED::simd_mask_impl_t pred, uint8_t numSrc0, + uint8_t numSrc1, uint8_t numDst, uint8_t sfid, + uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, __SEIEED::vector_type_t msgSrc1, __SEIEED::vector_type_t msgDst); @@ -454,9 +454,9 @@ __esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numDst, uint8_t sfid, - uint32_t exDesc, uint32_t msgDesc, + __SEIEED::simd_mask_impl_t pred, uint8_t numSrc0, + uint8_t numDst, uint8_t sfid, uint32_t exDesc, + uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, __SEIEED::vector_type_t msgDst); @@ -485,13 +485,11 @@ __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, /// @param msgSrc1 the second source operand of send message. /// template -SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void -__esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid, - uint32_t exDesc, uint32_t msgDesc, - __SEIEED::vector_type_t msgSrc0, - __SEIEED::vector_type_t msgSrc1); +SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_raw_sends_store( + uint8_t modifier, uint8_t execSize, __SEIEED::simd_mask_impl_t pred, + uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid, uint32_t exDesc, + uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, + __SEIEED::vector_type_t msgSrc1); /// \brief Raw send store. /// @@ -515,9 +513,8 @@ __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t sfid, uint32_t exDesc, - uint32_t msgDesc, + __SEIEED::simd_mask_impl_t pred, uint8_t numSrc0, + uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0); #ifndef __SYCL_DEVICE_ONLY__ @@ -525,7 +522,7 @@ template inline __SEIEED::vector_type_t __esimd_flat_read(__SEIEED::vector_type_t addrs, int ElemsPerAddr, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_impl_t pred) { auto NumBlkDecoded = __SEIEED::ElemsPerAddrDecoding(NumBlk); __SEIEED::vector_type_t V; ElemsPerAddr = __SEIEED::ElemsPerAddrDecoding(ElemsPerAddr); @@ -551,7 +548,7 @@ template inline __SEIEED::vector_type_t __esimd_flat_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_impl_t pred) { __SEIEED::vector_type_t V; unsigned int Next = 0; @@ -601,7 +598,7 @@ inline void __esimd_flat_write( __SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - int ElemsPerAddr, __SEIEED::vector_type_t pred) { + int ElemsPerAddr, __SEIEED::simd_mask_impl_t pred) { auto NumBlkDecoded = __SEIEED::ElemsPerAddrDecoding(NumBlk); ElemsPerAddr = __SEIEED::ElemsPerAddrDecoding(ElemsPerAddr); @@ -626,7 +623,7 @@ template addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_impl_t pred) { __SEIEED::vector_type_t V; unsigned int Next = 0; @@ -830,7 +827,7 @@ inline void __esimd_slm_fence(uint8_t cntl) {} template inline __SEIEED::vector_type_t __esimd_slm_read(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_impl_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -839,7 +836,7 @@ __esimd_slm_read(__SEIEED::vector_type_t addrs, template inline void __esimd_slm_write(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) {} + __SEIEED::simd_mask_impl_t pred) {} // slm_block_read reads a block of data from SLM template @@ -857,7 +854,7 @@ inline void __esimd_slm_block_write(uint32_t addr, template inline __SEIEED::vector_type_t __esimd_slm_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_impl_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -867,13 +864,13 @@ template inline void __esimd_slm_write4(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) {} + __SEIEED::simd_mask_impl_t pred) {} // slm_atomic: SLM atomic template <__SEIEE::EsimdAtomicOpType Op, typename Ty, int N> inline __SEIEED::vector_type_t __esimd_slm_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_impl_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -882,7 +879,7 @@ template <__SEIEE::EsimdAtomicOpType Op, typename Ty, int N> inline __SEIEED::vector_type_t __esimd_slm_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_impl_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -892,7 +889,7 @@ inline __SEIEED::vector_type_t __esimd_slm_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_impl_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -901,7 +898,7 @@ template <__SEIEE::EsimdAtomicOpType Op, typename Ty, int N, __SEIEE::CacheHint L1H, __SEIEE::CacheHint L3H> inline __SEIEED::vector_type_t __esimd_flat_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_impl_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -911,7 +908,7 @@ template <__SEIEE::EsimdAtomicOpType Op, typename Ty, int N, inline __SEIEED::vector_type_t __esimd_flat_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_impl_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -922,7 +919,7 @@ inline __SEIEED::vector_type_t __esimd_flat_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_impl_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -986,14 +983,12 @@ inline uint32_t __esimd_get_value(AccessorTy acc) { /// template -inline __SEIEED::vector_type_t -__esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, - uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, - __SEIEED::vector_type_t msgSrc0, - __SEIEED::vector_type_t msgSrc1, - __SEIEED::vector_type_t msgDst) { +inline __SEIEED::vector_type_t __esimd_raw_sends_load( + uint8_t modifier, uint8_t execSize, __SEIEED::simd_mask_impl_t pred, + uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, uint8_t sfid, + uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, + __SEIEED::vector_type_t msgSrc1, + __SEIEED::vector_type_t msgDst) { throw cl::sycl::feature_not_supported(); return 0; } @@ -1025,13 +1020,11 @@ __esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, /// Returns a simd vector of type Ty1 and size N1. /// template -inline __SEIEED::vector_type_t -__esimd_raw_send_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numDst, uint8_t sfid, - uint32_t exDesc, uint32_t msgDesc, - __SEIEED::vector_type_t msgSrc0, - __SEIEED::vector_type_t msgDst) { +inline __SEIEED::vector_type_t __esimd_raw_send_load( + uint8_t modifier, uint8_t execSize, __SEIEED::simd_mask_impl_t pred, + uint8_t numSrc0, uint8_t numDst, uint8_t sfid, uint32_t exDesc, + uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, + __SEIEED::vector_type_t msgDst) { throw cl::sycl::feature_not_supported(); return 0; } @@ -1062,7 +1055,7 @@ __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, /// template inline void __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, + __SEIEED::simd_mask_impl_t pred, uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, @@ -1092,7 +1085,7 @@ inline void __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, /// template inline void __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, + __SEIEED::simd_mask_impl_t pred, uint8_t numSrc0, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0) { diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_types.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_types.hpp index fd13b8b4ce08d..0148cebea855e 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_types.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/esimd_types.hpp @@ -248,19 +248,23 @@ inline std::istream &operator>>(std::istream &I, half &rhs) { rhs = ValFloat; return I; } -} // namespace detail -// TODO @rolandschulz on May 21 -// {quote} -// - The mask should also be a wrapper around the clang - vector type rather -// than the clang - vector type itself. -// - The internal storage should be implementation defined.uint16_t is a bad -// choice for some HW.Nor is it how clang - vector types works(using the same -// size int as the corresponding vector type used for comparison(e.g. long for -// double and int for float)). -template -using mask_type_t = typename detail::vector_type::type; +// internal implementation for the mask type +template struct mask_impl { + static_assert(N > 0, "mask must have at least one element"); + + static constexpr int length = N; + using type = T __attribute__((ext_vector_type(N))); + using value_type = T; +}; + +template using mask_impl_t = typename mask_impl::type; + +template using simd_mask_impl = mask_impl; +template using simd_mask_impl_t = typename simd_mask_impl::type; + +} // namespace detail } // namespace esimd } // namespace experimental } // namespace intel diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/esimd.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/esimd.hpp index 266720eefbd2c..ae634470db878 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/esimd.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/esimd.hpp @@ -14,6 +14,7 @@ #include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -32,6 +33,7 @@ namespace esimd { /// \ingroup sycl_esimd template class simd { template friend class simd_view; + template friend class simd_mask; public: /// The underlying builtin data type. @@ -127,7 +129,7 @@ template class simd { /// Whole region update with predicates. void merge(const simd &Val, const mask_type_t &Mask) { set(__esimd_wrregion( - data(), Val.data(), 0, Mask)); + data(), Val.data(), 0, Mask.data())); } void merge(const simd &Val1, simd Val2, const mask_type_t &Mask) { Val2.merge(Val1, Mask); @@ -247,18 +249,12 @@ template class simd { #undef DEF_BINOP - // TODO @rolandschulz, @mattkretz - // Introduce simd_mask type and let user use this type instead of specific - // type representation (simd) to make it more portable - // TODO @iburyl should be mask_type_t, which might become more abstracted in - // the future revisions. - // #define DEF_RELOP(RELOP) \ - ESIMD_INLINE friend simd operator RELOP(const simd &X, \ - const simd &Y) { \ + ESIMD_INLINE friend simd_mask operator RELOP(const simd &X, \ + const simd &Y) { \ auto R = X.data() RELOP Y.data(); \ - mask_type_t M(1); \ - return M & detail::convert>(R); \ + using mask_elem_t = std::remove_reference_t; \ + return simd_mask::template create(R); \ } DEF_RELOP(>) @@ -627,6 +623,31 @@ ESIMD_INLINE #endif // __SYCL_DEVICE_ONLY__ } +// Some simd_mask member definitions. Put here because their implementation +// needs full definition of the simd class. + +template +simd_mask::simd_mask(const simd &v) noexcept { + set(__builtin_convertvector(v.data(), simd_mask_impl_t)); +} + +template simd_mask::simd_mask(simd &&v) noexcept { + set(__builtin_convertvector(v.data(), simd_mask_impl_t)); +} + +template +simd_mask & +simd_mask::operator=(const simd &v) noexcept { + set(__builtin_convertvector(v.data(), simd_mask_impl_t)); + return *this; +} + +template +simd_mask &simd_mask::operator=(simd &&v) noexcept { + set(__builtin_convertvector(v.data(), simd_mask_impl_t)); + return *this; +} + } // namespace esimd } // namespace experimental } // namespace intel diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/esimd_math.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/esimd_math.hpp index 9602994980443..8ad908c0112ff 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/esimd_math.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/esimd_math.hpp @@ -1108,8 +1108,8 @@ ESIMD_NODEBUG ESIMD_INLINE esimd_atan(simd src0, int flag = GENX_NOSAT) { simd Src0 = esimd_abs(src0); - simd Neg = src0 < T(0.0); - simd Gt1 = Src0 > T(1.0); + simd_mask Neg = src0 < T(0.0); + simd_mask Gt1 = Src0 > T(1.0); Src0.merge(esimd_inv(Src0), Gt1); @@ -1151,8 +1151,8 @@ ESIMD_NODEBUG ESIMD_INLINE esimd_acos(simd src0, int flag = GENX_NOSAT) { simd Src0 = esimd_abs(src0); - simd Neg = src0 < T(0.0); - simd TooBig = Src0 >= T(0.999998); + simd_mask Neg = src0 < T(0.0); + simd_mask TooBig = Src0 >= T(0.999998); // Replace oversized values to ensure no possibility of sqrt of // a negative value later @@ -1194,7 +1194,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t::value, simd> esimd_asin(simd src0, int flag = GENX_NOSAT) { - simd Neg = src0 < T(0.0); + simd_mask Neg = src0 < T(0.0); simd Result = T(ESIMD_HDR_CONST_PI / 2.0) - esimd_acos(esimd_abs(src0)); @@ -1487,7 +1487,7 @@ ESIMD_INLINE simd esimd_atan2_fast(simd y, simd x, simd a1; simd atan2; - simd mask = (y >= 0.0f); + simd_mask mask = (y >= 0.0f); a0.merge(ESIMD_CONST_PI * 0.5f, ESIMD_CONST_PI * 1.5f, mask); a1.merge(0, ESIMD_CONST_PI * 2.0f, mask); @@ -1523,7 +1523,7 @@ ESIMD_INLINE simd esimd_atan2(simd y, simd x, simd v_distance; simd v_y0; simd atan2; - simd mask; + simd_mask mask; mask = (x < 0); v_y0.merge(ESIMD_CONST_PI, 0, mask); @@ -1541,10 +1541,10 @@ template <> ESIMD_INLINE float esimd_atan2(float y, float x, const uint flags) { float v_distance; float v_y0; simd atan2; - unsigned short mask; + simd_mask<1> mask; mask = (x < 0); - v_y0 = mask ? ESIMD_CONST_PI : 0; + v_y0 = mask[0] ? ESIMD_CONST_PI : 0; v_distance = esimd_sqrt(x * x + y * y); mask = (esimd_abs(y) < 0.000001f); atan2.merge(v_y0, (2 * esimd_atan((v_distance - x) / y)), mask); diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/esimd_memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/esimd_memory.hpp index e2f76ab5e5950..a6434fc0d10ba 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/esimd_memory.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/esimd_memory.hpp @@ -76,7 +76,7 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< ((n == 8 || n == 16 || n == 32) && (ElemsPerAddr == 1 || ElemsPerAddr == 2 || ElemsPerAddr == 4)), simd> -gather(T *p, simd offsets, simd pred = 1) { +gather(T *p, simd offsets, simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); @@ -126,7 +126,7 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< (ElemsPerAddr == 1 || ElemsPerAddr == 2 || ElemsPerAddr == 4)), void> scatter(T *p, simd vals, simd offsets, - simd pred = 1) { + simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); addrs = addrs + offsets_i; @@ -317,7 +317,7 @@ ESIMD_INLINE ESIMD_NODEBUG !std::is_pointer::value, void> scatter(AccessorTy acc, simd vals, simd offsets, - uint32_t glob_offset = 0, simd pred = 1) { + uint32_t glob_offset = 0, simd_mask pred = simd_mask{1}) { constexpr int TypeSizeLog2 = detail::ElemsPerAddrEncoding(); // TODO (performance) use hardware-supported scale once BE supports it @@ -338,19 +338,19 @@ ESIMD_INLINE ESIMD_NODEBUG #if defined(__SYCL_DEVICE_ONLY__) const auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); __esimd_surf_write( - pred, scale, surf_ind, glob_offset, offsets, promo_vals); + pred.data(), scale, surf_ind, glob_offset, offsets, promo_vals); #else __esimd_surf_write( - pred, scale, acc, glob_offset, offsets, promo_vals); + pred.data(), scale, acc, glob_offset, offsets, promo_vals); #endif } else { #if defined(__SYCL_DEVICE_ONLY__) const auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); __esimd_surf_write( - pred, scale, surf_ind, glob_offset, offsets, vals); + pred.data(), scale, surf_ind, glob_offset, offsets, vals); #else __esimd_surf_write( - pred, scale, acc, glob_offset, offsets, vals); + pred.data(), scale, acc, glob_offset, offsets, vals); #endif } } @@ -385,7 +385,7 @@ template > - gather4(T *p, simd offsets, simd pred = 1) { + gather4(T *p, simd offsets, simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); @@ -401,7 +401,7 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<(n == 16 || n == 32) && (sizeof(T) == 4), void> scatter4(T *p, simd vals, - simd offsets, simd pred = 1) { + simd offsets, simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); addrs = addrs + offsets_i; @@ -525,7 +525,7 @@ template (), simd> - flat_atomic(T *p, simd offset, simd pred) { + flat_atomic(T *p, simd offset, simd_mask pred) { simd vAddr(reinterpret_cast(p)); simd offset_i1 = convert(offset); vAddr += offset_i1; @@ -540,7 +540,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> flat_atomic(T *p, simd offset, simd src0, - simd pred) { + simd_mask pred) { simd vAddr(reinterpret_cast(p)); simd offset_i1 = convert(offset); vAddr += offset_i1; @@ -556,7 +556,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> flat_atomic(T *p, simd offset, simd src0, - simd src1, simd pred) { + simd src1, simd_mask pred) { simd vAddr(reinterpret_cast(p)); simd offset_i1 = convert(offset); vAddr += offset_i1; @@ -625,8 +625,8 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void slm_init(uint32_t size); template ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<(n == 16 || n == 32), simd> - slm_load(simd offsets, simd pred = 1) { - return __esimd_slm_read(offsets.data(), pred.data()); + slm_load(simd offsets, simd_mask Pred = 1) { + return __esimd_slm_read(offsets.data(), Pred.data()); } /// SLM scatter. @@ -634,7 +634,7 @@ template ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<(n == 16 || n == 32), void> slm_store(simd vals, simd offsets, - simd pred = 1) { + simd_mask pred = 1) { __esimd_slm_write(offsets.data(), vals.data(), pred.data()); } @@ -646,7 +646,7 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<(n == 8 || n == 16 || n == 32) && (sizeof(T) == 4), simd> - slm_load4(simd offsets, simd pred = 1) { + slm_load4(simd offsets, simd_mask pred = 1) { return __esimd_slm_read4(offsets.data(), pred.data()); } @@ -655,7 +655,7 @@ template typename sycl::detail::enable_if_t< (n == 8 || n == 16 || n == 32) && (sizeof(T) == 4), void> slm_store4(simd vals, simd offsets, - simd pred = 1) { + simd_mask pred = 1) { __esimd_slm_write4(offsets.data(), vals.data(), pred.data()); } @@ -698,7 +698,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> - slm_atomic(simd offsets, simd pred) { + slm_atomic(simd offsets, simd_mask pred) { return __esimd_slm_atomic0(offsets.data(), pred.data()); } @@ -707,8 +707,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> - slm_atomic(simd offsets, simd src0, - simd pred) { + slm_atomic(simd offsets, simd src0, simd_mask pred) { return __esimd_slm_atomic1(offsets.data(), src0.data(), pred.data()); } @@ -719,7 +718,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> slm_atomic(simd offsets, simd src0, simd src1, - simd pred) { + simd_mask pred) { return __esimd_slm_atomic2(offsets.data(), src0.data(), src1.data(), pred.data()); } @@ -873,7 +872,7 @@ esimd_raw_sends_load(simd msgDst, simd msgSrc0, simd msgSrc1, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, uint8_t isEOT = 0, - uint8_t isSendc = 0, simd mask = 1) { + uint8_t isSendc = 0, simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar"); constexpr unsigned _Width2 = n2 * sizeof(T2); @@ -913,7 +912,7 @@ ESIMD_INLINE ESIMD_NODEBUG simd esimd_raw_send_load(simd msgDst, simd msgSrc0, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst, uint8_t isEOT = 0, - uint8_t isSendc = 0, simd mask = 1) { + uint8_t isSendc = 0, simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar"); constexpr unsigned _Width2 = n2 * sizeof(T2); @@ -951,7 +950,7 @@ esimd_raw_sends_store(simd msgSrc0, simd msgSrc1, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1, uint8_t isEOT = 0, uint8_t isSendc = 0, - simd mask = 1) { + simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0"); constexpr unsigned _Width2 = n2 * sizeof(T2); @@ -985,7 +984,7 @@ ESIMD_INLINE ESIMD_NODEBUG void esimd_raw_send_store(simd msgSrc0, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0, uint8_t isSendc = 0, - simd mask = 1) { + simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0"); diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/simd_mask.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/simd_mask.hpp new file mode 100644 index 0000000000000..8c2c87e9b2438 --- /dev/null +++ b/sycl/include/sycl/ext/intel/experimental/esimd/simd_mask.hpp @@ -0,0 +1,300 @@ +//==-------------- simd_mask.hpp - DPC++ Explicit SIMD API -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// simd_mask class definition - represents Gen simd operation mask. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include // to define C++14,17 extensions +#include +#include + +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace ext { +namespace intel { +namespace experimental { +namespace esimd { + +// Represents a Gen simd operation mask. Aims to have similar interface to +// std::experimantal::simd_mask: +// (https://github.com/llvm/llvm-project/blob/main/libcxx/include/experimental/simd) +// Does not provide the following members/features: +// - abi_type (not needed for ESIMD) +// - simd_type (not needed for ESIMD) +// - reference (TODO) +// - implicit type conversion constructor (not needed for ESIMD) +// - reference operator[](size_t) (TODO) +// - flags in memory load/store operations (TODO) +// - reduction functions (TODO): +// any_of, all_of, some_of, none_of, popcount, find_first_set, find_last_set +template class simd_mask { +private: + using simd_mask_impl_t = typename detail::simd_mask_impl_t; + +public: + using value_type = typename detail::simd_mask_impl::value_type; + + static constexpr size_t size() noexcept { return N; } + + // Default constructor. + simd_mask() = default; + + /// Broadcast constructor. + /// NOTE: std::exprimental::simd_mask provides broadcast constructor only for + /// the value_type argument. + template ::value>> + simd_mask(T v) noexcept { + for (auto I = 0; I < N; ++I) { + Val[I] = v; + } + } + + // TODO add accessor-based mask memory operations. + + /// Load constructor. + // Implementation note: use SFINAE to avoid overload ambiguity: + // 1) with 'simd_mask(value_type v)' in 'simd_mask m(0)' + // 2) with 'simd_mask(const T1(&&arr)[N])' in simd_mask m((value_type*)p)' + template ::value>> + explicit simd_mask(const T *ptr) { + copy_from(ptr); + } + +#define __ESIMD_UNSUPPORTED_MASK_SIZE_MSG \ + "This sime_mask size is not supported yet in memory I/O operations. " \ + "Supported sizes are 8, 18, 32." + +private: + static inline constexpr bool mask_size_ok_for_mem_io() { + constexpr unsigned Sz = sizeof(value_type) * N; + return (Sz >= detail::OperandSize::OWORD) && + (Sz % detail::OperandSize::OWORD == 0) && + detail::isPowerOf2(Sz / detail::OperandSize::OWORD) && + (Sz <= 8 * detail::OperandSize::OWORD); + } + +public: + /// Load the mask's value from memory. + /// TODO support arbitrary sizes. + template ::value>> + void copy_from(const T *ptr) { + static_assert(mask_size_ok_for_mem_io(), __ESIMD_UNSUPPORTED_MASK_SIZE_MSG); + set(__esimd_flat_block_read_unaligned( + reinterpret_cast(ptr))); + } + + /// Store the mask's value to memory. + template ::value>> + void copy_to(T *ptr) const { + static_assert(mask_size_ok_for_mem_io(), __ESIMD_UNSUPPORTED_MASK_SIZE_MSG); + __esimd_flat_block_write(reinterpret_cast(ptr), data()); + } + +#undef __ESIMD_UNSUPPORTED_MASK_SIZE_MSG + + value_type operator[](size_t i) const { return data()[i]; } + + simd_mask operator!() const noexcept { + return simd_mask{__builtin_convertvector(!data(), simd_mask_impl_t)}; + } + + // Logic and bitwise operators. + template + friend simd_mask operator&&(const simd_mask &, + const simd_mask &) noexcept; + template + friend simd_mask operator||(const simd_mask &, + const simd_mask &) noexcept; + template + friend simd_mask operator&(const simd_mask &, + const simd_mask &) noexcept; + template + friend simd_mask operator|(const simd_mask &, + const simd_mask &) noexcept; + template + friend simd_mask operator^(const simd_mask &, + const simd_mask &) noexcept; + + // Comparison operators. + template + friend simd_mask operator==(const simd_mask &, + const simd_mask &) noexcept; + template + friend simd_mask operator!=(const simd_mask &, + const simd_mask &) noexcept; + + // Compound assignment operators. + template + friend simd_mask &operator&=(simd_mask &, + const simd_mask &) noexcept; + template + friend simd_mask &operator|=(simd_mask &, + const simd_mask &) noexcept; + template + friend simd_mask &operator^=(simd_mask &, + const simd_mask &) noexcept; + + // TODO + // template friend bool all_of(const simd_mask&) noexcept; + // template friend bool any_of(const simd_mask&) noexcept; + // template friend bool none_of(const simd_mask&) noexcept; + // template friend bool some_of(const simd_mask&) noexcept; + // template friend int popcount(const simd_mask&) noexcept; + // template friend int find_first_set(const simd_mask&); + // template friend int find_last_set(const simd_mask&); + + // APIs not present in std::experimental: + + // To allow simd_mask m({1,0,0,1,...}). + template explicit simd_mask(const T1(&&arr)[N]) noexcept { + for (auto I = 0; I < N; ++I) { + Val[I] = arr[I]; // implicit conversion from T1 to value_type + } + } + + // Load the mask's value from array. + void copy_from(const value_type (&arr)[N]) { + simd_mask_impl_t Tmp; + for (auto I = 0; I < N; ++I) { + Tmp[I] = arr[I]; + } + set(Tmp); + } + + // Store the mask's value to array. + void copy_to(value_type (&arr)[N]) const { + for (auto I = 0; I < N; ++I) { + arr[I] = data()[I]; + } + } + + // Assignment operator to support simd_mask n = a > b; + simd_mask &operator=(value_type val) noexcept { + set(val); + return *this; + } + + // TODO FIXME Make this private, add friend accessor to let ESIMD API free + // functions access the 'Val' field. Mimics simd::data() - the note applies + // there too. + simd_mask_impl_t data() const { +#ifndef __SYCL_DEVICE_ONLY__ + return Val; +#else + return __esimd_vload(&Val); +#endif // __SYCL_DEVICE_ONLY__ + } + +#define __ESIMD_MASK_DEPRECATION_MSG \ + "Use of 'simd' class to represent predicate or mask is deprecated. Use " \ + "'simd_mask' instead." + + // Implicit conversion constructors for backward compatibility + // (implemented in esimd.hpp) + __SYCL_DEPRECATED(__ESIMD_MASK_DEPRECATION_MSG) + simd_mask(const simd &) noexcept; + + __SYCL_DEPRECATED(__ESIMD_MASK_DEPRECATION_MSG) + simd_mask(simd &&) noexcept; + + // Assignment operators for backward compatibility (implemented in esimd.hpp) + __SYCL_DEPRECATED(__ESIMD_MASK_DEPRECATION_MSG) + simd_mask &operator=(const simd &) noexcept; + + __SYCL_DEPRECATED(__ESIMD_MASK_DEPRECATION_MSG) + simd_mask &operator=(simd &&) noexcept; + +#undef __ESIMD_MASK_DEPRECATION_MSG + +private: + explicit simd_mask(const simd_mask_impl_t &data) noexcept { set(data); } + explicit simd_mask(simd_mask_impl_t &&data) noexcept { set(data); } + + // Factory function to create simd_mask objects from a result of a clang + // vector binary operation. rvalue reference version. + template + static simd_mask create(detail::mask_impl_t &&v) noexcept { + return simd_mask{__builtin_convertvector(v, simd_mask_impl_t)}; + } + + // Factory function to create simd_mask objects from a result of a clang + // vector binary operation. Constant reference version. + template + static simd_mask create(const detail::mask_impl_t &v) noexcept { + return simd_mask{__builtin_convertvector(v, simd_mask_impl_t)}; + } + + template friend class simd; + template friend class simd_view; + + void set(const simd_mask_impl_t &v) { +#ifndef __SYCL_DEVICE_ONLY__ + Val = v; +#else + __esimd_vstore(&Val, v); +#endif + } + +private: + simd_mask_impl_t Val; +}; + +// Alias for backward compatibility. +template using mask_type_t = simd_mask; + +// Logic and bitwise operators. +// Comparison operators. + +#define __DEFINE_ESIMD_MASK_BIN_OP(op) \ + template \ + ESIMD_INLINE simd_mask operator op(const simd_mask &m1, \ + const simd_mask &m2) noexcept { \ + auto Res = m1.data() op m2.data(); \ + using T = std::remove_reference_t; \ + return simd_mask::template create(std::move(Res)); \ + } + +__DEFINE_ESIMD_MASK_BIN_OP(&&) +__DEFINE_ESIMD_MASK_BIN_OP(||) +__DEFINE_ESIMD_MASK_BIN_OP(&) +__DEFINE_ESIMD_MASK_BIN_OP(|) +__DEFINE_ESIMD_MASK_BIN_OP(^) +__DEFINE_ESIMD_MASK_BIN_OP(==) +__DEFINE_ESIMD_MASK_BIN_OP(!=) + +#undef __DEFINE_ESIMD_MASK_BIN_OP + +// Compound assignment operators. + +#define __DEFINE_ESIMD_MASK_ASSIGN_OP(assign_op, op) \ + template \ + ESIMD_INLINE simd_mask &operator assign_op( \ + simd_mask &m1, const simd_mask &m2) noexcept { \ + m1.set(m1.data() op m2.data()); \ + return m1; \ + } + +__DEFINE_ESIMD_MASK_ASSIGN_OP(&=, &) +__DEFINE_ESIMD_MASK_ASSIGN_OP(|=, |) +__DEFINE_ESIMD_MASK_ASSIGN_OP(^=, ^) + +#undef __DEFINE_ESIMD_MASK_ASSIGN_OP + +} // namespace esimd +} // namespace experimental +} // namespace intel +} // namespace ext +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/esimd/intrins_trans.cpp b/sycl/test/esimd/intrins_trans.cpp index 09b1ac5babdbd..4881bcc5baa2e 100644 --- a/sycl/test/esimd/intrins_trans.cpp +++ b/sycl/test/esimd/intrins_trans.cpp @@ -42,7 +42,7 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo() { simd v1(0, x + z); simd offsets(0, y); simd v_addr(reinterpret_cast(ptr)); - simd pred; + simd_mask pred; v_addr += offsets; __esimd_flat_atomic0( diff --git a/sycl/test/esimd/simd_copy_to_copy_from.cpp b/sycl/test/esimd/simd_copy_to_copy_from.cpp index cdc87c128866d..11699779f64ad 100644 --- a/sycl/test/esimd/simd_copy_to_copy_from.cpp +++ b/sycl/test/esimd/simd_copy_to_copy_from.cpp @@ -42,13 +42,13 @@ kernel3(accessor &buf) simd v1(0, 1); simd v0; // expected-error@+3 {{no matching member function for call to 'copy_from'}} - // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:499 {{}} - // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:511 {{}} + // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:* {{}} + // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:* {{}} v0.copy_from(buf, 0); v0 = v0 + v1; // expected-error@+3 {{no matching member function for call to 'copy_to'}} - // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:516 {{}} - // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:527 {{}} + // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:* {{}} + // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:* {{}} v0.copy_to(buf, 0); } @@ -58,8 +58,8 @@ SYCL_EXTERNAL void kernel4( SYCL_ESIMD_FUNCTION { simd v; // expected-error@+3 {{no matching member function for call to 'copy_from'}} - // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:499 {{}} - // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:511 {{}} + // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:* {{}} + // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:* {{}} v.copy_from(buf, 0); } @@ -69,7 +69,7 @@ SYCL_EXTERNAL void kernel5( SYCL_ESIMD_FUNCTION { simd v(0, 1); // expected-error@+3 {{no matching member function for call to 'copy_to'}} - // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:516 {{}} - // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:527 {{}} + // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:* {{}} + // expected-note@sycl/ext/intel/experimental/esimd/esimd.hpp:* {{}} v.copy_to(buf, 0); } diff --git a/sycl/test/esimd/simd_mask.cpp b/sycl/test/esimd/simd_mask.cpp new file mode 100644 index 0000000000000..eb7f5492b110d --- /dev/null +++ b/sycl/test/esimd/simd_mask.cpp @@ -0,0 +1,73 @@ +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s + +// This test checks that both host and device compilers can +// successfully compile simd_mask APIs. + +#include +#include +#include +#include + +using namespace sycl::ext::intel::experimental::esimd; +using namespace cl::sycl; + +#define DEFINE_BIN_OP_TEST(op, name) \ + template \ + SYCL_EXTERNAL SYCL_ESIMD_FUNCTION simd_mask test_impl_##name( \ + simd_mask &m1, simd_mask &m2) { \ + return m1 op m2; \ + } \ + \ + simd_mask<1> test_impl_1_##name(simd_mask<1> &m1, simd_mask<1> &m2) { \ + return test_impl_##name(m1, m2); \ + } \ + \ + simd_mask<17> test_impl_17_##name(simd_mask<17> &m1, simd_mask<17> &m2) { \ + return test_impl_##name(m1, m2); \ + } \ + \ + simd_mask<32> test_impl_32_##name(simd_mask<32> &m1, simd_mask<32> &m2) { \ + return test_impl_##name(m1, m2); \ + } + +DEFINE_BIN_OP_TEST(&&, and) +DEFINE_BIN_OP_TEST(||, or) +DEFINE_BIN_OP_TEST(&, bit_and) +DEFINE_BIN_OP_TEST(|, bit_or) +DEFINE_BIN_OP_TEST(^, xor) +DEFINE_BIN_OP_TEST(==, eq) +DEFINE_BIN_OP_TEST(!=, ne) +DEFINE_BIN_OP_TEST(&=, bit_and_eq) +DEFINE_BIN_OP_TEST(|=, bit_or_eq) +DEFINE_BIN_OP_TEST(^=, xor_eq) + +SYCL_EXTERNAL SYCL_ESIMD_FUNCTION simd_mask<8> misc_tests(bool val) { + simd_mask<8> m1(val); // broadcast constructor + simd_mask<8> m2; // default constructor + simd_mask<8> m3(m1[4]); // operator[] + simd_mask<8> m4 = !m3; // operator! + static_assert(m4.size() == 8, "size() failed"); + simd ch1(1); + simd ch2(2); + simd_mask<8> m5 = ch1 > ch2; + return m5; +} + +SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void compat_test(float *ptr) { + simd pred(1); + simd offsets; + // expected-warning@+2 {{deprecated}} + // expected-note@sycl/ext/intel/experimental/esimd/simd_mask.hpp:* {{}} + auto x1 = gather(ptr, offsets, pred); + // expected-warning@+2 {{deprecated}} + // expected-note@sycl/ext/intel/experimental/esimd/simd_mask.hpp:* {{}} + auto x2 = gather(ptr, offsets, simd{}); + simd_mask<16> m1(0); + // expected-warning@+2 {{deprecated}} + // expected-note@sycl/ext/intel/experimental/esimd/simd_mask.hpp:* {{}} + m1 = pred; + simd_mask<16> m2(0); + // expected-warning@+2 {{deprecated}} + // expected-note@sycl/ext/intel/experimental/esimd/simd_mask.hpp:* {{}} + m2 = std::move(pred); +}