From 5b400888b16b6289191700ab8554aa83687fd258 Mon Sep 17 00:00:00 2001 From: kbobrovs Date: Sun, 27 Jun 2021 00:24:18 -0700 Subject: [PATCH 1/8] [SYCL][ESIMD] Replace mask_type_t with simd_mask to represent Gen predicates. Addresses the following review comment from simd.hpp: // TODO @rolandschulz, @mattkretz // Introduce simd_mask type and let user use this type instead of specific // type representation (simd) to make it more portable // TODO @iburyl should be mask_type_t, which might become more abstracted in // the future revisions. // Gen predicates (masks) used to be represented with simd types which made them indistinguishable from simd objects forcing the same same sets of available APIs, even though it did not always make sense. This patch introduces a new class simd_mask, which provides its own set of APIs, some of which are unique, other - repeat those from the simd class. A new class 'simd_obj_impl' is added, which encompasses functionality common between simd and simd_mask objects. Particularly, view creation APIs (select, bit_cast_view), object construction APIs - replicate, memory I/O (copy from/to) and others. simd and simd_mask both extend the simd_obj_impl class. They use the 'curiously recurring template' design pattern, when simd_obj_impl is templated by its derivative class (simd or simd_mask), to allow simd_obj_impl APIs return subclass objects rather than simd_obj_impl instances, those avoiding API duplication in subclasses. For example, 'select' functions defined in simd_obj_impl, will return a view of the derived class (subclass) rather than a view of simd_obj_impl: simd_view, where Derived is the derived class. APIs unique to simd_mask: - unary logical negation '!' operator - implicit conversion to bool for single-element masks - logical binary operators '||' and '&&' APIs unique to simd: - binary arithmetic operators ('+', '-', '*', '/') and corresponding compound assignments - unary arithmetic operators ('+', '-', '++', '--') - relational operators ('<', '<=', '>', '>=') The same division of the APIs to unique sets applies to simd_view,...> and simd_view,...> simd_view class specializations. --- llvm/lib/SYCLLowerIR/LowerESIMD.cpp | 11 +- llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp | 16 +- .../experimental/esimd/detail/intrin.hpp | 14 +- .../experimental/esimd/detail/math_intrin.hpp | 9 +- .../esimd/detail/memory_intrin.hpp | 123 ++- .../experimental/esimd/detail/operators.hpp | 412 ++++++++++ .../experimental/esimd/detail/region.hpp | 15 +- .../esimd/detail/simd_obj_impl.hpp | 716 +++++++++++++++++ .../esimd/detail/simd_view_impl.hpp | 246 +++--- .../intel/experimental/esimd/detail/types.hpp | 331 ++++++-- .../intel/experimental/esimd/detail/util.hpp | 4 + .../ext/intel/experimental/esimd/math.hpp | 134 ++-- .../ext/intel/experimental/esimd/memory.hpp | 72 +- .../ext/intel/experimental/esimd/simd.hpp | 743 ++++-------------- .../intel/experimental/esimd/simd_view.hpp | 261 ++---- sycl/test/esimd/esimd_math.cpp | 4 +- sycl/test/esimd/intrins_trans.cpp | 6 +- sycl/test/esimd/operators.cpp | 514 ++++++++++++ sycl/test/esimd/regression/simd_wrapper.cpp | 2 +- sycl/test/esimd/simd.cpp | 32 +- sycl/test/esimd/simd_mask.cpp | 71 ++ sycl/test/esimd/simd_merge.cpp | 8 +- sycl/test/esimd/simd_replicate_deprecated.cpp | 12 +- sycl/test/esimd/simd_view.cpp | 30 +- 24 files changed, 2617 insertions(+), 1169 deletions(-) create mode 100644 sycl/include/sycl/ext/intel/experimental/esimd/detail/operators.hpp create mode 100644 sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp create mode 100644 sycl/test/esimd/operators.cpp create mode 100644 sycl/test/esimd/simd_mask.cpp diff --git a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp index cd619a94f71bd..bab10b79abe84 100644 --- a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp @@ -1350,11 +1350,18 @@ SmallPtrSet collectGenXVolatileTypes(Module &M) { continue; auto GTy = dyn_cast(PTy->getPointerElementType()); // TODO FIXME relying on type name in LLVM IR is fragile, needs rework - if (!GTy || !GTy->getName().endswith( - "cl::sycl::ext::intel::experimental::esimd::simd")) + if (!GTy || + !GTy->getName().endswith("sycl::ext::intel::experimental::esimd::simd")) continue; assert(GTy->getNumContainedTypes() == 1); auto VTy = GTy->getContainedType(0); + if (GTy = dyn_cast(VTy)) { + assert( + GTy && + GTy->getName().endswith( + "sycl::ext::intel::experimental::esimd::detail::simd_obj_impl")); + VTy = GTy->getContainedType(0); + } assert(VTy->isVectorTy()); GenXVolatileTypeSet.insert(VTy); } diff --git a/llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp b/llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp index 9b30c72d3436a..848b289352a03 100644 --- a/llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp +++ b/llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp @@ -110,14 +110,18 @@ ModulePass *llvm::createESIMDLowerVecArgPass() { // nullptr. Type *ESIMDLowerVecArgPass::getSimdArgPtrTyOrNull(Value *arg) { auto ArgType = dyn_cast(arg->getType()); - if (!ArgType || !ArgType->getElementType()->isStructTy()) + if (!ArgType) return nullptr; - auto ContainedType = ArgType->getElementType(); - if ((ContainedType->getStructNumElements() != 1) || - !ContainedType->getStructElementType(0)->isVectorTy()) + Type *Res = nullptr; + StructType *ST = dyn_cast_or_null(ArgType->getElementType()); + + while (ST && (ST->getStructNumElements() == 1)) { + Res = ST->getStructElementType(0); + ST = dyn_cast(Res); + } + if (!Res || !Res->isVectorTy()) return nullptr; - return PointerType::get(ContainedType->getStructElementType(0), - ArgType->getPointerAddressSpace()); + return PointerType::get(Res, ArgType->getPointerAddressSpace()); } // F may have multiple arguments of type simd*. This diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/intrin.hpp index d8a493cd6b3d7..5d8661bb85cd5 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/intrin.hpp @@ -18,9 +18,6 @@ #include #include -#define __SEIEED sycl::ext::intel::experimental::esimd::detail -#define __SEIEE sycl::ext::intel::experimental::esimd - // \brief __esimd_rdregion: region access intrinsic. // // @param T the element data type, one of i8, i16, i32, i64, half, float, @@ -125,14 +122,14 @@ template __esimd_wrregion(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, uint16_t Offset, - __SEIEE::mask_type_t Mask = 1); + __SEIEED::simd_mask_storage_t Mask = 1); template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_wrindirect(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, __SEIEED::vector_type_t Offset, - __SEIEE::mask_type_t Mask = 1); + __SEIEED::simd_mask_storage_t Mask = 1); __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -286,7 +283,7 @@ template __esimd_wrregion(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, uint16_t Offset, - __SEIEE::mask_type_t Mask) { + __SEIEED::simd_mask_storage_t Mask) { uint16_t EltOffset = Offset / sizeof(T); assert(Offset % sizeof(T) == 0); @@ -310,7 +307,7 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_wrindirect(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, __SEIEED::vector_type_t Offset, - __SEIEE::mask_type_t Mask) { + __SEIEED::simd_mask_storage_t Mask) { __SEIEED::vector_type_t Result = OldVal; for (int i = 0; i < M; ++i) { if (Mask[i]) { @@ -324,6 +321,3 @@ __esimd_wrindirect(__SEIEED::vector_type_t OldVal, } #endif // __SYCL_DEVICE_ONLY__ - -#undef __SEIEE -#undef __SEIEED diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp index 619f9f598f198..7f2f5fc5bdd10 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp @@ -15,11 +15,10 @@ #include #include #include +#include #include -#define __SEIEED sycl::ext::intel::experimental::esimd::detail - // saturation intrinsics template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t @@ -385,8 +384,6 @@ inline T extract(const uint32_t &width, const uint32_t &offset, uint32_t src, return ret; } -#define __SEIEEED sycl::ext::intel::experimental::esimd::emu::detail - template inline __SEIEED::vector_type_t __esimd_satf(__SEIEED::vector_type_t src) { @@ -1327,8 +1324,4 @@ __esimd_reduced_smin(__SEIEED::vector_type_t src1, return __esimd_reduced_min(src1, src2); } -#undef __SEIEEED - #endif // #ifdef __SYCL_DEVICE_ONLY__ - -#undef __SEIEED diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 03ec0ab45b171..1751c32d37084 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -74,9 +74,6 @@ constexpr unsigned int ElemsPerAddrDecoding(unsigned int ElemsPerAddrEncoded) { } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) -#define __SEIEE sycl::ext::intel::experimental::esimd -#define __SEIEED sycl::ext::intel::experimental::esimd::detail - // flat_read does flat-address gather template __esimd_flat_read(__SEIEED::vector_type_t addrs, int ElemsPerAddr = NumBlk, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // flat_write does flat-address scatter template addrs, __SEIEED::vector_type_t vals, - int ElemsPerAddr = NumBlk, __SEIEED::vector_type_t pred = 1); + int ElemsPerAddr = NumBlk, __SEIEED::simd_mask_storage_t pred = 1); // flat_block_read reads a block of data from one flat address template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __esimd_flat_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // flat_write does flat-address scatter template addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // Low-level surface-based gather. Collects elements located at given offsets in // a surface and returns them as a single \ref simd object. Element can be @@ -205,7 +202,7 @@ template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void -__esimd_surf_write(__SEIEED::vector_type_t pred, int16_t scale, +__esimd_surf_write(__SEIEED::simd_mask_storage_t pred, int16_t scale, SurfIndAliasTy surf_ind, uint32_t global_offset, __SEIEED::vector_type_t elem_offsets, __SEIEED::vector_type_t vals) @@ -229,7 +226,7 @@ template <__SEIEE::atomic_op Op, typename Ty, int N, __SEIEE::CacheHint L3H = __SEIEE::CacheHint::None> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_flat_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); template <__SEIEE::atomic_op Op, typename Ty, int N, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None, @@ -237,7 +234,7 @@ template <__SEIEE::atomic_op Op, typename Ty, int N, SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_flat_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); template <__SEIEE::atomic_op Op, typename Ty, int N, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None, @@ -246,7 +243,7 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_flat_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); // esimd_barrier, generic group barrier SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_barrier(); @@ -262,14 +259,14 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_slm_fence(uint8_t cntl); template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_read(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // slm_write does SLM scatter template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_slm_write(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // slm_block_read reads a block of data from SLM template @@ -286,33 +283,33 @@ template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // slm_write4 does SLM scatter4 template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_slm_write4( __SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // slm_atomic: SLM atomic template <__SEIEE::atomic_op Op, typename Ty, int N> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); template <__SEIEE::atomic_op Op, typename Ty, int N> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); template <__SEIEE::atomic_op Op, typename Ty, int N> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); // Media block load // @@ -418,9 +415,9 @@ template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, - uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, + __SEIEED::simd_mask_storage_t pred, uint8_t numSrc0, + uint8_t numSrc1, uint8_t numDst, uint8_t sfid, + uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, __SEIEED::vector_type_t msgSrc1, __SEIEED::vector_type_t msgDst); @@ -454,9 +451,9 @@ __esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numDst, uint8_t sfid, - uint32_t exDesc, uint32_t msgDesc, + __SEIEED::simd_mask_storage_t pred, uint8_t numSrc0, + uint8_t numDst, uint8_t sfid, uint32_t exDesc, + uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, __SEIEED::vector_type_t msgDst); @@ -485,13 +482,11 @@ __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, /// @param msgSrc1 the second source operand of send message. /// template -SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void -__esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid, - uint32_t exDesc, uint32_t msgDesc, - __SEIEED::vector_type_t msgSrc0, - __SEIEED::vector_type_t msgSrc1); +SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_raw_sends_store( + uint8_t modifier, uint8_t execSize, __SEIEED::simd_mask_storage_t pred, + uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid, uint32_t exDesc, + uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, + __SEIEED::vector_type_t msgSrc1); /// \brief Raw send store. /// @@ -515,9 +510,8 @@ __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t sfid, uint32_t exDesc, - uint32_t msgDesc, + __SEIEED::simd_mask_storage_t pred, uint8_t numSrc0, + uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0); #ifndef __SYCL_DEVICE_ONLY__ @@ -525,7 +519,7 @@ template inline __SEIEED::vector_type_t __esimd_flat_read(__SEIEED::vector_type_t addrs, int ElemsPerAddr, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { auto NumBlkDecoded = __SEIEED::ElemsPerAddrDecoding(NumBlk); __SEIEED::vector_type_t V; ElemsPerAddr = __SEIEED::ElemsPerAddrDecoding(ElemsPerAddr); @@ -551,7 +545,7 @@ template inline __SEIEED::vector_type_t __esimd_flat_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t V; unsigned int Next = 0; @@ -601,7 +595,7 @@ inline void __esimd_flat_write( __SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - int ElemsPerAddr, __SEIEED::vector_type_t pred) { + int ElemsPerAddr, __SEIEED::simd_mask_storage_t pred) { auto NumBlkDecoded = __SEIEED::ElemsPerAddrDecoding(NumBlk); ElemsPerAddr = __SEIEED::ElemsPerAddrDecoding(ElemsPerAddr); @@ -626,7 +620,7 @@ template addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t V; unsigned int Next = 0; @@ -830,7 +824,7 @@ inline void __esimd_slm_fence(uint8_t cntl) {} template inline __SEIEED::vector_type_t __esimd_slm_read(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -839,7 +833,7 @@ __esimd_slm_read(__SEIEED::vector_type_t addrs, template inline void __esimd_slm_write(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) {} + __SEIEED::simd_mask_storage_t pred) {} // slm_block_read reads a block of data from SLM template @@ -857,7 +851,7 @@ inline void __esimd_slm_block_write(uint32_t addr, template inline __SEIEED::vector_type_t __esimd_slm_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -867,13 +861,13 @@ template inline void __esimd_slm_write4( __SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) {} + __SEIEED::simd_mask_storage_t pred) {} // slm_atomic: SLM atomic template <__SEIEE::atomic_op Op, typename Ty, int N> inline __SEIEED::vector_type_t __esimd_slm_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -882,7 +876,7 @@ template <__SEIEE::atomic_op Op, typename Ty, int N> inline __SEIEED::vector_type_t __esimd_slm_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -892,7 +886,7 @@ inline __SEIEED::vector_type_t __esimd_slm_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -901,7 +895,7 @@ template <__SEIEE::atomic_op Op, typename Ty, int N, __SEIEE::CacheHint L1H, __SEIEE::CacheHint L3H> inline __SEIEED::vector_type_t __esimd_flat_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -911,7 +905,7 @@ template <__SEIEE::atomic_op Op, typename Ty, int N, __SEIEE::CacheHint L1H, inline __SEIEED::vector_type_t __esimd_flat_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -922,7 +916,7 @@ inline __SEIEED::vector_type_t __esimd_flat_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -986,14 +980,12 @@ inline uint32_t __esimd_get_value(AccessorTy acc) { /// template -inline __SEIEED::vector_type_t -__esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, - uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, - __SEIEED::vector_type_t msgSrc0, - __SEIEED::vector_type_t msgSrc1, - __SEIEED::vector_type_t msgDst) { +inline __SEIEED::vector_type_t __esimd_raw_sends_load( + uint8_t modifier, uint8_t execSize, __SEIEED::simd_mask_storage_t pred, + uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, uint8_t sfid, + uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, + __SEIEED::vector_type_t msgSrc1, + __SEIEED::vector_type_t msgDst) { throw cl::sycl::feature_not_supported(); return 0; } @@ -1025,13 +1017,11 @@ __esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, /// Returns a simd vector of type Ty1 and size N1. /// template -inline __SEIEED::vector_type_t -__esimd_raw_send_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numDst, uint8_t sfid, - uint32_t exDesc, uint32_t msgDesc, - __SEIEED::vector_type_t msgSrc0, - __SEIEED::vector_type_t msgDst) { +inline __SEIEED::vector_type_t __esimd_raw_send_load( + uint8_t modifier, uint8_t execSize, __SEIEED::simd_mask_storage_t pred, + uint8_t numSrc0, uint8_t numDst, uint8_t sfid, uint32_t exDesc, + uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, + __SEIEED::vector_type_t msgDst) { throw cl::sycl::feature_not_supported(); return 0; } @@ -1062,7 +1052,7 @@ __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, /// template inline void __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, + __SEIEED::simd_mask_storage_t pred, uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, @@ -1092,7 +1082,7 @@ inline void __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, /// template inline void __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, + __SEIEED::simd_mask_storage_t pred, uint8_t numSrc0, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0) { @@ -1100,6 +1090,3 @@ inline void __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, } #endif // __SYCL_DEVICE_ONLY__ - -#undef __SEIEED -#undef __SEIEE diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/operators.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/operators.hpp new file mode 100644 index 0000000000000..1f332a1b46220 --- /dev/null +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/operators.hpp @@ -0,0 +1,412 @@ +//==-------------- operators.hpp - DPC++ Explicit SIMD API -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Binary operator definitions for ESIMD types. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +#include +#include + +// Table of contents: +// +// simd_obj_impl/simd/simd_mask global operators +// bitwise logic and arithmetic operators +// simd_obj_impl BINOP simd_obj_impl +// simd_obj_impl BINOP SCALAR +// SCALAR BINOP simd_obj_impl +// comparison operators +// simd_obj_impl CMPOP simd_obj_impl +// simd_obj_impl CMPOP SCALAR +// SCALAR CMPOP simd_obj_impl +// simd_view global operators +// bitwise logic and arithmetic operators +// simd_view BINOP simd_view +// simd* BINOP simd_view +// simd_view BINOP simd* +// SCALAR BINOP simd_view +// simd_view BINOP SCALAR +// comparison operators +// simd_view CMPOP simd_view +// simd_view CMPOP simd_obj_impl +// simd_obj_impl CMPOP simd_view +// simd_view CMPOP SCALAR +// SCALAR CMPOP simd_view +// +// Some operations are enabled only for particular element and simd object type +// (simd or simd_mask): +// - bitwise logic operations - for integral element types (both simd and +// simd_mask) +// - bit shift operations and and '%' - for the simd type (not for simd_mask) +// with integral element types. +// - arithmetic binary operations - for the simd type (not for simd_mask) +// In all cases, when an operation has a simd_view and a simd_obj_impl's +// subclass objects as operands, it is enabled only when: +// - simd_view's base type matches the simd object operand. I.e. only +// { simd_view, simd } and { simd_view, simd_mask } +// pairs are enabled (with any order of operand types). +// - simd_view's value length matches the length of the simd object operand + +// Put operators into the ESIMD namespace to make argument-dependent lookup find +// these operators instead of those defined in e.g. sycl namespace (which would +// stop further lookup, leaving just non-viable sycl::operator < etc. on the +// table). +namespace __SEIEED { + +//////////////////////////////////////////////////////////////////////////////// +// simd_obj_impl global operators +//////////////////////////////////////////////////////////////////////////////// + +// ========= simd_obj_impl bitwise logic and arithmetic operators + +#define __ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(BINOP, COND) \ + \ + /* simd_obj_impl BINOP simd_obj_impl */ \ + template class SimdT, \ + class SimdTx = SimdT, class = std::enable_if_t> \ + inline auto operator BINOP( \ + const __SEIEED::simd_obj_impl> &LHS, \ + const __SEIEED::simd_obj_impl> &RHS) { \ + if constexpr (__SEIEED::is_simd_type_v>) { \ + using SimdPromotedT = \ + __SEIEED::computation_type_t, SimdT>; \ + using VecT = typename SimdPromotedT::vector_type; \ + return SimdPromotedT(__SEIEED::convert(LHS.data()) \ + BINOP __SEIEED::convert(RHS.data())); \ + } else { \ + /* for SimdT=simd_mask_impl T1 and T2 are both equal to \ + * simd_mask_elem_type */ \ + return SimdT(LHS.data() BINOP RHS.data()); \ + } \ + } \ + \ + /* simd_obj_impl BINOP SCALAR */ \ + template class SimdT1, class T2, \ + class SimdTx = SimdT1, class = std::enable_if_t> \ + inline auto operator BINOP( \ + const __SEIEED::simd_obj_impl> &LHS, T2 RHS) { \ + if constexpr (__SEIEED::is_simd_type_v>) { \ + /* convert the SCALAR to vector type and reuse the basic operation over \ + * simd objects */ \ + return LHS BINOP SimdT1(RHS); \ + } else { \ + /* SimdT1 is a mask, T1 is mask element type - convert RHS implicitly to \ + * T1 */ \ + return LHS BINOP SimdT1(RHS); \ + } \ + } \ + \ + /* SCALAR BINOP simd_obj_impl */ \ + template class SimdT2, \ + class SimdTx = SimdT2, class = std::enable_if_t> \ + inline auto operator BINOP( \ + T1 LHS, const __SEIEED::simd_obj_impl> &RHS) { \ + if constexpr (__SEIEED::is_simd_type_v>) { \ + /* convert the SCALAR to vector type and reuse the basic operation over \ + * simd objects */ \ + return SimdT2(LHS) BINOP RHS; \ + } else { \ + /* simd_mask_case */ \ + return SimdT2(LHS) BINOP RHS; \ + } \ + } + +#define __ESIMD_BITWISE_OP_FILTER \ + std::is_integral_v &&std::is_integral_v +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(^, __ESIMD_BITWISE_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(|, __ESIMD_BITWISE_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(&, __ESIMD_BITWISE_OP_FILTER) +#undef __ESIMD_BITWISE_OP_FILTER + +#define __ESIMD_SHIFT_OP_FILTER \ + std::is_integral_v &&std::is_integral_v \ + &&__SEIEED::is_simd_type_v +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(%, __ESIMD_SHIFT_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(<<, __ESIMD_SHIFT_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(>>, __ESIMD_SHIFT_OP_FILTER) +#undef __ESIMD_SHIFT_OP_FILTER + +#define __ESIMD_ARITH_OP_FILTER \ + __SEIEED::is_vectorizable_v &&__SEIEED::is_vectorizable_v \ + &&__SEIEED::is_simd_type_v + +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(+, __ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(-, __ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(*, __ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(/, __ESIMD_ARITH_OP_FILTER) +#undef __ESIMD_ARITH_OP_FILTER + +#undef __ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP + +// ========= simd_obj_impl comparison operators +// Both simd and simd_mask will match simd_obj_impl argument when resolving +// operator overloads. + +#define __ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(CMPOP, COND) \ + \ + /* simd_obj_impl CMPOP simd_obj_impl */ \ + template class SimdT, \ + class SimdTx = SimdT, class = std::enable_if_t> \ + inline __SEIEE::simd_mask operator CMPOP( \ + const __SEIEED::simd_obj_impl> &LHS, \ + const __SEIEED::simd_obj_impl> &RHS) { \ + using MaskVecT = typename __SEIEE::simd_mask::vector_type; \ + \ + if constexpr (__SEIEED::is_simd_type_v>) { \ + using PromSimdT = \ + __SEIEED::computation_type_t, SimdT>; \ + using PromVecT = typename PromSimdT::vector_type; \ + auto ResVec = __SEIEED::convert(LHS.data()) \ + CMPOP __SEIEED::convert(RHS.data()); \ + return __SEIEE::simd_mask(__SEIEED::convert(ResVec) & \ + MaskVecT(1)); \ + } else { \ + /* this is comparison of masks, don't perform type promotion */ \ + auto ResVec = LHS.data() CMPOP RHS.data(); \ + return __SEIEE::simd_mask(__SEIEED::convert(ResVec) & \ + MaskVecT(1)); \ + } \ + } \ + \ + /* simd_obj_impl CMPOP SCALAR */ \ + template class SimdT1, class T2, \ + class SimdTx = SimdT1, \ + class = std::enable_if_t<__SEIEED::is_vectorizable_v && COND>> \ + inline __SEIEE::simd_mask operator CMPOP( \ + const __SEIEED::simd_obj_impl> &LHS, T2 RHS) { \ + if constexpr (__SEIEED::is_simd_type_v>) \ + /* simd case */ \ + return LHS CMPOP SimdT1(RHS); \ + else \ + /* simd_mask case - element type is fixed */ \ + return LHS CMPOP SimdT1((T1)RHS); \ + } \ + \ + /* SCALAR CMPOP simd_obj_impl */ \ + template class SimdT2, \ + class SimdTx = SimdT2, \ + class = std::enable_if_t<__SEIEED::is_vectorizable_v && COND>> \ + inline __SEIEE::simd_mask operator CMPOP( \ + T1 LHS, const __SEIEED::simd_obj_impl> &RHS) { \ + if constexpr (__SEIEED::is_simd_type_v>) \ + /* simd case */ \ + return SimdT2(LHS) CMPOP RHS; \ + else \ + /* simd_mask case - element type is fixed */ \ + return SimdT2((T2)LHS) CMPOP RHS; \ + } + +// Equality comparison is defined for all simd_obj_impl subclasses. +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(==, true) +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(!=, true) + +// Relational operators are defined only for the simd type. +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(<, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(>, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(<=, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(>=, __SEIEED::is_simd_type_v) + +// Logical operators are defined only for the simd_mask type +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(&&, __SEIEED::is_simd_mask_type_v) +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(||, __SEIEED::is_simd_mask_type_v) + +#undef __ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP +} // namespace __SEIEED + +namespace __SEIEE { +//////////////////////////////////////////////////////////////////////////////// +// simd_view global operators +//////////////////////////////////////////////////////////////////////////////// + +// ========= simd_view bitwise logic and arithmetic operators + +#define __ESIMD_DEF_SIMD_VIEW_BIN_OP(BINOP, COND) \ + \ + /* simd_view BINOP simd_view */ \ + template ::element_type, \ + class T2 = typename __SEIEE::shape_type::element_type, \ + class = \ + std::enable_if_t<__SEIEED::is_simd_type_v == \ + __SEIEED::is_simd_type_v && \ + (__SEIEE::shape_type::length == \ + __SEIEE::shape_type::length) && \ + COND>> \ + inline auto operator BINOP( \ + const __SEIEE::simd_view &LHS, \ + const __SEIEE::simd_view &RHS) { \ + return LHS.read() BINOP RHS.read(); \ + } \ + \ + /* simd* BINOP simd_view */ \ + template ::element_type, \ + class = std::enable_if_t< \ + __SEIEED::is_simd_obj_impl_derivative_v && \ + (__SEIEED::is_simd_type_v == \ + __SEIEED::is_simd_type_v)&&(SimdT1::length == \ + __SEIEE::shape_type< \ + RegionT2>::length) && \ + COND>> \ + inline auto operator BINOP( \ + const SimdT1 &LHS, const __SEIEE::simd_view &RHS) { \ + return LHS BINOP RHS.read(); \ + } \ + \ + /* simd_view BINOP simd* */ \ + template < \ + class SimdT1, class RegionT1, class SimdT2, \ + class T1 = typename __SEIEE::shape_type::element_type, \ + class T2 = typename SimdT2::element_type, \ + class = std::enable_if_t< \ + __SEIEED::is_simd_obj_impl_derivative_v && \ + __SEIEED::is_simd_type_v == \ + __SEIEED::is_simd_type_v && \ + (SimdT2::length == __SEIEE::shape_type::length) && COND>> \ + inline auto operator BINOP(const __SEIEE::simd_view &LHS, \ + const SimdT2 &RHS) { \ + return LHS.read() BINOP RHS; \ + } \ + \ + /* SCALAR BINOP simd_view */ \ + template && COND>> \ + inline auto operator BINOP(T1 LHS, const SimdViewT2 &RHS) { \ + using SimdT = typename SimdViewT2::value_type; \ + return SimdT(LHS) BINOP RHS.read(); \ + } \ + \ + /* simd_view BINOP SCALAR */ \ + template && COND>> \ + inline auto operator BINOP(const SimdViewT1 &LHS, T2 RHS) { \ + using SimdT = typename SimdViewT1::value_type; \ + return LHS.read() BINOP SimdT(RHS); \ + } + +#define __ESIMD_BITWISE_OP_FILTER \ + std::is_integral_v &&std::is_integral_v +__ESIMD_DEF_SIMD_VIEW_BIN_OP(^, __ESIMD_BITWISE_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(|, __ESIMD_BITWISE_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(&, __ESIMD_BITWISE_OP_FILTER) +#undef __ESIMD_BITWISE_OP_FILITER + +#define __ESIMD_SHIFT_OP_FILTER \ + std::is_integral_v &&std::is_integral_v \ + &&__SEIEED::is_simd_type_v + +__ESIMD_DEF_SIMD_VIEW_BIN_OP(%, __ESIMD_SHIFT_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(<<, __ESIMD_SHIFT_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(>>, __ESIMD_SHIFT_OP_FILTER) +#undef __ESIMD_SHIFT_OP_FILTER + +#define __ESIMD_ARITH_OP_FILTER \ + __SEIEED::is_simd_type_v &&__SEIEED::is_vectorizable_v \ + &&__SEIEED::is_vectorizable_v + +__ESIMD_DEF_SIMD_VIEW_BIN_OP(+, __ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(-, __ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(*, __ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(/, __ESIMD_ARITH_OP_FILTER) +#undef __ESIMD_ARITH_OP_FILTER + +__ESIMD_DEF_SIMD_VIEW_BIN_OP(&&, __SEIEED::is_simd_mask_type_v) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(||, __SEIEED::is_simd_mask_type_v) + +#undef __ESIMD_DEF_SIMD_VIEW_BIN_OP + +// ========= simd_view comparison operators + +#define __ESIMD_DEF_SIMD_VIEW_CMP_OP(CMPOP, COND) \ + \ + /* simd_view CMPOP simd_view */ \ + template == \ + __SEIEED::is_simd_type_v< \ + SimdT2>)&&/* the length of the views \ + must match as well: */ \ + (__SEIEE::shape_type::length == \ + __SEIEE::shape_type::length) && \ + COND>> \ + inline auto operator CMPOP( \ + const __SEIEE::simd_view &LHS, \ + const __SEIEE::simd_view &RHS) { \ + return LHS.read() CMPOP RHS.read(); \ + } \ + \ + /* simd_view CMPOP simd_obj_impl */ \ + template ::length == N2) && \ + (__SEIEED::is_simd_type_v == \ + __SEIEED::is_simd_type_v)&&COND>> \ + inline __SEIEE::simd_mask operator CMPOP( \ + const __SEIEE::simd_view &LHS, \ + const __SEIEED::simd_obj_impl &RHS) { \ + return LHS.read() CMPOP SimdT2(RHS.data()); \ + } \ + \ + /* simd_obj_impl CMPOP simd_view */ \ + template ::length == N1) && \ + (__SEIEED::is_simd_type_v == \ + __SEIEED::is_simd_type_v)&&COND>> \ + inline __SEIEE::simd_mask operator CMPOP( \ + const __SEIEED::simd_obj_impl &LHS, \ + const __SEIEE::simd_view &RHS) { \ + return SimdT1(LHS.data()) CMPOP RHS.read(); \ + } \ + \ + /* simd_view CMPOP SCALAR */ \ + template && COND>> \ + inline auto operator CMPOP(const __SEIEE::simd_view &LHS, \ + T2 RHS) { \ + using SimdValueT = \ + typename __SEIEE::simd_view::value_type; \ + return LHS.read() CMPOP SimdValueT(RHS); \ + } \ + \ + /* SCALAR CMPOP simd_view */ \ + template && COND>> \ + inline auto operator CMPOP( \ + T1 LHS, const __SEIEE::simd_view &RHS) { \ + using SimdValueT = \ + typename __SEIEE::simd_view::value_type; \ + return SimdValueT(LHS) CMPOP RHS.read(); \ + } + +// Equality comparison is defined for views of all simd_obj_impl derivatives. +__ESIMD_DEF_SIMD_VIEW_CMP_OP(==, true) +__ESIMD_DEF_SIMD_VIEW_CMP_OP(!=, true) + +// Relational operators are defined only for views of the simd class. +__ESIMD_DEF_SIMD_VIEW_CMP_OP(<, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_VIEW_CMP_OP(>, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_VIEW_CMP_OP(<=, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_VIEW_CMP_OP(>=, __SEIEED::is_simd_type_v) + +#undef __ESIMD_DEF_SIMD_VIEW_CMP_OP + +} // namespace __SEIEE diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/region.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/region.hpp index 39092fea83616..df541f4e81d14 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/region.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/region.hpp @@ -50,15 +50,16 @@ struct region_base { // A basic 1D region type. template -using region1d_t = region_base; +using region1d_t = region_base; // A basic 2D region type. template using region2d_t = region_base; // A region with a single element. -template -using region1d_scalar_t = region_base; +template +using region1d_scalar_t = + region_base; // simd_view forward declaration. template class simd_view; @@ -92,12 +93,20 @@ template struct shape_type> { using element_type = Ty; using type = region1d_t; + static inline constexpr int length = type::length; +}; + +template struct shape_type> { + using element_type = Ty; + using type = region1d_t; + static inline constexpr int length = type::length; }; template struct shape_type> { using element_type = Ty; using type = region2d_t; + static inline constexpr int length = type::length; }; // Forward the shape computation on the top region type. diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp new file mode 100644 index 0000000000000..fe65f64dff832 --- /dev/null +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp @@ -0,0 +1,716 @@ +//==------------ - simd_obj_impl.hpp - DPC++ Explicit SIMD API +//--------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Implement Explicit SIMD vector APIs. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace ext { +namespace intel { +namespace experimental { +namespace esimd { + +/// Represents a simd mask. +template using simd_mask = detail::simd_mask_type; + +namespace detail { + +/// The simd_obj_impl vector class. +/// +/// This is a base class for all ESIMD simd classes with real storage (simd, +/// simd_mask_impl). It wraps a clang vector as the storage for the elements. +/// Additionally this class supports region operations that map to Intel GPU +/// regions. The type of a region select or bit_cast_view operation is of +/// simd_view type, which models read-update-write semantics. +/// +/// For the is_simd_obj_impl_derivative helper to work correctly, all derived +/// classes must be templated by element type and number of elements. If fewer +/// template arguments are needed, template aliases can be used +/// (simd_mask_type). +/// +/// \tparam Ty the element type +/// \tparam N number of elements +/// \tparam Derived - a class derived from this one; this class and its +/// derivatives must follow the 'curiously recurring template' pattern. +/// \tparam SFINAE - defaults to 'void' in the forward declarion within +/// types.hpp, used to disable invalid specializations. +/// +/// \ingroup sycl_esimd +template class simd_obj_impl { + template friend class simd_view; + template friend class simd; + template friend class simd_mask_impl; + +public: + /// The underlying builtin data type. + using vector_type = vector_type_t; + + /// The element type of this simd_obj_impl object. + using element_type = Ty; + + /// The number of elements in this simd_obj_impl object. + static constexpr int length = N; + +protected: + template > + void init_from_array(const Ty(&&Arr)[N1]) noexcept { + for (auto I = 0; I < N; ++I) { + M_data[I] = Arr[I]; + } + } + +private: + Derived &cast_this_to_derived() { return reinterpret_cast(*this); } + +public: + /// @{ + /// Constructors. + simd_obj_impl() = default; + simd_obj_impl(const simd_obj_impl &other) { + __esimd_dbg_print(simd_obj_impl(const simd_obj_impl &other)); + set(other.data()); + } + + /// Implicit conversion constructor from another \c simd_obj_impl object. + template + simd_obj_impl( + const simd_obj_impl, + SFINAE> &other) { + __esimd_dbg_print(simd_obj_impl(const simd_obj_impl... > &other)); + if constexpr (std::is_same_v) + set(other.data()); + else + set(__builtin_convertvector(other.data(), vector_type)); + } + + /// Implicit conversion constructor from a raw vector object. + simd_obj_impl(const vector_type &Val) { + __esimd_dbg_print(simd_obj_impl(const vector_type &Val)); + set(Val); + } + + /// This constructor is deprecated for two reasons: + /// 1) it adds confusion between + /// simd s1(1,2); //calls next constructor + /// simd s2{1,2}; //calls this constructor (uniform initialization syntax) + /// 2) no compile-time control over the size of the initializer; e.g. the + /// following will compile: + /// simd x = {1, 2, 3, 4}; + __SYCL_DEPRECATED("use constructor from array, e.g: simd x({1,2,3});") + simd_obj_impl(std::initializer_list Ilist) noexcept { + __esimd_dbg_print(simd_obj_impl(std::initializer_list Ilist)); + int i = 0; + for (auto It = Ilist.begin(); It != Ilist.end() && i < N; ++It) { + M_data[i++] = *It; + } + } + + /// Initialize a simd_obj_impl object with an initial value and step. + simd_obj_impl(Ty Val, Ty Step) noexcept { + __esimd_dbg_print(simd_obj_impl(Ty Val, Ty Step)); +#pragma unroll + for (int i = 0; i < N; ++i) { + M_data[i] = Val; + Val += Step; + } + } + + /// Broadcast constructor + simd_obj_impl(Ty Val) noexcept { + __esimd_dbg_print(simd_obj_impl(Ty Val)); + M_data = Val; + } + + /// Construct from an array. To allow e.g. simd_mask_type m({1,0,0,1,...}). + template > + simd_obj_impl(const Ty(&&Arr)[N1]) noexcept { + __esimd_dbg_print(simd_obj_impl(const Ty(&&Arr)[N1])); + init_from_array(std::move(Arr)); + } + + /// @} + + // Load the object's value from array. + template std::enable_if_t copy_from(const Ty (&Arr)[N1]) { + __esimd_dbg_print(copy_from(const Ty(&Arr)[N1])); + vector_type Tmp; + for (auto I = 0; I < N; ++I) { + Tmp[I] = Arr[I]; + } + set(Tmp); + } + + // Store the object's value to array. + template std::enable_if_t copy_to(Ty (&Arr)[N1]) const { + __esimd_dbg_print(copy_to(Ty(&Arr)[N1])); + for (auto I = 0; I < N; ++I) { + Arr[I] = data()[I]; + } + } + + /// @{ + /// Conversion operators. + explicit operator const vector_type &() const & { + __esimd_dbg_print(explicit operator const vector_type &() const &); + return M_data; + } + explicit operator vector_type &() & { + __esimd_dbg_print(explicit operator vector_type &() &); + return M_data; + } + + /// Explicit conversion for simd_obj_impl into T. + template > + operator Ty() const { + __esimd_dbg_print(explicit operator Ty()); + return data()[0]; + } + /// @} + + vector_type data() const { + __esimd_dbg_print(vector_type data()); +#ifndef __SYCL_DEVICE_ONLY__ + return M_data; +#else + return __esimd_vload(&M_data); +#endif + } + + /// Whole region read. + Derived read() const { return Derived{data()}; } + + /// Whole region write. + Derived &write(const Derived &Val) { + set(Val.data()); + return cast_this_to_derived(); + } + + /// Whole region update with predicates. + void merge(const Derived &Val, const simd_mask &Mask) { + set(__esimd_wrregion(data(), Val.data(), 0, + Mask.data())); + } + + void merge(const Derived &Val1, Derived Val2, const simd_mask &Mask) { + Val2.merge(Val1, Mask); + set(Val2.data()); + } + + /// View this simd_obj_impl object in a different element type. + template auto bit_cast_view() &[[clang::lifetimebound]] { + using TopRegionTy = compute_format_type_t; + using RetTy = simd_view; + return RetTy{cast_this_to_derived(), TopRegionTy{0}}; + } + + template + __SYCL_DEPRECATED("use simd_obj_impl::bit_cast_view.") + auto format() & { + return bit_cast_view(); + } + + /// View as a 2-dimensional simd_view. + template + auto bit_cast_view() &[[clang::lifetimebound]] { + using TopRegionTy = + compute_format_type_2d_t; + using RetTy = simd_view; + return RetTy{cast_this_to_derived(), TopRegionTy{0, 0}}; + } + + template + __SYCL_DEPRECATED("use simd_obj_impl::bit_cast_view.") + auto format() & { + return bit_cast_view(); + } + + /// 1D region select, apply a region on top of this LValue object. + /// + /// \tparam Size is the number of elements to be selected. + /// \tparam Stride is the element distance between two consecutive elements. + /// \param Offset is the starting element offset. + /// \return the representing region object. + template + simd_view> + select(uint16_t Offset = 0) &[[clang::lifetimebound]] { + static_assert(Size > 1 || Stride == 1, + "Stride must be 1 in single-element region"); + region1d_t Reg(Offset); + return {cast_this_to_derived(), std::move(Reg)}; + } + + /// 1D region select, apply a region on top of this RValue object. + /// + /// \tparam Size is the number of elements to be selected. + /// \tparam Stride is the element distance between two consecutive elements. + /// \param Offset is the starting element offset. + /// \return the value this region object refers to. + template + resize_a_simd_type_t select(uint16_t Offset = 0) && { + static_assert(Size > 1 || Stride == 1, + "Stride must be 1 in single-element region"); + Derived &&Val = std::move(cast_this_to_derived()); + return __esimd_rdregion(Val.data(), + Offset); + } + + /// Read single element, return value only (not reference). + Ty operator[](int i) const { return data()[i]; } + + /// Read single element, return value only (not reference). + __SYCL_DEPRECATED("use operator[] form.") + Ty operator()(int i) const { return data()[i]; } + + /// Return writable view of a single element. + simd_view> operator[](int i) + [[clang::lifetimebound]] { + return select<1, 1>(i); + } + + /// Return writable view of a single element. + __SYCL_DEPRECATED("use operator[] form.") + simd_view> operator()(int i) { + return select<1, 1>(i); + } + + // TODO ESIMD_EXPERIMENTAL + /// Read multiple elements by their indices in vector + template + resize_a_simd_type_t + iselect(const simd &Indices) { + vector_type_t Offsets = Indices.data() * sizeof(Ty); + return __esimd_rdindirect(data(), Offsets); + } + // TODO ESIMD_EXPERIMENTAL + /// update single element + void iupdate(ushort Index, Ty V) { + auto Val = data(); + Val[Index] = V; + set(Val); + } + + // TODO ESIMD_EXPERIMENTAL + /// update multiple elements by their indices in vector + template + void iupdate(const simd &Indices, + const resize_a_simd_type_t &Val, + const simd_mask &Mask) { + vector_type_t Offsets = Indices.data() * sizeof(Ty); + set(__esimd_wrindirect(data(), Val.data(), Offsets, + Mask.data())); + } + + /// \name Replicate + /// Replicate simd_obj_impl instance given a region. + /// @{ + /// + + /// \tparam Rep is number of times region has to be replicated. + /// \return replicated simd_obj_impl instance. + template resize_a_simd_type_t replicate() { + return replicate(0); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam W is width of src region to replicate. + /// \param Offset is offset in number of elements in src region. + /// \return replicated simd_obj_impl instance. + template + __SYCL_DEPRECATED("use simd_obj_impl::replicate_w") + resize_a_simd_type_t replicate(uint16_t Offset) { + return replicate_w(Offset); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam W is width of src region to replicate. + /// \param Offset is offset in number of elements in src region. + /// \return replicated simd_obj_impl instance. + template + resize_a_simd_type_t replicate_w(uint16_t Offset) { + return replicate_vs_w_hs(Offset); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam VS vertical stride of src region to replicate. + /// \tparam W is width of src region to replicate. + /// \param Offset is offset in number of elements in src region. + /// \return replicated simd_obj_impl instance. + template + __SYCL_DEPRECATED("use simd_obj_impl::replicate_vs_w") + resize_a_simd_type_t replicate(uint16_t Offset) { + return replicate_vs_w(Offset); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam VS vertical stride of src region to replicate. + /// \tparam W width of src region to replicate. + /// \param Offset offset in number of elements in src region. + /// \return replicated simd_obj_impl instance. + template + resize_a_simd_type_t replicate_vs_w(uint16_t Offset) { + return replicate_vs_w_hs(Offset); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam VS vertical stride of src region to replicate. + /// \tparam W is width of src region to replicate. + /// \tparam HS horizontal stride of src region to replicate. + /// \param Offset is offset in number of elements in src region. + /// \return replicated simd_obj_impl instance. + template + __SYCL_DEPRECATED("use simd_obj_impl::replicate_vs_w_hs") + resize_a_simd_type_t replicate(uint16_t Offset) { + return replicate_vs_w_hs(Offset); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam VS vertical stride of src region to replicate. + /// \tparam W is width of src region to replicate. + /// \tparam HS horizontal stride of src region to replicate. + /// \param Offset is offset in number of elements in src region. + /// \return replicated simd_obj_impl instance. + template + resize_a_simd_type_t replicate_vs_w_hs(uint16_t Offset) { + return __esimd_rdregion(data(), + Offset * sizeof(Ty)); + } + ///@} + + /// Any operation. + /// + /// \return 1 if any element is set, 0 otherwise. + template ::value>> + uint16_t any() { + return __esimd_any(data()); + } + + /// All operation. + /// + /// \return 1 if all elements are set, 0 otherwise. + template ::value>> + uint16_t all() { + return __esimd_all(data()); + } + + /// Write a simd_obj_impl-vector into a basic region of a simd_obj_impl + /// object. + template + ESIMD_INLINE void writeRegion( + RTy Region, + const vector_type_t &Val) { + using ElemTy = typename RTy::element_type; + if constexpr (N * sizeof(Ty) == RTy::length * sizeof(ElemTy)) + // update the entire vector + set(bitcast(Val)); + else { + static_assert(!RTy::Is_2D); + // If element type differs, do bitcast conversion first. + auto Base = bitcast(data()); + constexpr int BN = (N * sizeof(Ty)) / sizeof(ElemTy); + // Access the region information. + constexpr int M = RTy::Size_x; + constexpr int Stride = RTy::Stride_x; + uint16_t Offset = Region.M_offset_x * sizeof(ElemTy); + + // Merge and update. + auto Merged = __esimd_wrregion(Base, Val, Offset); + // Convert back to the original element type, if needed. + set(bitcast(Merged)); + } + } + + /// Write a simd_obj_impl-vector into a nested region of a simd_obj_impl + /// object. + template + ESIMD_INLINE void + writeRegion(std::pair Region, + const vector_type_t &Val) { + // parent-region type + using PaTy = typename shape_type::type; + using ElemTy = typename TR::element_type; + using BT = typename PaTy::element_type; + constexpr int BN = PaTy::length; + + if constexpr (PaTy::Size_in_bytes == TR::Size_in_bytes) { + writeRegion(Region.second, bitcast(Val)); + } else { + // Recursively read the base + auto Base = readRegion(data(), Region.second); + // If element type differs, do bitcast conversion first. + auto Base1 = bitcast(Base); + constexpr int BN1 = PaTy::Size_in_bytes / sizeof(ElemTy); + + if constexpr (!TR::Is_2D) { + // Access the region information. + constexpr int M = TR::Size_x; + constexpr int Stride = TR::Stride_x; + uint16_t Offset = Region.first.M_offset_x * sizeof(ElemTy); + + // Merge and update. + Base1 = __esimd_wrregion(Base1, Val, Offset); + } else { + static_assert(std::is_same::value); + // Read columns with non-trivial horizontal stride. + constexpr int M = TR::length; + constexpr int VS = PaTy::Size_x * TR::Stride_y; + constexpr int W = TR::Size_x; + constexpr int HS = TR::Stride_x; + constexpr int ParentWidth = PaTy::Size_x; + + // Compute the byte offset for the starting element. + uint16_t Offset = static_cast( + (Region.first.M_offset_y * PaTy::Size_x + Region.first.M_offset_x) * + sizeof(ElemTy)); + + // Merge and update. + Base1 = __esimd_wrregion( + Base1, Val, Offset); + } + // Convert back to the original element type, if needed. + auto Merged1 = bitcast(Base1); + // recursively write it back to the base + writeRegion(Region.second, Merged1); + } + } + + /// @name Memory operations + /// TODO NOTE: These APIs do not support cache hint specification yet, as this + /// is WIP. Later addition of hints is not expected to break code using these + /// APIs. + /// + /// @{ + + /// Copy a contiguous block of data from memory into this simd_obj_impl + /// object. The amount of memory copied equals the total size of vector + /// elements in this object. + /// @param addr the memory address to copy from. Must be a pointer to the + /// global address space, otherwise behavior is undefined. + ESIMD_INLINE void copy_from(const Ty *const addr) SYCL_ESIMD_FUNCTION; + + /// Copy a contiguous block of data from memory into this simd_obj_impl + /// object. The amount of memory copied equals the total size of vector + /// elements in this object. Source memory location is represented via a + /// global accessor and offset. + /// @param acc accessor to copy from. + /// @param offset offset to copy from. + template + ESIMD_INLINE EnableIfAccessor + copy_from(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION; + + /// Copy all vector elements of this object into a contiguous block in memory. + /// @param addr the memory address to copy to. Must be a pointer to the + /// global address space, otherwise behavior is undefined. + ESIMD_INLINE void copy_to(Ty *addr) SYCL_ESIMD_FUNCTION; + + /// Copy all vector elements of this object into a contiguous block in memory. + /// Destination memory location is represented via a global accessor and + /// offset. + /// @param acc accessor to copy from. + /// @param offset offset to copy from. + template + ESIMD_INLINE EnableIfAccessor + copy_to(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION; + + /// @} // Memory operations + + /// Bitwise inversion, available in all subclasses. + template Derived operator~() { + static_assert(std::is_integral_v, "'~' applies only to integral types"); + return Derived(~M_data); + } + +#define __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(BINOP, OPASSIGN, COND) \ + \ + /* OPASSIGN simd_obj_impl */ \ + template == \ + is_simd_type_v)&&COND>> \ + Derived &operator OPASSIGN( \ + const __SEIEED::simd_obj_impl &RHS) { \ + auto Res = *this BINOP RHS; \ + set(__SEIEED::convert(Res.data())); \ + return cast_this_to_derived(); \ + } \ + \ + /* OPASSIGN simd_view */ \ + template == \ + is_simd_type_v)&&(RegionT1::length == length) && \ + COND>> \ + Derived &operator OPASSIGN( \ + const __SEIEE::simd_view &RHS) { \ + auto Res = *this BINOP RHS.read(); \ + set(__SEIEED::convert(Res.data())); \ + return cast_this_to_derived(); \ + } \ + \ + /* OPASSIGN SCALAR */ \ + template > \ + Derived &operator OPASSIGN(T1 RHS) { \ + if constexpr (is_simd_type_v) { \ + using RHSVecT = __SEIEED::construct_a_simd_type_t; \ + return *this OPASSIGN RHSVecT(RHS); \ + } else { \ + return *this OPASSIGN Derived((Ty)RHS); \ + } \ + } + +// Bitwise operations are defined for simd objects and masks, and both operands +// must be integral +#define __ESIMD_BITWISE_OP_FILTER \ + std::is_integral_v &&std::is_integral_v + + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(^, ^=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(|, |=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(&, &=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(%, %=, __ESIMD_BITWISE_OP_FILTER) +#undef __ESIMD_BITWISE_OP_FILTER + +// Bit shift operations are defined only for simd objects (not for masks), and +// both operands must be integral +#define __ESIMD_SHIFT_OP_FILTER \ + std::is_integral_v &&std::is_integral_v \ + &&__SEIEED::is_simd_type_v + + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(<<, <<=, __ESIMD_SHIFT_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(>>, >>=, __ESIMD_SHIFT_OP_FILTER) +#undef __ESIMD_SHIFT_OP_FILTER + +// Arithmetic operations are defined only for simd objects, and the second +// operand's element type must be vectorizable. This requirement for 'this' +// is fulfilled, because otherwise 'this' couldn't have been constructed. +#define __ESIMD_ARITH_OP_FILTER \ + __SEIEED::is_simd_type_v &&__SEIEED::is_vectorizable_v + + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(+, +=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(-, -=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(*, *=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(/, /=, __ESIMD_ARITH_OP_FILTER) +#undef __ESIMD_ARITH_OP_FILTER + +private: + // The underlying data for this vector. + vector_type M_data; + +protected: + void set(const vector_type &Val) { +#ifndef __SYCL_DEVICE_ONLY__ + M_data = Val; +#else + __esimd_vstore(&M_data, Val); +#endif + } +}; + +// ----------- Outlined implementations of simd_obj_impl class APIs. + +template +void simd_obj_impl::copy_from(const T *const Addr) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(isPowerOf2(Sz / OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * OperandSize::OWORD, + "block size must be at most 8 owords"); + + uintptr_t AddrVal = reinterpret_cast(Addr); + *this = + __esimd_flat_block_read_unaligned( + AddrVal); +} + +template +template +ESIMD_INLINE EnableIfAccessor +simd_obj_impl::copy_from(AccessorT acc, uint32_t offset) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(isPowerOf2(Sz / OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * OperandSize::OWORD, + "block size must be at most 8 owords"); +#if defined(__SYCL_DEVICE_ONLY__) + auto surf_ind = AccessorPrivateProxy::getNativeImageObj(acc); + *this = __esimd_block_read(surf_ind, offset); +#else + *this = __esimd_block_read(acc, offset); +#endif // __SYCL_DEVICE_ONLY__ +} + +template +void simd_obj_impl::copy_to(T *addr) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(isPowerOf2(Sz / OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * OperandSize::OWORD, + "block size must be at most 8 owords"); + + uintptr_t AddrVal = reinterpret_cast(addr); + __esimd_flat_block_write(AddrVal, + data()); +} + +template +template +ESIMD_INLINE EnableIfAccessor +simd_obj_impl::copy_to(AccessorT acc, uint32_t offset) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(isPowerOf2(Sz / OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * OperandSize::OWORD, + "block size must be at most 8 owords"); + +#if defined(__SYCL_DEVICE_ONLY__) + auto surf_ind = AccessorPrivateProxy::getNativeImageObj(acc); + __esimd_block_write(surf_ind, offset >> 4, data()); +#else + __esimd_block_write(acc, offset >> 4, data()); +#endif // __SYCL_DEVICE_ONLY__ +} +} // namespace detail + +} // namespace esimd +} // namespace experimental +} // namespace intel +} // namespace ext +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp index 56c4e626118b8..da1f3f5d901f5 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp @@ -25,23 +25,22 @@ namespace detail { /// It is an internal class implementing basic functionality of simd_view. /// /// \ingroup sycl_esimd -template -class simd_view_impl { +template class simd_view_impl { + using Derived = simd_view; + template friend class simd_obj_impl; template friend class simd; - template friend class simd_view_impl; + template friend class simd_view_impl; + template friend class simd_mask_impl; public: - static_assert(!detail::is_simd_view_v::value); + static_assert(is_simd_obj_impl_derivative_v); // Deduce the corresponding value type from its region type. using ShapeTy = typename shape_type::type; static constexpr int length = ShapeTy::Size_x * ShapeTy::Size_y; - /// The simd type if reading the object. - using value_type = simd; - - /// The underlying builtin value type - using vector_type = - detail::vector_type_t; + using base_type = BaseTy; + template + using get_simd_t = construct_a_simd_type_t; /// The region type of this class. using region_type = RegionTy; @@ -50,43 +49,42 @@ class simd_view_impl { /// type of the base object type. using element_type = typename ShapeTy::element_type; - /// @{ - /// Constructors. + /// The simd type if reading the object. + using value_type = get_simd_t; + + /// The underlying builtin vector type backing the value read from the object. + using vector_type = vector_type_t; private: Derived &cast_this_to_derived() { return reinterpret_cast(*this); } protected: + /// @{ + /// Constructors. simd_view_impl(BaseTy &Base, RegionTy Region) : M_base(Base), M_region(Region) {} simd_view_impl(BaseTy &&Base, RegionTy Region) : M_base(Base), M_region(Region) {} - + /// @} public: // Default copy and move constructors. simd_view_impl(const simd_view_impl &Other) = default; simd_view_impl(simd_view_impl &&Other) = default; - /// @} - /// Conversion to simd type. - template operator simd() const { - if constexpr (std::is_same::value) + /// Implicit conversion to simd type. + template >> + inline operator simd() const { + if constexpr (std::is_same_v) return read(); else return convert(read()); } - /// @{ - /// Assignment operators. - simd_view_impl &operator=(const simd_view_impl &Other) { - return write(Other.read()); - } - simd_view_impl &operator=(const value_type &Val) { return write(Val); } - /// @} - - /// Move assignment operator. - simd_view_impl &operator=(simd_view_impl &&Other) { - return write(Other.read()); + /// Implicit conversion to simd_mask_impl type, if element type is compatible. + template >> + inline operator simd_mask_type() const { + return read(); } /// @{ @@ -97,9 +95,11 @@ class simd_view_impl { static constexpr int getStrideX() { return ShapeTy::Stride_x; } static constexpr int getSizeY() { return ShapeTy::Size_y; } static constexpr int getStrideY() { return ShapeTy::Stride_y; } + constexpr uint16_t getOffsetX() const { return getTopRegion(M_region).M_offset_x; } + constexpr uint16_t getOffsetY() const { return getTopRegion(M_region).M_offset_y; } @@ -109,9 +109,11 @@ class simd_view_impl { value_type read() const { using BT = typename BaseTy::element_type; constexpr int BN = BaseTy::length; - return detail::readRegion(M_base.data(), M_region); + return value_type{readRegion(M_base.data(), M_region)}; } + typename value_type::vector_type data() const { return read().data(); } + /// Write to this object. Derived &write(const value_type &Val) { M_base.writeRegion(M_region, Val.data()); @@ -120,12 +122,12 @@ class simd_view_impl { /// @{ /// Whole region update with predicates. - void merge(const value_type &Val, const mask_type_t &Mask) { + void merge(const value_type &Val, const simd_mask_type &Mask) { merge(Val, read(), Mask); } void merge(const value_type &Val1, value_type Val2, - const mask_type_t &Mask) { + const simd_mask_type &Mask) { Val2.merge(Val1, Mask); write(Val2.read()); } @@ -200,69 +202,120 @@ class simd_view_impl { TopRegionTy TopReg(OffsetY, OffsetX); return RetTy{this->M_base, std::make_pair(TopReg, M_region)}; } - -#define DEF_BINOP(BINOP, OPASSIGN) \ - ESIMD_INLINE friend auto operator BINOP(const Derived &X, \ - const Derived &Y) { \ - return (X BINOP Y.read()); \ +#define __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(BINOP, OPASSIGN, COND) \ + \ + /* OPASSIGN simd_obj_impl */ \ + template == \ + is_simd_type_v)&&(N1 == length) && \ + COND>> \ + Derived &operator OPASSIGN(const simd_obj_impl &RHS) { \ + auto Res = read() BINOP RHS; \ + write(Res); \ + return cast_this_to_derived(); \ } \ - Derived &operator OPASSIGN(const value_type &RHS) { \ - using ComputeTy = detail::compute_type_t; \ - auto V0 = detail::convert(read().data()); \ - auto V1 = detail::convert(RHS.data()); \ - auto V2 = V0 BINOP V1; \ - auto V3 = detail::convert(V2); \ - write(V3); \ + \ + /* OPASSIGN simd_view_impl */ \ + template ::element_type, \ + class T = element_type, class SimdT = BaseTy, \ + class = std::enable_if_t< \ + (is_simd_type_v == is_simd_type_v)&&( \ + length == __SEIEE::shape_type::length) && \ + COND>> \ + Derived &operator OPASSIGN(const simd_view_impl &RHS) { \ + *this OPASSIGN RHS.read(); \ return cast_this_to_derived(); \ } \ - Derived &operator OPASSIGN(const Derived &RHS) { \ - return (*this OPASSIGN RHS.read()); \ + \ + /* OPASSIGN scalar */ \ + template > \ + Derived &operator OPASSIGN(T1 RHS) { \ + auto Res = read() BINOP RHS; \ + write(Res); \ + return cast_this_to_derived(); \ } - DEF_BINOP(+, +=) - DEF_BINOP(-, -=) - DEF_BINOP(*, *=) - DEF_BINOP(/, /=) - DEF_BINOP(%, %=) +#define __ESIMD_BITWISE_OP_FILTER std::is_integral_v &&std::is_integral_v + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(^, ^=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(|, |=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(&, &=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(%, %=, __ESIMD_BITWISE_OP_FILTER) +#undef __ESIMD_BITWISE_OP_FILTER -#undef DEF_BINOP +#define __ESIMD_SHIFT_OP_FILTER \ + std::is_integral_v &&std::is_integral_v &&is_simd_type_v -#define DEF_BITWISE_OP(BITWISE_OP, OPASSIGN) \ - ESIMD_INLINE friend auto operator BITWISE_OP(const Derived &X, \ - const Derived &Y) { \ - return (X BITWISE_OP Y.read()); \ - } \ - Derived &operator OPASSIGN(const value_type &RHS) { \ - static_assert(std::is_integral(), "not integeral type"); \ - auto V2 = read().data() BITWISE_OP RHS.data(); \ - auto V3 = detail::convert(V2); \ - write(V3); \ - return cast_this_to_derived(); \ - } \ - Derived &operator OPASSIGN(const Derived &RHS) { \ - return (*this OPASSIGN RHS.read()); \ - } - DEF_BITWISE_OP(&, &=) - DEF_BITWISE_OP(|, |=) - DEF_BITWISE_OP(^, ^=) - DEF_BITWISE_OP(>>, >>=) - DEF_BITWISE_OP(<<, <<=) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(<<, <<=, __ESIMD_SHIFT_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(>>, >>=, __ESIMD_SHIFT_OP_FILTER) +#undef __ESIMD_SHIFT_OP_FILTER + +#define __ESIMD_ARITH_OP_FILTER \ + is_vectorizable_v &&is_vectorizable_v &&is_simd_type_v -#undef DEF_BITWISE_OP + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(+, +=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(-, -=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(*, *=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(/, /=, __ESIMD_ARITH_OP_FILTER) -#define DEF_UNARY_OP(UNARY_OP) \ +#undef __ESIMD_ARITH_OP_FILTER +#undef __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN + +#define __ESIMD_DEF_UNARY_OP(UNARY_OP, COND) \ + template > \ auto operator UNARY_OP() { \ auto V = UNARY_OP(read().data()); \ - return simd(V); \ + return get_simd_t(V); \ + } + __ESIMD_DEF_UNARY_OP(~, std::is_integral_v &&is_simd_type_v) + __ESIMD_DEF_UNARY_OP(+, is_simd_type_v) + __ESIMD_DEF_UNARY_OP(-, is_simd_type_v) + +#undef __ESIMD_DEF_UNARY_OP + + template >> + auto operator!() { + auto V = read().data() == 0; + return get_simd_t(V); + } + + /// @{ + /// Assignment operators. + Derived &operator=(const Derived &Other) { return write(Other.read()); } + + Derived &operator=(const value_type &Val) { return write(Val); } + + /// Move assignment operator. + Derived &operator=(Derived &&Other) { return write(Other.read()); } + + template == + is_simd_type_v)&&(length == + SimdT::length)>> + Derived &operator=(const simd_obj_impl &Other) { + return write(convert(reinterpret_cast(Other))); } - DEF_UNARY_OP(~) - DEF_UNARY_OP(+) - DEF_UNARY_OP(-) -#undef DEF_UNARY_OP + template == is_simd_type_v)&&( + __SEIEE::shape_type::length == + __SEIEE::shape_type::length)>> + Derived &operator=(const simd_view &Other) { + return write(convert(Other.read())); + } - // negation operator - auto operator!() { return cast_this_to_derived() == 0; } + template >> + Derived &operator=(T1 RHS) { + return write(value_type((element_type)RHS)); + } + + /// @} // Operator ++, -- Derived &operator++() { @@ -334,12 +387,12 @@ class simd_view_impl { } /// \name Replicate - /// Replicate simd instance given a simd_view + /// Replicate simd instance given a simd_view_impl /// @{ /// /// \tparam Rep is number of times region has to be replicated. - template simd replicate() { + template get_simd_t replicate() { return read().replicate(0); } @@ -348,7 +401,7 @@ class simd_view_impl { /// \param OffsetX is column offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetX) { return replicate(0, OffsetX); } @@ -358,7 +411,8 @@ class simd_view_impl { /// \param OffsetY is row offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetY, uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetY, + uint16_t OffsetX) { return replicate(OffsetY, OffsetX); } @@ -368,7 +422,7 @@ class simd_view_impl { /// \param OffsetX is column offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetX) { return replicate(0, OffsetX); } @@ -379,7 +433,8 @@ class simd_view_impl { /// \param OffsetY is row offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetY, uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetY, + uint16_t OffsetX) { return replicate(OffsetY, OffsetX); } @@ -390,7 +445,7 @@ class simd_view_impl { /// \param OffsetX is column offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetX) { return read().template replicate(OffsetX); } @@ -402,29 +457,28 @@ class simd_view_impl { /// \param OffsetY is row offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetY, uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetY, + uint16_t OffsetX) { constexpr int RowSize = is2D() ? getSizeX() : 0; return read().template replicate(OffsetY * RowSize + OffsetX); } /// @} - /// Any operation. + /// 'any' operation. /// /// \return 1 if any element is set, 0 otherwise. - template < - typename T1 = element_type, typename T2 = BaseTy, - typename = sycl::detail::enable_if_t::value, T2>> + template ::value, T2>> uint16_t any() { return read().any(); } - /// All operation. + /// 'all' operation. /// /// \return 1 if all elements are set, 0 otherwise. - template < - typename T1 = element_type, typename T2 = BaseTy, - typename = sycl::detail::enable_if_t::value, T2>> + template ::value, T2>> uint16_t all() { return read().all(); } diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp index e14d2e9c5306e..d5096e5f00e24 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp @@ -16,6 +16,12 @@ #include #include +#if defined(__ESIMD_DBG_HOST) && !defined(__SYCL_DEVICE_ONLY__) +#define __esimd_dbg_print(a) std::cout << ">>> " << #a << "\n" +#else +#define __esimd_dbg_print(a) +#endif // defined(__ESIMD_DBG_HOST) && !defined(__SYCL_DEVICE_ONLY__) + #include __SYCL_INLINE_NAMESPACE(cl) { @@ -25,14 +31,139 @@ namespace intel { namespace experimental { namespace esimd { -// simd and simd_view forward declarations +// simd and simd_view_impl forward declarations template class simd; template class simd_view; namespace detail { -namespace csd = cl::sycl::detail; +// forward declarations of major internal simd classes +template class simd_mask_impl; +template +class simd_obj_impl; + +// @{ +// Helpers for major simd classes, which don't require their definitions to +// compile. Error checking/SFINAE is not used as these are only used internally. + +using simd_mask_elem_type = unsigned short; +template using simd_mask_type = simd_mask_impl; + +// @{ +// Checks if given type T is a raw clang vector type, plus provides some info +// about it if it is. + +struct invalid_element_type; + +template struct is_clang_vector_type : std::false_type { + static inline constexpr int length = 0; + using element_type = invalid_element_type; +}; + +template +struct is_clang_vector_type + : std::true_type { + static inline constexpr int length = N; + using element_type = T; +}; +template +static inline constexpr bool is_clang_vector_type_v = + is_clang_vector_type::value; + +// @} + +// @{ +// Checks if given type T derives from simd_obj_impl or is equal to it. +template +struct is_simd_obj_impl_derivative : public std::false_type { + using element_type = invalid_element_type; +}; + +// Specialization for the simd_obj_impl type itself. +template +struct is_simd_obj_impl_derivative> + : public std::true_type { + using element_type = ElT; +}; + +// Specialization for all other types. +template class Derived> +struct is_simd_obj_impl_derivative> + : public std::conditional_t< + std::is_base_of_v>, + Derived>, + std::true_type, std::false_type> { + using element_type = std::conditional_t< + std::is_base_of_v>, + Derived>, + ElT, void>; +}; + +// Convenience shortcut. +template +inline constexpr bool is_simd_obj_impl_derivative_v = + is_simd_obj_impl_derivative::value; +// @} + +// @{ +// "Resizes" given simd type \c T to given number of elements \c N. +template struct resize_a_simd_type; + +// Specialization for the simd_obj_impl type. +template class SimdT> +struct resize_a_simd_type>, Ndst> { + using type = simd_obj_impl>; +}; + +// Specialization for the simd_obj_impl type derivatives. +template class SimdT> +struct resize_a_simd_type, Ndst> { + using type = SimdT; +}; + +// Convenience shortcut. +template +using resize_a_simd_type_t = typename resize_a_simd_type::type; +// @} + +// @{ +// Converts element type of given simd type \c SimdT to +// given scalar type \c DstElemT. +template struct convert_simd_elem_type; + +// Specialization for the simd_obj_impl type. +template class SimdT> +struct convert_simd_elem_type>, + DstElemT> { + using type = simd_obj_impl>; +}; + +// Specialization for the simd_obj_impl type derivatives. +template class SimdT> +struct convert_simd_elem_type, DstElemT> { + using type = SimdT; +}; + +// Convenience shortcut. +template +using convert_simd_elem_type_t = + typename convert_simd_elem_type::type; + +// @} + +// Constructs a simd type with the same template type as in \c SimdT, and +// given element type and number. +template +using construct_a_simd_type_t = + convert_simd_elem_type_t, ElT>; +// @} + +namespace csd = cl::sycl::detail; using half = cl::sycl::detail::half_impl::StorageT; template @@ -52,22 +183,25 @@ struct is_esimd_arithmetic_type< decltype(std::declval() - std::declval()), decltype(std::declval() * std::declval()), decltype(std::declval() / std::declval())>> - : std::true_type {}; + : std::conditional_t, std::true_type, + std::false_type> {}; -// is_vectorizable_type template -struct is_vectorizable : public is_esimd_arithmetic_type {}; +static inline constexpr bool is_esimd_arithmetic_type_v = + is_esimd_arithmetic_type::value; -template <> struct is_vectorizable : public std::false_type {}; +// is_vectorizable_type +template +struct is_vectorizable : std::conditional_t, + std::true_type, std::false_type> {}; template -struct is_vectorizable_v - : std::integral_constant::value> {}; +static inline constexpr bool is_vectorizable_v = is_vectorizable::value; // vector_type, using clang vector type extension. template struct vector_type { static_assert(!std::is_const::value, "const element type not supported"); - static_assert(is_vectorizable_v::value, "element type not supported"); + static_assert(is_vectorizable_v, "element type not supported"); static_assert(N > 0, "zero-element vector not supported"); static constexpr int length = N; @@ -77,16 +211,28 @@ template struct vector_type { template using vector_type_t = typename vector_type::type; +// must match simd_mask::element_type +template +using simd_mask_storage_t = vector_type_t; + // Compute the simd_view type of a 1D format operation. template struct compute_format_type; -template -struct compute_format_type, EltTy> { +template struct compute_format_type_impl { static constexpr int Size = sizeof(Ty) * N / sizeof(EltTy); static constexpr int Stride = 1; using type = region1d_t; }; +template class SimdT> +struct compute_format_type, EltTy> + : compute_format_type_impl {}; + +template +struct compute_format_type, EltTy> + : compute_format_type_impl {}; + template struct compute_format_type, EltTy> { using ShapeTy = typename shape_type::type; @@ -103,7 +249,7 @@ template struct compute_format_type_2d; template -struct compute_format_type_2d, EltTy, Height, Width> { +struct compute_format_type_2d_impl { static constexpr int Prod = sizeof(Ty) * N / sizeof(EltTy); static_assert(Prod == Width * Height, "size mismatch"); @@ -114,6 +260,16 @@ struct compute_format_type_2d, EltTy, Height, Width> { using type = region2d_t; }; +template class SimdT> +struct compute_format_type_2d, EltTy, Height, Width> + : compute_format_type_2d_impl {}; + +template +struct compute_format_type_2d, EltTy, Height, Width> + : compute_format_type_2d_impl {}; + template struct compute_format_type_2d, EltTy, Height, @@ -133,49 +289,117 @@ template using compute_format_type_2d_t = typename compute_format_type_2d::type; -// Check if a type is simd_view type -template struct is_simd_view_type : std::false_type {}; +// @{ +// Checks if given type is a view of any simd type (simd or simd_mask). +template struct is_any_simd_view_type : std::false_type {}; template -struct is_simd_view_type> : std::true_type {}; +struct is_any_simd_view_type> : std::true_type {}; + +template +static inline constexpr bool is_any_simd_view_type_v = + is_any_simd_view_type::value; +// @} + +// @{ +// Check if a type is one of internal 'simd_xxx_impl' types exposing simd-like +// interfaces and behaving like a simd object type. template -struct is_simd_view_v - : std::integral_constant>::value> {}; +static inline constexpr bool is_simd_like_type_v = + is_any_simd_view_type_v || is_simd_obj_impl_derivative_v; +// @} -// Check if a type is simd or simd_view type +// @{ +// Checks if given type is a any of the user-visible simd types (simd or +// simd_mask). template struct is_simd_type : std::false_type {}; +template +struct is_simd_type> : std::true_type {}; +template +static inline constexpr bool is_simd_type_v = is_simd_type::value; -template -struct is_simd_type> : std::true_type {}; +template struct is_simd_mask_type : std::false_type {}; +template +struct is_simd_mask_type> + : std::true_type {}; +template +static inline constexpr bool is_simd_mask_type_v = is_simd_mask_type::value; +// @} -template -struct is_simd_type> : std::true_type {}; +// @{ +// Checks if given type is a view of the simd type. +template struct is_simd_view_type : std::false_type {}; + +template +struct is_simd_view_type> + : std::conditional_t, std::true_type, + std::false_type> {}; template -struct is_simd_v - : std::integral_constant>::value> {}; +static inline constexpr bool is_simd_view_type_v = is_simd_view_type::value; +// @} + +template +static inline constexpr bool is_simd_or_view_type_v = + is_simd_view_type_v || is_simd_type_v; + +// @{ +// Get the element type if it is a scalar, clang vector, simd or simd_view type. + +struct cant_deduce_element_type; -// Get the element type if it is a simd or simd_view type. -template struct element_type { using type = remove_cvref_t; }; -template struct element_type> { - using type = Ty; +template struct element_type { + using type = cant_deduce_element_type; }; -template -struct element_type> { - using type = typename RegionTy::element_type; + +template +struct element_type>> { + using type = remove_cvref_t; +}; + +template +struct element_type>> { + using type = typename T::element_type; +}; + +template +struct element_type>> { + using type = typename is_clang_vector_type::element_type; }; -// Get the common type of a binary operator. -template ::value && is_simd_v::value>> -struct common_type { +// @} + +// @{ +// Get computation type of a binary operator given its operand types: +// - if both types are arithmetic - return CPP's "common real type" of the +// computation (matches C++) +// - if both types are simd types, they must be of the same length N, +// and the returned type is simd, where N is the "common real type" of +// the element type of the operands (diverges from clang) +// - otherwise, one type is simd and another is arithmetic - the simd type is +// returned (matches clang) + +struct invalid_computation_type; + +template struct computation_type { + using type = invalid_computation_type; +}; + +template +struct computation_type< + T1, T2, std::enable_if_t && is_vectorizable_v>> { + using type = decltype(std::declval() + std::declval()); +}; + +template +struct computation_type< + T1, T2, + std::enable_if_t && is_simd_like_type_v>> { private: using Ty1 = typename element_type::type; using Ty2 = typename element_type::type; - using EltTy = decltype(Ty1() + Ty2()); + using EltTy = typename computation_type::type; static constexpr int N1 = T1::length; static constexpr int N2 = T2::length; static_assert(N1 == N2, "size mismatch"); @@ -184,20 +408,18 @@ struct common_type { using type = simd; }; -template -using compute_type_t = - typename common_type, remove_cvref_t>::type; +template +using computation_type_t = + typename computation_type, remove_cvref_t>::type; + +// @} -template To convert(From Val) { +template +std::enable_if_t && is_clang_vector_type_v, To> +convert(From Val) { return __builtin_convertvector(Val, To); } -/// Get the computation type. -template struct computation_type { - // Currently only arithmetic operations are needed. - typedef decltype(T1() + T2()) type; -}; - /// Base case for checking if a type U is one of the types. template constexpr bool is_type() { return false; } @@ -248,18 +470,11 @@ inline std::istream &operator>>(std::istream &I, half &rhs) { rhs = ValFloat; return I; } + } // namespace detail -// TODO @rolandschulz on May 21 -// {quote} -// - The mask should also be a wrapper around the clang - vector type rather -// than the clang - vector type itself. -// - The internal storage should be implementation defined.uint16_t is a bad -// choice for some HW.Nor is it how clang - vector types works(using the same -// size int as the corresponding vector type used for comparison(e.g. long for -// double and int for float)). -template -using mask_type_t = typename detail::vector_type::type; +// Alias for backward compatibility. +template using mask_type_t = detail::simd_mask_storage_t; } // namespace esimd } // namespace experimental diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/util.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/util.hpp index c462f839e6ac5..021452042f097 100755 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/util.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/util.hpp @@ -15,6 +15,10 @@ #include +#define __SEIEED sycl::ext::intel::experimental::esimd::detail +#define __SEIEE sycl::ext::intel::experimental::esimd +#define __SEIEEED sycl::ext::intel::experimental::esimd::emu::detail + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace ext { diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp index 5f87f1b4595c7..dc2fba53bba9e 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp @@ -13,9 +13,11 @@ #include #include #include +#include #include #include #include +#include #include @@ -61,7 +63,7 @@ namespace detail { template ESIMD_NODEBUG ESIMD_INLINE simd __esimd_abs_common_internal(simd src0, int flag = saturation_off) { - simd Result = __esimd_abs(src0.data()); + simd Result = simd{__esimd_abs(src0.data())}; if (flag != saturation_on) return Result; @@ -96,7 +98,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< typename sycl::detail::remove_const_t>::value, simd> esimd_abs(simd src0, int flag = saturation_off) { - return detail::__esimd_abs_common_internal(src0, flag); + return detail::__esimd_abs_common_internal(src0.data(), flag); } /// Get absolute value (scalar version) @@ -129,7 +131,7 @@ esimd_abs(T1 src0, int flag = saturation_off) { template ESIMD_NODEBUG ESIMD_INLINE simd esimd_abs(simd src0, int flag = saturation_off) { - return detail::__esimd_abs_common_internal(src0, flag); + return detail::__esimd_abs_common_internal(src0.data(), flag); } /// Get absolute value (scalar version). This is a specialization of a version @@ -165,8 +167,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_shl(simd src0, U src1, int flag = saturation_off) { - typedef - typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; @@ -213,7 +214,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_shl(T1 src0, T2 src1, int flag = saturation_off) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_shl(Src0, Src1, flag); @@ -237,8 +238,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_shr(simd src0, U src1, int flag = saturation_off) { - typedef - typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; typename detail::simd_type::type Result = @@ -266,7 +266,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_shr(T1 src0, T2 src1, int flag = saturation_off) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_shr(Src0, Src1, flag); @@ -285,7 +285,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, simd> esimd_rol(simd src0, simd src1) { - return __esimd_rol(src0, src1); + return __esimd_rol(src0.data(), src1.data()); } /// Rotate left operation with a vector and a scalar inputs @@ -303,8 +303,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_rol(simd src0, U src1) { - typedef - typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; return __esimd_rol(Src0.data(), Src1.data()); @@ -324,7 +323,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_rol(T1 src0, T2 src1) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_rol(Src0, Src1); @@ -343,7 +342,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, simd> esimd_ror(simd src0, simd src1) { - return __esimd_ror(src0, src1); + return __esimd_ror(src0.data(), src1.data()); } /// Rotate right operation with a vector and a scalar inputs @@ -361,8 +360,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_ror(simd src0, U src1) { - typedef - typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; return __esimd_ror(Src0.data(), Src1.data()); @@ -382,7 +380,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_ror(T1 src0, T2 src1) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_ror(Src0, Src1); @@ -406,7 +404,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_lsr(simd src0, U src1, int flag = saturation_off) { - typedef typename detail::computation_type::type IntermedTy; + using IntermedTy = detail::computation_type_t; typedef typename std::make_unsigned::type ComputationTy; simd Src0 = src0; simd Result = Src0.data() >> src1.data(); @@ -434,7 +432,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_lsr(T1 src0, T2 src1, int flag = saturation_off) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_lsr(Src0, Src1, flag); @@ -458,7 +456,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_asr(simd src0, U src1, int flag = saturation_off) { - typedef typename detail::computation_type::type IntermedTy; + using IntermedTy = detail::computation_type_t; typedef typename std::make_signed::type ComputationTy; simd Src0 = src0; simd Result = Src0 >> src1; @@ -486,7 +484,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_asr(T1 src0, T2 src1, int flag = saturation_off) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_asr(Src0, Src1, flag); @@ -503,8 +501,7 @@ ESIMD_NODEBUG ESIMD_INLINE detail::is_dword_type::value, simd> esimd_imul(simd &rmd, simd src0, U src1) { - typedef - typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; rmd = Src0 * Src1; @@ -524,8 +521,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< detail::is_dword_type::value && SZ == 1, simd> esimd_imul(simd &rmd, simd src0, U src1) { - typedef typename detail::computation_type::type - ComputationTy; + using ComputationTy = detail::computation_type_t; ComputationTy Product = convert(src0); Product *= src1; rmd = Product.bit_cast_view().select<1, 1>[0]; @@ -538,8 +534,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< detail::is_dword_type::value && SZ != 1, simd> esimd_imul(simd &rmd, simd src0, U src1) { - typedef typename detail::computation_type::type - ComputationTy; + using ComputationTy = detail::computation_type_t; ComputationTy Product = convert(src0); Product *= src1; rmd = Product.bit_cast_view().select(0); @@ -697,15 +692,18 @@ ESIMD_NODEBUG ESIMD_INLINE simd esimd_max(simd src0, simd src1, int flag = saturation_off) { if constexpr (std::is_floating_point::value) { auto Result = __esimd_fmax(src0.data(), src1.data()); - return (flag == saturation_off) ? Result : __esimd_satf(Result); + Result = (flag == saturation_off) ? Result : __esimd_satf(Result); + return simd{Result}; } else if constexpr (std::is_unsigned::value) { auto Result = __esimd_umax(src0.data(), src1.data()); - return (flag == saturation_off) ? Result - : __esimd_uutrunc_sat(Result); + Result = (flag == saturation_off) ? Result + : __esimd_uutrunc_sat(Result); + return simd{Result}; } else { auto Result = __esimd_smax(src0.data(), src1.data()); - return (flag == saturation_off) ? Result - : __esimd_sstrunc_sat(Result); + Result = (flag == saturation_off) ? Result + : __esimd_sstrunc_sat(Result); + return simd{Result}; } } @@ -781,15 +779,18 @@ ESIMD_NODEBUG ESIMD_INLINE simd esimd_min(simd src0, simd src1, int flag = saturation_off) { if constexpr (std::is_floating_point::value) { auto Result = __esimd_fmin(src0.data(), src1.data()); - return (flag == saturation_off) ? Result : __esimd_satf(Result); + Result = (flag == saturation_off) ? Result : __esimd_satf(Result); + return simd{Result}; } else if constexpr (std::is_unsigned::value) { auto Result = __esimd_umin(src0.data(), src1.data()); - return (flag == saturation_off) ? Result - : __esimd_uutrunc_sat(Result); + Result = (flag == saturation_off) ? Result + : __esimd_uutrunc_sat(Result); + return simd{Result}; } else { auto Result = __esimd_smin(src0.data(), src1.data()); - return (flag == saturation_off) ? Result - : __esimd_sstrunc_sat(Result); + Result = (flag == saturation_off) ? Result + : __esimd_sstrunc_sat(Result); + return simd{Result}; } } @@ -1207,7 +1208,7 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE simd esimd_frc(simd src0) { simd Src0 = src0; - return __esimd_frc(Src0); + return __esimd_frc(Src0.data()); } /// Performs truncate-to-minus-infinity fraction operation of \p src0. @@ -1227,7 +1228,7 @@ ESIMD_NODEBUG ESIMD_INLINE simd esimd_lzd(simd src0, int flag = saturation_off) { // Saturation parameter ignored simd Src0 = src0; - return __esimd_lzd(Src0); + return __esimd_lzd(Src0.data()); } template @@ -1251,7 +1252,8 @@ esimd_lrp(simd src0, U src1, V src2, int flag = saturation_off) { "vector size must be a multiple of 4"); simd Src1 = src1; simd Src2 = src2; - simd Result = __esimd_lrp(src0, Src1, Src2); + simd Result = + __esimd_lrp(src0.data(), Src1.data(), Src2.data()); if (flag != saturation_on) return Result; @@ -1309,7 +1311,7 @@ esimd_pln(simd src0, simd src1, simd src2, Src12.select<(SZ >> 3), 1, 8, 1>(0, 8) = src2.template bit_cast_view> 3), 8>(); - simd Result = __esimd_pln(src0, Src12.read()); + simd Result = __esimd_pln(src0.data(), Src12.read().data()); if (flag != saturation_on) return Result; @@ -1321,7 +1323,7 @@ esimd_pln(simd src0, simd src1, simd src2, template ESIMD_NODEBUG ESIMD_INLINE simd esimd_bf_reverse(simd src0) { simd Src0 = src0; - return __esimd_bfrev(Src0); + return __esimd_bfrev(Src0.data()); } template @@ -1348,7 +1350,7 @@ ESIMD_NODEBUG ESIMD_INLINE simd Src2 = src2; simd Src3 = src3; - return __esimd_bfins(Src0, Src1, Src2, Src3); + return __esimd_bfins(Src0.data(), Src1.data(), Src2.data(), Src3.data()); } template @@ -1374,7 +1376,7 @@ ESIMD_NODEBUG ESIMD_INLINE simd Src1 = src1; simd Src2 = src2; - return __esimd_bfext(Src0, Src1, Src2); + return __esimd_bfext(Src0.data(), Src1.data(), Src2.data()); } template @@ -1505,8 +1507,8 @@ ESIMD_NODEBUG ESIMD_INLINE esimd_atan(simd src0, int flag = saturation_off) { simd Src0 = esimd_abs(src0); - simd Neg = src0 < T(0.0); - simd Gt1 = Src0 > T(1.0); + simd_mask Neg = src0 < T(0.0); + simd_mask Gt1 = Src0 > T(1.0); Src0.merge(esimd_inv(Src0), Gt1); @@ -1548,8 +1550,8 @@ ESIMD_NODEBUG ESIMD_INLINE esimd_acos(simd src0, int flag = saturation_off) { simd Src0 = esimd_abs(src0); - simd Neg = src0 < T(0.0); - simd TooBig = Src0 >= T(0.999998); + simd_mask Neg = src0 < T(0.0); + simd_mask TooBig = Src0 >= T(0.999998); // Replace oversized values to ensure no possibility of sqrt of // a negative value later @@ -1591,7 +1593,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t::value, simd> esimd_asin(simd src0, int flag = saturation_off) { - simd Neg = src0 < T(0.0); + simd_mask Neg = src0 < T(0.0); simd Result = T(ESIMD_HDR_CONST_PI / 2.0) - esimd_acos(esimd_abs(src0)); @@ -1644,14 +1646,14 @@ ESIMD_INTRINSIC_DEF(rndz) template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t<(N == 8 || N == 16 || N == 32), uint> - esimd_pack_mask(simd src0) { + esimd_pack_mask(simd_mask src0) { return __esimd_pack_mask(src0.data()); } template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t<(N == 8 || N == 16 || N == 32), - simd> + simd_mask> esimd_unpack_mask(uint src0) { return __esimd_unpack_mask(src0); } @@ -1659,8 +1661,8 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t<(N != 8 && N != 16 && N < 32), uint> - esimd_pack_mask(simd src0) { - simd src_0 = 0; + esimd_pack_mask(simd_mask src0) { + simd_mask<(N < 8 ? 8 : N < 16 ? 16 : 32)> src_0 = 0; src_0.template select() = src0.template bit_cast_view(); return esimd_pack_mask(src_0); } @@ -1973,7 +1975,7 @@ ESIMD_INLINE simd esimd_atan2_fast(simd y, simd x, simd a1; simd atan2; - simd mask = (y >= 0.0f); + simd_mask mask = (y >= 0.0f); a0.merge(ESIMD_CONST_PI * 0.5f, ESIMD_CONST_PI * 1.5f, mask); a1.merge(0, ESIMD_CONST_PI * 2.0f, mask); @@ -2009,7 +2011,7 @@ ESIMD_INLINE simd esimd_atan2(simd y, simd x, simd v_distance; simd v_y0; simd atan2; - simd mask; + simd_mask mask; mask = (x < 0); v_y0.merge(ESIMD_CONST_PI, 0, mask); @@ -2027,10 +2029,10 @@ template <> ESIMD_INLINE float esimd_atan2(float y, float x, const uint flags) { float v_distance; float v_y0; simd atan2; - unsigned short mask; + simd_mask<1> mask; mask = (x < 0); - v_y0 = mask ? ESIMD_CONST_PI : 0; + v_y0 = mask[0] ? ESIMD_CONST_PI : 0; v_distance = esimd_sqrt(x * x + y * y); mask = (esimd_abs(y) < 0.000001f); atan2.merge(v_y0, (2 * esimd_atan((v_distance - x) / y)), mask); @@ -2345,11 +2347,11 @@ template struct esimd_apply_reduced_max { template simd operator()(simd v1, simd v2) { if constexpr (std::is_floating_point::value) { - return __esimd_reduced_fmax(v1, v2); + return __esimd_reduced_fmax(v1.data(), v2.data()); } else if constexpr (std::is_unsigned::value) { - return __esimd_reduced_umax(v1, v2); + return __esimd_reduced_umax(v1.data(), v2.data()); } else { - return __esimd_reduced_smax(v1, v2); + return __esimd_reduced_smax(v1.data(), v2.data()); } } }; @@ -2358,11 +2360,11 @@ template struct esimd_apply_reduced_min { template simd operator()(simd v1, simd v2) { if constexpr (std::is_floating_point::value) { - return __esimd_reduced_fmin(v1, v2); + return __esimd_reduced_fmin(v1.data(), v2.data()); } else if constexpr (std::is_unsigned::value) { - return __esimd_reduced_umin(v1, v2); + return __esimd_reduced_umin(v1.data(), v2.data()); } else { - return __esimd_reduced_smin(v1, v2); + return __esimd_reduced_smin(v1.data(), v2.data()); } } }; @@ -2426,7 +2428,7 @@ T0 esimd_reduce(simd v) { template ESIMD_INLINE ESIMD_NODEBUG T0 esimd_sum(simd v) { - using TT = compute_type_t>; + using TT = detail::computation_type_t>; using RT = typename TT::element_type; T0 retv = esimd_reduce(v); return retv; @@ -2434,7 +2436,7 @@ ESIMD_INLINE ESIMD_NODEBUG T0 esimd_sum(simd v) { template ESIMD_INLINE ESIMD_NODEBUG T0 esimd_prod(simd v) { - using TT = compute_type_t>; + using TT = detail::computation_type_t>; using RT = typename TT::element_type; T0 retv = esimd_reduce(v); return retv; @@ -2471,7 +2473,7 @@ ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd v, BinaryOperation op) { template simd esimd_dp4(simd v1, simd v2) { - auto retv = __esimd_dp4(v1, v2); + auto retv = __esimd_dp4(v1.data(), v2.data()); return retv; } diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp index 02cfd986a0410..606844a790027 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp @@ -76,7 +76,7 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< ((n == 8 || n == 16 || n == 32) && (ElemsPerAddr == 1 || ElemsPerAddr == 2 || ElemsPerAddr == 4)), simd> -gather(T *p, simd offsets, simd pred = 1) { +gather(T *p, simd offsets, simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); @@ -126,7 +126,7 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< (ElemsPerAddr == 1 || ElemsPerAddr == 2 || ElemsPerAddr == 4)), void> scatter(T *p, simd vals, simd offsets, - simd pred = 1) { + simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); addrs = addrs + offsets_i; @@ -271,21 +271,21 @@ ESIMD_INLINE ESIMD_NODEBUG const auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); const simd promo_vals = __esimd_surf_read(scale, surf_ind, glob_offset, offsets); + L3H>(scale, surf_ind, glob_offset, offsets.data()); #else const simd promo_vals = __esimd_surf_read( - scale, acc, glob_offset, offsets); + scale, acc, glob_offset, offsets.data()); #endif return convert(promo_vals); } else { #if defined(__SYCL_DEVICE_ONLY__) const auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); return __esimd_surf_read( - scale, surf_ind, glob_offset, offsets); + scale, surf_ind, glob_offset, offsets.data()); #else return __esimd_surf_read( - scale, acc, glob_offset, offsets); + scale, acc, glob_offset, offsets.data()); #endif } } @@ -317,7 +317,7 @@ ESIMD_INLINE ESIMD_NODEBUG !std::is_pointer::value, void> scatter(AccessorTy acc, simd vals, simd offsets, - uint32_t glob_offset = 0, simd pred = 1) { + uint32_t glob_offset = 0, simd_mask pred = simd_mask{1}) { constexpr int TypeSizeLog2 = detail::ElemsPerAddrEncoding(); // TODO (performance) use hardware-supported scale once BE supports it @@ -338,19 +338,21 @@ ESIMD_INLINE ESIMD_NODEBUG #if defined(__SYCL_DEVICE_ONLY__) const auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); __esimd_surf_write( - pred, scale, surf_ind, glob_offset, offsets, promo_vals); + pred.data(), scale, surf_ind, glob_offset, offsets.data(), + promo_vals.data()); #else __esimd_surf_write( - pred, scale, acc, glob_offset, offsets, promo_vals); + pred.data(), scale, acc, glob_offset, offsets.data(), + promo_vals.data()); #endif } else { #if defined(__SYCL_DEVICE_ONLY__) const auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); __esimd_surf_write( - pred, scale, surf_ind, glob_offset, offsets, vals); + pred.data(), scale, surf_ind, glob_offset, offsets.data(), vals.data()); #else __esimd_surf_write( - pred, scale, acc, glob_offset, offsets, vals); + pred.data(), scale, acc, glob_offset, offsets.data(), vals.data()); #endif } } @@ -388,7 +390,7 @@ template > -gather_rgba(T *p, simd offsets, simd pred = 1) { +gather_rgba(T *p, simd offsets, simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); @@ -407,8 +409,8 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< simd> gather4(T *p, simd offsets, - simd - pred = 1) { + simd_mask pred = + 1) { return gather_rgba(p, offsets, pred); } @@ -429,7 +431,7 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<(N == 16 || N == 32) && (sizeof(T) == 4), void> scatter_rgba(T *p, simd vals, - simd offsets, simd pred = 1) { + simd offsets, simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); addrs = addrs + offsets_i; @@ -445,7 +447,7 @@ __SYCL_DEPRECATED("use scatter_rgba.") ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< (n == 16 || n == 32) && (sizeof(T) == 4), void> scatter4(T *p, simd vals, - simd offsets, simd pred = 1) { + simd offsets, simd_mask pred = 1) { scatter_rgba(p, vals, offsets, pred); } @@ -555,7 +557,7 @@ template (), simd> - flat_atomic(T *p, simd offset, simd pred) { + flat_atomic(T *p, simd offset, simd_mask pred) { simd vAddr(reinterpret_cast(p)); simd offset_i1 = convert(offset); vAddr += offset_i1; @@ -570,7 +572,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> flat_atomic(T *p, simd offset, simd src0, - simd pred) { + simd_mask pred) { simd vAddr(reinterpret_cast(p)); simd offset_i1 = convert(offset); vAddr += offset_i1; @@ -586,7 +588,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> flat_atomic(T *p, simd offset, simd src0, - simd src1, simd pred) { + simd src1, simd_mask pred) { simd vAddr(reinterpret_cast(p)); simd offset_i1 = convert(offset); vAddr += offset_i1; @@ -655,8 +657,8 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void slm_init(uint32_t size); template ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<(n == 16 || n == 32), simd> - slm_load(simd offsets, simd pred = 1) { - return __esimd_slm_read(offsets.data(), pred.data()); + slm_load(simd offsets, simd_mask Pred = 1) { + return __esimd_slm_read(offsets.data(), Pred.data()); } /// SLM scatter. @@ -664,7 +666,7 @@ template ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<(n == 16 || n == 32), void> slm_store(simd vals, simd offsets, - simd pred = 1) { + simd_mask pred = 1) { __esimd_slm_write(offsets.data(), vals.data(), pred.data()); } @@ -675,7 +677,7 @@ template ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< (n == 8 || n == 16 || n == 32) && (sizeof(T) == 4), simd> -slm_load4(simd offsets, simd pred = 1) { +slm_load4(simd offsets, simd_mask pred = 1) { return __esimd_slm_read4(offsets.data(), pred.data()); } @@ -684,7 +686,7 @@ template ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< (n == 8 || n == 16 || n == 32) && (sizeof(T) == 4), void> slm_store4(simd vals, - simd offsets, simd pred = 1) { + simd offsets, simd_mask pred = 1) { __esimd_slm_write4(offsets.data(), vals.data(), pred.data()); } @@ -727,7 +729,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> - slm_atomic(simd offsets, simd pred) { + slm_atomic(simd offsets, simd_mask pred) { return __esimd_slm_atomic0(offsets.data(), pred.data()); } @@ -736,8 +738,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> - slm_atomic(simd offsets, simd src0, - simd pred) { + slm_atomic(simd offsets, simd src0, simd_mask pred) { return __esimd_slm_atomic1(offsets.data(), src0.data(), pred.data()); } @@ -748,7 +749,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> slm_atomic(simd offsets, simd src0, simd src1, - simd pred) { + simd_mask pred) { return __esimd_slm_atomic2(offsets.data(), src0.data(), src1.data(), pred.data()); } @@ -830,14 +831,15 @@ media_block_store(AccessorTy acc, unsigned x, unsigned y, simd vals) { temp_ref.template select() = vals_ref; __esimd_media_block_store( 0, detail::AccessorPrivateProxy::getNativeImageObj(acc), plane, - sizeof(T) * n, x, y, temp); + sizeof(T) * n, x, y, temp.data()); } else { __esimd_media_block_store( 0, detail::AccessorPrivateProxy::getNativeImageObj(acc), plane, - sizeof(T) * n, x, y, vals); + sizeof(T) * n, x, y, vals.data()); } #else - __esimd_media_block_store(0, acc, plane, sizeof(T) * n, x, y, vals); + __esimd_media_block_store(0, acc, plane, sizeof(T) * n, x, y, + vals.data()); #endif // __SYCL_DEVICE_ONLY__ } @@ -902,7 +904,7 @@ esimd_raw_sends_load(simd msgDst, simd msgSrc0, simd msgSrc1, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, uint8_t isEOT = 0, - uint8_t isSendc = 0, simd mask = 1) { + uint8_t isSendc = 0, simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar"); constexpr unsigned _Width2 = n2 * sizeof(T2); @@ -942,7 +944,7 @@ ESIMD_INLINE ESIMD_NODEBUG simd esimd_raw_send_load(simd msgDst, simd msgSrc0, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst, uint8_t isEOT = 0, - uint8_t isSendc = 0, simd mask = 1) { + uint8_t isSendc = 0, simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar"); constexpr unsigned _Width2 = n2 * sizeof(T2); @@ -980,7 +982,7 @@ esimd_raw_sends_store(simd msgSrc0, simd msgSrc1, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1, uint8_t isEOT = 0, uint8_t isSendc = 0, - simd mask = 1) { + simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0"); constexpr unsigned _Width2 = n2 * sizeof(T2); @@ -1014,7 +1016,7 @@ ESIMD_INLINE ESIMD_NODEBUG void esimd_raw_send_store(simd msgSrc0, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0, uint8_t isSendc = 0, - simd mask = 1) { + simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0"); diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp index 26abc25916ae0..70065d7142e58 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp @@ -10,12 +10,21 @@ #pragma once -#include +#include #include #include #include #include +#include + +#ifndef __SYCL_DEVICE_ONLY__ +#include +#endif // __SYCL_DEVICE_ONLY__ + +#define __ESIMD_MASK_DEPRECATION_MSG \ + "Use of 'simd' class to represent predicate or mask is deprecated. Use " \ + "'simd_mask' instead." __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -32,654 +41,178 @@ namespace esimd { /// read-update-write semantics. /// /// \ingroup sycl_esimd -template class simd { - template friend class simd_view; +template +class simd + : public detail::simd_obj_impl< + Ty, N, simd, std::enable_if_t>> { + using base_type = detail::simd_obj_impl>; public: - /// The underlying builtin data type. - using vector_type = detail::vector_type_t; - - /// The element type of this simd object. - using element_type = Ty; - - /// The number of elements in this simd object. + using base_type::base_type; + using element_type = typename base_type::element_type; + using vector_type = typename base_type::vector_type; static constexpr int length = N; - /// @{ - /// Constructors. - simd() = default; - simd(const simd &other) { set(other.data()); } - template simd(const simd &other) { - set(__builtin_convertvector(other.data(), detail::vector_type_t)); - } - template simd(simd &&other) { - if constexpr (std::is_same::value) - set(other.data()); - else - set(__builtin_convertvector(other.data(), detail::vector_type_t)); - } - simd(const vector_type &Val) { set(Val); } - - // TODO @rolandschulz - // {quote} - // Providing both an overload of initializer-list and the same type itself - // causes really weird behavior. E.g. - // simd s1(1,2); //calls next constructor - // simd s2{1,2}; //calls this constructor - // This might not be confusing for all users but to everyone using - // uniform-initialization syntax. Therefore if you want to use this - // constructor the other one should have a special type (see - // https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#es64-use-the-tenotation-for-construction) - // to avoid this issue. Also this seems like one of those areas where this - // simd-type needless differs from std::simd. Why should these constructors be - // different? Why reinvent the wheel and have all the work of fixing these - // problems if we could just use the existing solution. Especially if that is - // anyhow the long-term goal. Adding extra stuff like the select is totally - // fine. But differ on things which have no apparent advantage and aren't as - // thought through seems to have only downsides. - // {/quote} - - simd(std::initializer_list Ilist) noexcept { - int i = 0; - for (auto It = Ilist.begin(); It != Ilist.end() && i < N; ++It) { - M_data[i++] = *It; - } - } - - /// Initialize a simd with an initial value and step. - simd(Ty Val, Ty Step = Ty()) noexcept { - if (Step == Ty()) - M_data = Val; - else { -#pragma unroll - for (int i = 0; i < N; ++i) { - M_data[i] = Val; - Val += Step; - } - } - } - /// @} - - /// conversion operator - operator const vector_type &() const & { return M_data; } - operator vector_type &() & { return M_data; } - - /// Implicit conversion for simd into T. - template > - operator element_type() const { - return data()[0]; - } - - vector_type data() const { -#ifndef __SYCL_DEVICE_ONLY__ - return M_data; -#else - return __esimd_vload(&M_data); -#endif - } - - /// Whole region read. - simd read() const { return data(); } - - /// Whole region write. - simd &write(const simd &Val) { - set(Val.data()); - return *this; - } - - /// Whole region update with predicates. - void merge(const simd &Val, const mask_type_t &Mask) { - set(__esimd_wrregion( - data(), Val.data(), 0, Mask)); - } - void merge(const simd &Val1, simd Val2, const mask_type_t &Mask) { - Val2.merge(Val1, Mask); - set(Val2.data()); + // Implicit conversion constructor from another simd object of the same + // length. + template && + (length == SimdT::length)>> + simd(const SimdT &RHS) + : base_type(__builtin_convertvector(RHS.data(), vector_type)) { + __esimd_dbg_print(simd(const SimdT &RHS)); } - /// View this simd object in a different element type. - template auto bit_cast_view() &[[clang::lifetimebound]] { - using TopRegionTy = detail::compute_format_type_t; - using RetTy = simd_view; - TopRegionTy R(0); - return RetTy{*this, R}; + // Broadcast constructor with conversion. + template >> + simd(T1 Val) : base_type((Ty)Val) { + __esimd_dbg_print(simd(T1 Val)); } - template - __SYCL_DEPRECATED("use simd::bit_cast_view.") - auto format() & { - return bit_cast_view(); + /// Explicit conversion for simd_obj_impl into T. + template >> + operator To() const { + __esimd_dbg_print(explicit operator To()); + return (To)base_type::data()[0]; } - /// View as a 2-dimensional simd_view. - template - auto bit_cast_view() &[[clang::lifetimebound]] { - using TopRegionTy = - detail::compute_format_type_2d_t; - using RetTy = simd_view; - TopRegionTy R(0, 0); - return RetTy{*this, R}; - } - - template - __SYCL_DEPRECATED("use simd::bit_cast_view.") - auto format() & { - return bit_cast_view(); - } - - /// 1D region select, apply a region on top of this LValue object. - /// - /// \tparam Size is the number of elements to be selected. - /// \tparam Stride is the element distance between two consecutive elements. - /// \param Offset is the starting element offset. - /// \return the representing region object. - template - simd_view> select(uint16_t Offset = 0) &[ - [clang::lifetimebound]] { - region1d_t Reg(Offset); - return {*this, Reg}; - } - - /// 1D region select, apply a region on top of this RValue object. - /// - /// \tparam Size is the number of elements to be selected. - /// \tparam Stride is the element distance between two consecutive elements. - /// \param Offset is the starting element offset. - /// \return the value this region object refers to. - template - simd select(uint16_t Offset = 0) && { - simd &&Val = *this; - return __esimd_rdregion(Val.data(), - Offset); - } - - /// Read single element, return value only (not reference). - Ty operator[](int i) const { return data()[i]; } - - /// Read single element, return value only (not reference). - __SYCL_DEPRECATED("use operator[] form.") - Ty operator()(int i) const { return data()[i]; } - - /// Return writable view of a single element. - simd_view> operator[](int i) - [[clang::lifetimebound]] { - return select<1, 0>(i); - } - - /// Return writable view of a single element. - __SYCL_DEPRECATED("use operator[] form.") - simd_view> operator()(int i) { - return select<1, 0>(i); - } - - // TODO ESIMD_EXPERIMENTAL - /// Read multiple elements by their indices in vector - template - simd iselect(const simd &Indices) { - detail::vector_type_t Offsets = Indices.data() * sizeof(Ty); - return __esimd_rdindirect(data(), Offsets); - } - // TODO ESIMD_EXPERIMENTAL - /// update single element - void iupdate(ushort Index, Ty V) { - auto Val = data(); - Val[Index] = V; - set(Val); - } - // TODO ESIMD_EXPERIMENTAL - /// update multiple elements by their indices in vector - template - void iupdate(const simd &Indices, const simd &Val, - mask_type_t Mask) { - detail::vector_type_t Offsets = Indices.data() * sizeof(Ty); - set(__esimd_wrindirect(data(), Val.data(), Offsets, Mask)); - } - - // Use auto as a return type to honor C++ integer promotion rules, - // e.g. simd + simd -> simd -#define DEF_BINOP(BINOP, OPASSIGN) \ - ESIMD_INLINE friend auto operator BINOP(const simd &X, const simd &Y) { \ - using ComputeTy = detail::compute_type_t; \ - auto V0 = detail::convert(X.data()); \ - auto V1 = detail::convert(Y.data()); \ - auto V2 = V0 BINOP V1; \ - return ComputeTy(V2); \ - } \ - template >> \ - ESIMD_INLINE friend auto operator BINOP(const simd &X, T1 Y) { \ - return X BINOP simd((Ty)Y); \ - } \ - ESIMD_INLINE friend simd &operator OPASSIGN(simd &LHS, const simd &RHS) { \ - using ComputeTy = detail::compute_type_t; \ - auto V0 = detail::convert(LHS.data()); \ - auto V1 = detail::convert(RHS.data()); \ - auto V2 = V0 BINOP V1; \ - LHS.write(detail::convert(V2)); \ - return LHS; \ - } \ - ESIMD_INLINE friend simd &operator OPASSIGN(simd &LHS, const Ty &RHS) { \ - LHS OPASSIGN simd(RHS); \ - return LHS; \ - } - - DEF_BINOP(+, +=) - DEF_BINOP(-, -=) - DEF_BINOP(*, *=) - DEF_BINOP(/, /=) - DEF_BINOP(%, %=) - -#undef DEF_BINOP - - // TODO @rolandschulz, @mattkretz - // Introduce simd_mask type and let user use this type instead of specific - // type representation (simd) to make it more portable - // TODO @iburyl should be mask_type_t, which might become more abstracted in - // the future revisions. - // -#define DEF_RELOP(RELOP) \ - ESIMD_INLINE friend simd operator RELOP(const simd &X, \ - const simd &Y) { \ - auto R = X.data() RELOP Y.data(); \ - mask_type_t M(1); \ - return M & detail::convert>(R); \ - } \ - template >> \ - ESIMD_INLINE friend bool operator RELOP(const simd &X, T1 Y) { \ - return (Ty)X RELOP(Ty) Y; \ - } - - DEF_RELOP(>) - DEF_RELOP(>=) - DEF_RELOP(<) - DEF_RELOP(<=) - DEF_RELOP(==) - DEF_RELOP(!=) - -#undef DEF_RELOP - -#define DEF_BITWISE_OP(BITWISE_OP, OPASSIGN) \ - ESIMD_INLINE friend simd operator BITWISE_OP(const simd &X, const simd &Y) { \ - static_assert(std::is_integral(), "not integeral type"); \ - auto V2 = X.data() BITWISE_OP Y.data(); \ - return simd(V2); \ - } \ - ESIMD_INLINE friend simd &operator OPASSIGN(simd &LHS, const simd &RHS) { \ - static_assert(std::is_integral(), "not integeral type"); \ - auto V2 = LHS.data() BITWISE_OP RHS.data(); \ - LHS.write(detail::convert(V2)); \ - return LHS; \ - } \ - ESIMD_INLINE friend simd &operator OPASSIGN(simd &LHS, const Ty &RHS) { \ - LHS OPASSIGN simd(RHS); \ - return LHS; \ - } - - DEF_BITWISE_OP(&, &=) - DEF_BITWISE_OP(|, |=) - DEF_BITWISE_OP(^, ^=) - DEF_BITWISE_OP(<<, <<=) - DEF_BITWISE_OP(>>, >>=) - -#undef DEF_BITWISE_OP - - // Operator ++, -- + /// @{ + /// Infix and postfix operators ++, -- simd &operator++() { *this += 1; return *this; } + simd operator++(int) { - simd Ret(*this); + simd Ret(base_type::data()); operator++(); return Ret; } + simd &operator--() { *this -= 1; return *this; } + simd operator--(int) { - simd Ret(*this); + simd Ret(base_type::data()); operator--(); return Ret; } + /// @} -#define DEF_UNARY_OP(UNARY_OP) \ - simd operator UNARY_OP() { \ - auto V = UNARY_OP(data()); \ - return simd(V); \ +#define __ESIMD_DEF_SIMD_ARITH_UNARY_OP(ARITH_UNARY_OP) \ + template simd operator ARITH_UNARY_OP() { \ + static_assert(!std::is_unsigned_v, \ + #ARITH_UNARY_OP "doesn't apply to unsigned types"); \ + return simd(ARITH_UNARY_OP(base_type::data())); \ } - DEF_UNARY_OP(~) - DEF_UNARY_OP(+) - DEF_UNARY_OP(-) -#undef DEF_UNARY_OP + __ESIMD_DEF_SIMD_ARITH_UNARY_OP(-) + __ESIMD_DEF_SIMD_ARITH_UNARY_OP(+) +#undef __ESIMD_DEF_SIMD_ARITH_UNARY_OP +}; - // negation operator - auto operator!() { return *this == 0; } +/// Covert from a simd object with element type \c From to a simd object with +/// element type \c To. +template +ESIMD_INLINE simd convert(const simd &val) { + if constexpr (std::is_same_v) + return val; + else + return __builtin_convertvector(val.data(), detail::vector_type_t); +} - /// \name Replicate - /// Replicate simd instance given a region. - /// @{ - /// +namespace detail { +template +class simd_mask_impl + : public detail::simd_obj_impl< + T, N, simd_mask_impl, + std::enable_if_t>> { + using base_type = detail::simd_obj_impl>; - /// \tparam Rep is number of times region has to be replicated. - /// \return replicated simd instance. - template simd replicate() { - return replicate(0); - } +public: + using element_type = T; + using vector_type = typename base_type::vector_type; + static_assert(std::is_same_v> && + "mask impl type mismatch"); - /// \tparam Rep is number of times region has to be replicated. - /// \tparam W is width of src region to replicate. - /// \param Offset is offset in number of elements in src region. - /// \return replicated simd instance. - template - __SYCL_DEPRECATED("use simd::replicate_w") - simd replicate(uint16_t Offset) { - return replicate_w(Offset); - } + simd_mask_impl() = default; + simd_mask_impl(const simd_mask_impl &other) : base_type(other) {} - /// \tparam Rep is number of times region has to be replicated. - /// \tparam W is width of src region to replicate. - /// \param Offset is offset in number of elements in src region. - /// \return replicated simd instance. - template simd replicate_w(uint16_t Offset) { - return replicate_vs_w_hs(Offset); - } + /// Broadcast constructor with conversion. + template >> + simd_mask_impl(T1 Val) : base_type((T)Val) {} - /// \tparam Rep is number of times region has to be replicated. - /// \tparam VS vertical stride of src region to replicate. - /// \tparam W is width of src region to replicate. - /// \param Offset is offset in number of elements in src region. - /// \return replicated simd instance. - template - __SYCL_DEPRECATED("use simd::replicate_vs_w") - simd replicate(uint16_t Offset) { - return replicate_vs_w(Offset); - } + /// Implicit conversion constructor from a raw vector object. + // TODO this should be made inaccessible from user code. + simd_mask_impl(const vector_type &Val) : base_type(Val) {} - /// \tparam Rep is number of times region has to be replicated. - /// \tparam VS vertical stride of src region to replicate. - /// \tparam W width of src region to replicate. - /// \param Offset offset in number of elements in src region. - /// \return replicated simd instance. - template - simd replicate_vs_w(uint16_t Offset) { - return replicate_vs_w_hs(Offset); - } + /// Initializer list constructor. + __SYCL_DEPRECATED("use constructor from array, e.g: simd_mask<3> x({0,1,1});") + simd_mask_impl(std::initializer_list Ilist) : base_type(Ilist) {} - /// \tparam Rep is number of times region has to be replicated. - /// \tparam VS vertical stride of src region to replicate. - /// \tparam W is width of src region to replicate. - /// \tparam HS horizontal stride of src region to replicate. - /// \param Offset is offset in number of elements in src region. - /// \return replicated simd instance. - template - __SYCL_DEPRECATED("use simd::replicate_vs_w_hs") - simd replicate(uint16_t Offset) { - return replicate_vs_w_hs(Offset); + /// Construct from an array. To allow e.g. simd_mask m({1,0,0,1,...}). + template > + simd_mask_impl(const element_type(&&Arr)[N1]) { + base_type::template init_from_array(std::move(Arr)); } - /// \tparam Rep is number of times region has to be replicated. - /// \tparam VS vertical stride of src region to replicate. - /// \tparam W is width of src region to replicate. - /// \tparam HS horizontal stride of src region to replicate. - /// \param Offset is offset in number of elements in src region. - /// \return replicated simd instance. - template - simd replicate_vs_w_hs(uint16_t Offset) { - return __esimd_rdregion( - data(), Offset * sizeof(Ty)); - } - ///@} - - /// Any operation. - /// - /// \return 1 if any element is set, 0 otherwise. - template < - typename T1 = element_type, typename T2 = Ty, - typename = sycl::detail::enable_if_t::value, T2>> - uint16_t any() { - return __esimd_any(data()); - } + /// Implicit conversion from simd. + __SYCL_DEPRECATED(__ESIMD_MASK_DEPRECATION_MSG) + simd_mask_impl(const simd &Val) : base_type(Val.data()) {} - /// All operation. - /// - /// \return 1 if all elements are set, 0 otherwise. - template < - typename T1 = element_type, typename T2 = Ty, - typename = sycl::detail::enable_if_t::value, T2>> - uint16_t all() { - return __esimd_all(data()); +private: + static inline constexpr bool mask_size_ok_for_mem_io() { + constexpr unsigned Sz = sizeof(element_type) * N; + return (Sz >= detail::OperandSize::OWORD) && + (Sz % detail::OperandSize::OWORD == 0) && + detail::isPowerOf2(Sz / detail::OperandSize::OWORD) && + (Sz <= 8 * detail::OperandSize::OWORD); } - /// Write a simd-vector into a basic region of a simd object. - template - ESIMD_INLINE void - writeRegion(RTy Region, - const detail::vector_type_t &Val) { - using ElemTy = typename RTy::element_type; - if constexpr (N * sizeof(Ty) == RTy::length * sizeof(ElemTy)) - // update the entire vector - set(detail::bitcast(Val)); - else { - static_assert(!RTy::Is_2D); - // If element type differs, do bitcast conversion first. - auto Base = detail::bitcast(data()); - constexpr int BN = (N * sizeof(Ty)) / sizeof(ElemTy); - // Access the region information. - constexpr int M = RTy::Size_x; - constexpr int Stride = RTy::Stride_x; - uint16_t Offset = Region.M_offset_x * sizeof(ElemTy); - - // Merge and update. - auto Merged = __esimd_wrregion(Base, Val, Offset); - // Convert back to the original element type, if needed. - set(detail::bitcast(Merged)); - } +public: + // TODO add accessor-based mask memory operations. + + /// Load constructor. + // Implementation note: use SFINAE to avoid overload ambiguity: + // 1) with 'simd_mask(element_type v)' in 'simd_mask m(0)' + // 2) with 'simd_mask(const T1(&&arr)[N])' in simd_mask + // m((element_type*)p)' + template >> + explicit simd_mask_impl(const T1 *ptr) { + base_type::copy_from(ptr); + } + + /// Broadcast assignment operator to support simd_mask_impl n = a > b; + simd_mask_impl &operator=(element_type val) noexcept { + base_type::set(val); + return *this; } - /// Write a simd-vector into a nested region of a simd object. - template - ESIMD_INLINE void writeRegion( - std::pair Region, - const detail::vector_type_t &Val) { - // parent-region type - using PaTy = typename shape_type::type; - using ElemTy = typename TR::element_type; - using BT = typename PaTy::element_type; - constexpr int BN = PaTy::length; - - if constexpr (PaTy::Size_in_bytes == TR::Size_in_bytes) { - writeRegion(Region.second, detail::bitcast(Val)); - } else { - // Recursively read the base - auto Base = detail::readRegion(data(), Region.second); - // If element type differs, do bitcast conversion first. - auto Base1 = detail::bitcast(Base); - constexpr int BN1 = PaTy::Size_in_bytes / sizeof(ElemTy); - - if constexpr (!TR::Is_2D) { - // Access the region information. - constexpr int M = TR::Size_x; - constexpr int Stride = TR::Stride_x; - uint16_t Offset = Region.first.M_offset_x * sizeof(ElemTy); - - // Merge and update. - Base1 = __esimd_wrregion(Base1, Val, Offset); - } else { - static_assert(std::is_same::value); - // Read columns with non-trivial horizontal stride. - constexpr int M = TR::length; - constexpr int VS = PaTy::Size_x * TR::Stride_y; - constexpr int W = TR::Size_x; - constexpr int HS = TR::Stride_x; - constexpr int ParentWidth = PaTy::Size_x; - - // Compute the byte offset for the starting element. - uint16_t Offset = static_cast( - (Region.first.M_offset_y * PaTy::Size_x + Region.first.M_offset_x) * - sizeof(ElemTy)); - - // Merge and update. - Base1 = __esimd_wrregion( - Base1, Val, Offset); - } - // Convert back to the original element type, if needed. - auto Merged1 = detail::bitcast(Base1); - // recursively write it back to the base - writeRegion(Region.second, Merged1); - } + template > + operator bool() { + return base_type::data()[0] != 0; } - /// @name Memory operations - /// TODO NOTE: These APIs do not support cache hint specification yet, as this - /// is WIP. Later addition of hints is not expected to break code using these - /// APIs. - /// - /// @{ - - /// Copy a contiguous block of data from memory into this simd object. - /// The amount of memory copied equals the total size of vector elements in - /// this object. - /// @param addr the memory address to copy from. Must be a pointer to the - /// global address space, otherwise behavior is undefined. - ESIMD_INLINE void copy_from(const Ty *const addr) SYCL_ESIMD_FUNCTION; - - /// Copy a contiguous block of data from memory into this simd object. - /// The amount of memory copied equals the total size of vector elements in - /// this object. - /// Source memory location is represented via a global accessor and offset. - /// @param acc accessor to copy from. - /// @param offset offset to copy from. - template - ESIMD_INLINE - detail::EnableIfAccessor - copy_from(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION; - - /// Copy all vector elements of this object into a contiguous block in memory. - /// @param addr the memory address to copy to. Must be a pointer to the - /// global address space, otherwise behavior is undefined. - ESIMD_INLINE void copy_to(Ty *addr) SYCL_ESIMD_FUNCTION; - - /// Copy all vector elements of this object into a contiguous block in memory. - /// Destination memory location is represented via a global accessor and - /// offset. - /// @param acc accessor to copy from. - /// @param offset offset to copy from. - template - ESIMD_INLINE - detail::EnableIfAccessor - copy_to(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION; - - /// @} // Memory operations -private: - // The underlying data for this vector. - vector_type M_data; - - void set(const vector_type &Val) { -#ifndef __SYCL_DEVICE_ONLY__ - M_data = Val; -#else - __esimd_vstore(&M_data, Val); -#endif + /// Unary logical negation operator. + simd_mask_impl operator!() { + auto R = base_type::data() == vector_type(0); + return simd_mask_impl{__builtin_convertvector(R, vector_type) & + vector_type(1)}; } }; -template -ESIMD_INLINE simd convert(simd val) { - return __builtin_convertvector(val.data(), detail::vector_type_t); -} - -// ----------- Outlined implementations of esimd class APIs. - -template void simd::copy_from(const T *const Addr) { - constexpr unsigned Sz = sizeof(T) * N; - static_assert(Sz >= detail::OperandSize::OWORD, - "block size must be at least 1 oword"); - static_assert(Sz % detail::OperandSize::OWORD == 0, - "block size must be whole number of owords"); - static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * detail::OperandSize::OWORD, - "block size must be at most 8 owords"); - - uintptr_t AddrVal = reinterpret_cast(Addr); - *this = - __esimd_flat_block_read_unaligned( - AddrVal); -} - -template -template -ESIMD_INLINE - detail::EnableIfAccessor - simd::copy_from(AccessorT acc, uint32_t offset) { - constexpr unsigned Sz = sizeof(T) * N; - static_assert(Sz >= detail::OperandSize::OWORD, - "block size must be at least 1 oword"); - static_assert(Sz % detail::OperandSize::OWORD == 0, - "block size must be whole number of owords"); - static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * detail::OperandSize::OWORD, - "block size must be at most 8 owords"); -#if defined(__SYCL_DEVICE_ONLY__) - auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); - *this = __esimd_block_read(surf_ind, offset); -#else - *this = __esimd_block_read(acc, offset); -#endif // __SYCL_DEVICE_ONLY__ -} - -template void simd::copy_to(T *addr) { - constexpr unsigned Sz = sizeof(T) * N; - static_assert(Sz >= detail::OperandSize::OWORD, - "block size must be at least 1 oword"); - static_assert(Sz % detail::OperandSize::OWORD == 0, - "block size must be whole number of owords"); - static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * detail::OperandSize::OWORD, - "block size must be at most 8 owords"); - - uintptr_t AddrVal = reinterpret_cast(addr); - __esimd_flat_block_write(AddrVal, - data()); -} +} // namespace detail -template -template -ESIMD_INLINE - detail::EnableIfAccessor - simd::copy_to(AccessorT acc, uint32_t offset) { - constexpr unsigned Sz = sizeof(T) * N; - static_assert(Sz >= detail::OperandSize::OWORD, - "block size must be at least 1 oword"); - static_assert(Sz % detail::OperandSize::OWORD == 0, - "block size must be whole number of owords"); - static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * detail::OperandSize::OWORD, - "block size must be at most 8 owords"); - -#if defined(__SYCL_DEVICE_ONLY__) - auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); - __esimd_block_write(surf_ind, offset >> 4, data()); -#else - __esimd_block_write(acc, offset >> 4, data()); -#endif // __SYCL_DEVICE_ONLY__ -} +#undef __ESIMD_DEF_RELOP +#undef __ESIMD_DEF_BITWISE_OP } // namespace esimd } // namespace experimental @@ -688,11 +221,12 @@ ESIMD_INLINE } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) -#ifndef __SYCL_DEVICE_ONLY__ template -std::ostream & -operator<<(std::ostream &OS, - const sycl::ext::intel::experimental::esimd::simd &V) { +std::ostream &operator<<(std::ostream &OS, const __SEIEE::simd &V) +#ifdef __SYCL_DEVICE_ONLY__ + ; +#else +{ OS << "{"; for (int I = 0; I < N; I++) { OS << V[I]; @@ -702,5 +236,4 @@ operator<<(std::ostream &OS, OS << "}"; return OS; } - -#endif +#endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp index 51751e3cca65f..5263ab72031fd 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp @@ -11,6 +11,7 @@ #pragma once #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -25,143 +26,74 @@ namespace esimd { /// /// \ingroup sycl_esimd template -class simd_view : public detail::simd_view_impl> { +class simd_view : public detail::simd_view_impl { + template friend class detail::simd_obj_impl; + template friend class detail::simd_mask_impl; + template friend class simd_view; template friend class simd; - template friend class detail::simd_view_impl; + template friend class detail::simd_view_impl; public: - using BaseClass = - detail::simd_view_impl>; + static_assert(detail::is_simd_obj_impl_derivative_v); + using BaseClass = detail::simd_view_impl; + + // Deduce the corresponding value type from its region type. using ShapeTy = typename shape_type::type; static constexpr int length = ShapeTy::Size_x * ShapeTy::Size_y; + using base_type = BaseTy; + template + using get_simd_t = typename BaseClass::template get_simd_t; + + /// The region type of this class. + using region_type = RegionTy; + + /// The element type of this class, which could be different from the element + /// type of the base object type. using element_type = typename ShapeTy::element_type; - /// The simd type if reading this simd_view object. - using value_type = simd; + /// The simd type if reading the object. + using value_type = get_simd_t; -private: + /// The underlying builtin value type + using vector_type = detail::vector_type_t; + +protected: + /// @{ + /// Constructors. simd_view(BaseTy &Base, RegionTy Region) : BaseClass(Base, Region) {} simd_view(BaseTy &&Base, RegionTy Region) : BaseClass(Base, Region) {} + /// @} public: // Default copy and move constructors for simd_view. simd_view(const simd_view &Other) = default; simd_view(simd_view &&Other) = default; - /// @{ - /// Assignment operators. - simd_view &operator=(const simd_view &Other) { - *this = Other.read(); - return *this; - } - simd_view &operator=(const value_type &Val) { - this->M_base.writeRegion(this->M_region, Val.data()); - return *this; - } - /// @} - - /// Move assignment operator. - simd_view &operator=(simd_view &&Other) { - *this = Other.read(); - return *this; - } + using BaseClass::operator--; + using BaseClass::operator++; + using BaseClass::operator=; +}; -#define DEF_BINOP(BINOP, OPASSIGN) \ - ESIMD_INLINE friend auto operator BINOP(const simd_view &X, \ - const value_type &Y) { \ - using ComputeTy = detail::compute_type_t; \ - auto V0 = \ - detail::convert(X.read().data()); \ - auto V1 = detail::convert(Y.data()); \ - auto V2 = V0 BINOP V1; \ - return ComputeTy(V2); \ - } \ - ESIMD_INLINE friend auto operator BINOP(const simd_view &X, \ - const element_type &Y) { \ - return X BINOP(value_type) Y; \ - } \ - ESIMD_INLINE friend auto operator BINOP(const element_type &X, \ - const simd_view &Y) { \ - return (value_type)X BINOP Y; \ - } \ - ESIMD_INLINE friend auto operator BINOP(const value_type &X, \ +#define __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(RELOP) \ + /* simd_view RELOP simd_view */ \ + ESIMD_INLINE friend bool operator RELOP(const simd_view &X, \ const simd_view &Y) { \ - using ComputeTy = detail::compute_type_t; \ - auto V0 = detail::convert(X.data()); \ - auto V1 = \ - detail::convert(Y.read().data()); \ - auto V2 = V0 BINOP V1; \ - return ComputeTy(V2); \ - } - - DEF_BINOP(+, +=) - DEF_BINOP(-, -=) - DEF_BINOP(*, *=) - DEF_BINOP(/, /=) - DEF_BINOP(%, %=) - -#undef DEF_BINOP - -#define DEF_BITWISE_OP(BITWISE_OP, OPASSIGN) \ - ESIMD_INLINE friend auto operator BITWISE_OP(const simd_view &X, \ - const value_type &Y) { \ - static_assert(std::is_integral(), "not integral type"); \ - auto V2 = X.read().data() BITWISE_OP Y.data(); \ - return simd(V2); \ - } \ - ESIMD_INLINE friend auto operator BITWISE_OP(const simd_view &X, \ - const element_type &Y) { \ - return X BITWISE_OP(value_type) Y; \ - } \ - ESIMD_INLINE friend auto operator BITWISE_OP(const value_type &X, \ - const simd_view &Y) { \ - static_assert(std::is_integral(), "not integral type"); \ - auto V2 = X.data() BITWISE_OP Y.read().data(); \ - return simd(V2); \ - } - - DEF_BITWISE_OP(&, &=) - DEF_BITWISE_OP(|, |=) - DEF_BITWISE_OP(^, ^=) - DEF_BITWISE_OP(>>, >>=) - DEF_BITWISE_OP(<<, <<=) - -#undef DEF_BITWISE_OP - -#define DEF_RELOP(RELOP) \ - ESIMD_INLINE friend simd operator RELOP( \ - const simd_view &X, const value_type &Y) { \ - auto R = X.read().data() RELOP Y.data(); \ - mask_type_t M(1); \ - return M & detail::convert>(R); \ - } \ - ESIMD_INLINE friend simd operator RELOP( \ - const value_type &X, const simd_view &Y) { \ - auto R = X.data() RELOP Y.read().data(); \ - mask_type_t M(1); \ - return M & detail::convert>(R); \ + return (element_type)X RELOP(element_type) Y; \ } \ - ESIMD_INLINE friend simd operator RELOP( \ - const simd_view &X, const element_type &Y) { \ - return X RELOP(value_type) Y; \ + \ + /* simd_view RELOP SCALAR */ \ + template >> \ + ESIMD_INLINE friend bool operator RELOP(const simd_view &X, T1 Y) { \ + return (element_type)X RELOP Y; \ } \ - ESIMD_INLINE friend simd operator RELOP( \ - const simd_view &X, const simd_view &Y) { \ - return (X RELOP Y.read()); \ + \ + /* SCALAR RELOP simd_view */ \ + template >> \ + ESIMD_INLINE friend bool operator RELOP(T1 X, const simd_view &Y) { \ + return X RELOP(element_type) Y; \ } - DEF_RELOP(>) - DEF_RELOP(>=) - DEF_RELOP(<) - DEF_RELOP(<=) - DEF_RELOP(==) - DEF_RELOP(!=) - -#undef DEF_RELOP -}; - /// This is a specialization of simd_view class with a single element. /// Objects of such a class are created in the following situation: /// simd v = 1; @@ -174,24 +106,26 @@ class simd_view : public detail::simd_view_impl v[1] && v[2] < 42; /// /// \ingroup sycl_esimd -template -class simd_view> - : public detail::simd_view_impl< - BaseTy, region1d_scalar_t, - simd_view>> { - template friend class simd; - template friend class detail::simd_view_impl; +template +class simd_view> + : public detail::simd_view_impl> { + template friend class detail::simd_obj_impl; + template friend class detail::simd_view_impl; public: - using RegionTy = region1d_scalar_t; - using BaseClass = - detail::simd_view_impl>; + using RegionTy = region1d_scalar_t; + using BaseClass = detail::simd_view_impl; using ShapeTy = typename shape_type::type; static constexpr int length = ShapeTy::Size_x * ShapeTy::Size_y; static_assert(1 == length, "length of this view is not equal to 1"); /// The element type of this class, which could be different from the element /// type of the base object type. - using element_type = T; + using element_type = typename ShapeTy::element_type; + using base_type = BaseTy; + template + using get_simd_t = typename BaseClass::template get_simd_t; + /// The simd type if reading the object. + using value_type = get_simd_t; private: simd_view(BaseTy &Base, RegionTy Region) : BaseClass(Base, Region) {} @@ -203,28 +137,16 @@ class simd_view> return v[0]; } + using BaseClass::operator--; + using BaseClass::operator++; using BaseClass::operator=; -#define DEF_RELOP(RELOP) \ - ESIMD_INLINE friend bool operator RELOP(const simd_view &X, \ - const simd_view &Y) { \ - return (element_type)X RELOP(element_type) Y; \ - } \ - template ::value && \ - detail::is_vectorizable_v::value>> \ - ESIMD_INLINE friend bool operator RELOP(const simd_view &X, T1 Y) { \ - return (element_type)X RELOP Y; \ - } - - DEF_RELOP(>) - DEF_RELOP(>=) - DEF_RELOP(<) - DEF_RELOP(<=) - DEF_RELOP(==) - DEF_RELOP(!=) - -#undef DEF_RELOP + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(>) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(>=) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(<) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(<=) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(==) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(!=) }; // TODO: remove code duplication in two class specializations for a simd_view @@ -235,23 +157,16 @@ class simd_view> /// simd v = 1; /// auto v1 = v.select<2, 1>(0); /// auto v2 = v1[0]; // simd_view of a nested region for a single element -template -class simd_view, NestedRegion>> +template +class simd_view, NestedRegion>> : public detail::simd_view_impl< - BaseTy, - std::pair, NestedRegion>, - simd_view, - NestedRegion>>> { + BaseTy, std::pair, NestedRegion>> { template friend class simd; template friend class detail::simd_view_impl; public: - using RegionTy = - std::pair, NestedRegion>; - using BaseClass = - detail::simd_view_impl>; + using RegionTy = std::pair, NestedRegion>; + using BaseClass = detail::simd_view_impl; using ShapeTy = typename shape_type::type; static constexpr int length = ShapeTy::Size_x * ShapeTy::Size_y; static_assert(1 == length, "length of this view is not equal to 1"); @@ -264,35 +179,23 @@ class simd_view::value && \ - detail::is_vectorizable_v::value>> \ - ESIMD_INLINE friend bool operator RELOP(const simd_view &X, T1 Y) { \ - return (element_type)X RELOP Y; \ - } - - DEF_RELOP(>) - DEF_RELOP(>=) - DEF_RELOP(<) - DEF_RELOP(<=) - DEF_RELOP(==) - DEF_RELOP(!=) - -#undef DEF_RELOP + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(>) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(>=) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(<) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(<=) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(==) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(!=) }; +#undef __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP + } // namespace esimd } // namespace experimental } // namespace intel diff --git a/sycl/test/esimd/esimd_math.cpp b/sycl/test/esimd/esimd_math.cpp index 17420b678fd2c..e5b0b5e7d2d3e 100644 --- a/sycl/test/esimd/esimd_math.cpp +++ b/sycl/test/esimd/esimd_math.cpp @@ -8,12 +8,12 @@ using namespace sycl::ext::intel::experimental::esimd; bool test_esimd_mask() __attribute__((sycl_device)) { - simd a(0); + simd_mask<16> a(0); a.select<4, 1>(4) = 1; a.select<4, 1>(12) = 1; unsigned int b = esimd_pack_mask(a); - simd c = esimd_unpack_mask<16>(b); + simd_mask<16> c = esimd_unpack_mask<16>(b); unsigned int d = esimd_pack_mask(c); diff --git a/sycl/test/esimd/intrins_trans.cpp b/sycl/test/esimd/intrins_trans.cpp index 23726115e189e..989ae37b6818b 100644 --- a/sycl/test/esimd/intrins_trans.cpp +++ b/sycl/test/esimd/intrins_trans.cpp @@ -42,7 +42,7 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo() { simd v1(0, x + z); simd offsets(0, y); simd v_addr(reinterpret_cast(ptr)); - simd pred; + simd_mask pred; v_addr += offsets; __esimd_flat_atomic0( @@ -50,10 +50,10 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo() { // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.inc.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) __esimd_flat_atomic1( - v_addr.data(), v1, pred.data()); + v_addr.data(), v1.data(), pred.data()); // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.add.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) __esimd_flat_atomic2( - v_addr.data(), v1, v1, pred.data()); + v_addr.data(), v1.data(), v1.data(), pred.data()); // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.cmpxchg.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) uintptr_t addr = reinterpret_cast(ptr); diff --git a/sycl/test/esimd/operators.cpp b/sycl/test/esimd/operators.cpp new file mode 100644 index 0000000000000..6e9334cba2175 --- /dev/null +++ b/sycl/test/esimd/operators.cpp @@ -0,0 +1,514 @@ +// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics + +#include + +using namespace sycl::ext::intel::experimental::esimd; + +// dummy use of 'v' - storing it to memory +#define USE_v *(decltype(v) *)(++out) = v + +// --- bitwise +template +[[intel::sycl_explicit_simd]] auto +bitwise_op_test_impl(const simd &x, simd &x1, + const simd &y, simd &y1, + const simd_mask<8> &m, simd_mask<8> &m1, + simd *out) { + int a = 1; + + // simd ^ simd + { + auto k = x1 ^= y; + auto v = k ^ y; + USE_v; + } + { + auto v = x ^ y; + USE_v; + } + // simd ^ SCALAR + { + auto k = x1 ^= 5; + auto v = k ^ y; + USE_v; + } + { + auto v = x ^ (T2)5; + USE_v; + } + // SCALAR ^ simd + { + auto v = (T1)5 ^ y; + USE_v; + } + + // mask ^ mask + { + auto k = m1 ^= m; + auto v = k ^ m; + USE_v; + } + { + auto v = m ^ m1; + USE_v; + } + // mask ^ SCALAR + { + auto k = m1 ^= a; + auto v = m ^ k; + USE_v; + } + { + auto v = m ^ 5; + USE_v; + } + // SCALAR ^ mask + { + auto v = 5 ^ m; + USE_v; + } + + // simd_view ^ simd_view + { + simd k = x1.template select<8, 1>() ^= y1.template select<8, 1>(); + auto v = k ^ y; + USE_v; + } + { + simd k = x1.template select<8, 1>().template select<8, 1>() ^= + y1.template select<8, 1>().template select<8, 1>(); + auto v = k ^ y; + USE_v; + } + { + auto v = x1.template select<8, 1>() ^ y1.template select<8, 1>(); + USE_v; + } + { + auto v = x1.template select<8, 1>().template select<8, 1>() ^ + y1.template select<8, 1>().template select<8, 1>(); + USE_v; + } + // simd ^ simd_view + { + auto k = x1 ^= y1.template select<8, 1>(); + auto v = k ^ y; + USE_v; + } + { + auto v = x ^ y1.template select<8, 1>(); + USE_v; + } + // simd_view ^ simd + { + simd k = x1.template select<8, 1>() ^= y; + auto v = k ^ y; + USE_v; + } + { + auto v = x1.template select<8, 1>() ^ y; + USE_v; + } + + // simd_view ^ simd_view + { + simd_mask<8> k = m1.select<8, 1>() ^= m1.select<8, 1>(); + auto v = k ^ m; + USE_v; + } + { + simd_mask<8> k = m1.select<8, 1>().select<8, 1>() ^= + m1.select<8, 1>().select<8, 1>(); + auto v = k ^ m; + USE_v; + } + { + auto v = m1.select<8, 1>() ^ m1.select<8, 1>(); + USE_v; + } + { + auto v = + m1.select<8, 1>().select<8, 1>() ^ m1.select<8, 1>().select<8, 1>(); + USE_v; + } + // simd_mask ^ simd_view + { + auto k = m1 ^= m1.select<8, 1>(); + auto v = k ^ m; + USE_v; + } + { + auto v = m ^ m1.select<8, 1>(); + USE_v; + } + { + auto v = m ^ m1.select<8, 1>().select<8, 1>(); + USE_v; + } + // simd_view ^ simd_mask + { + simd_mask<8> k = m1.select<8, 1>() ^= m; + auto v = k ^ m; + USE_v; + } + { + auto v = m1.select<8, 1>() ^ m; + USE_v; + } + { + auto v = m1.select<8, 1>().select<8, 1>() ^ m; + USE_v; + } + + // simd_view ^ SCALAR + { + simd k = x1.template select<8, 1>() ^= (T2)5; + auto v = k ^ y; + USE_v; + } + { + auto v = x1.template select<8, 1>() ^ (T2)5; + USE_v; + } + { + auto v = x1.template select<8, 1>().template select<8, 1>() ^ (T2)5; + USE_v; + } + // SCALAR ^ simd_view + { + auto v = (T1)5 ^ y1.template select<8, 1>(); + USE_v; + } + { + auto v = (T1)5 ^ y1.template select<8, 1>().template select<8, 1>(); + USE_v; + } + + // simd_view ^ SCALAR + { + simd_mask<8> k = m1.template select<8, 1>() ^= a; + auto v = k ^ m; + USE_v; + } + { + auto v = m1.template select<8, 1>() ^ a; + USE_v; + } + { + auto v = m1.template select<8, 1>().template select<8, 1>() ^ a; + USE_v; + } + // SCALAR ^ simd_view + { + auto v = a ^ y1.template select<8, 1>(); + USE_v; + } + { + auto v = a ^ y1.template select<8, 1>().template select<8, 1>(); + USE_v; + } +} + +template +[[intel::sycl_explicit_simd]] void bitwise_op_test(simd *out) { + simd x((T1)10); + simd x1((T1)11); + const simd y((T2)17); + simd y1((T2)19); + const simd_mask<8> m(1); + simd_mask<8> m1(0); + + bitwise_op_test_impl(x, x1, y, y1, m, m1, out); +} + +[[intel::sycl_explicit_simd]] void bitwise_op_tests(simd *out) { + bitwise_op_test(out); + bitwise_op_test(out); + bitwise_op_test(out); +} + +// --- arithmetic +template +[[intel::sycl_explicit_simd]] auto +arith_bin_op_test_impl(const simd &x, simd &x1, + const simd &y, simd &y1, + simd *out) { + // simd * simd + { + auto k = x1 *= y; + auto v = x * k; + USE_v; + } + { + auto v = x * y; + USE_v; + } + // simd * SCALAR + { + auto k = x1 *= (T2)5; + auto v = x * k; + USE_v; + } + { + auto v = x * (T2)5; + USE_v; + } + // SCALAR * simd + { + auto v = (T1)5 * y; + USE_v; + } + + // simd_view * simd_view + { + simd k = x1.template select<8, 1>() *= y1.template select<8, 1>(); + auto v = x1.template select<8, 1>() * k; + USE_v; + } + { + auto v = x1.template select<8, 1>() * y1.template select<8, 1>(); + USE_v; + } + // simd * simd_view + { + auto k = x1 *= y1.template select<8, 1>(); + auto v = x * k; + USE_v; + } + { + auto v = x * y1.template select<8, 1>(); + USE_v; + } + // simd_view * simd + { + simd k = x1.template select<8, 1>() *= y; + auto v = k * y; + USE_v; + } + { + auto v = x1.template select<8, 1>() * y; + USE_v; + } + + // simd_view * SCALAR + { + simd k = x1.template select<8, 1>() *= (T2)5; + auto v = k * (T2)5; + USE_v; + } + { + auto v = x1.template select<8, 1>() * (T2)5; + USE_v; + } + // SCALAR * simd_view + { + auto v = (T1)5 * y1.template select<8, 1>(); + USE_v; + } +} + +template +[[intel::sycl_explicit_simd]] void arith_bin_op_test(simd *out) { + simd x((T1)10); + simd x1((T1)11); + const simd y((T2)17); + simd y1((T2)19); + + arith_bin_op_test_impl(x, x1, y, y1, out); +} + +[[intel::sycl_explicit_simd]] void +arith_bin_op_tests(simd *out) { + arith_bin_op_test(out); + arith_bin_op_test(out); + arith_bin_op_test(out); + arith_bin_op_test(out); + arith_bin_op_test(out); +} + +// --- equality comparison + +template +[[intel::sycl_explicit_simd]] auto +equ_cmp_test_impl(const simd &x, simd &x1, const simd &y, + simd &y1, const simd_mask<8> &m, simd_mask<8> &m1, + simd *out) { + // simd == simd + { + auto v = x == y; + USE_v; + } + // simd == SCALAR + { + auto v = x == (T2)5; + USE_v; + } + // SCALAR == simd + { + auto v = (T1)5 == y; + USE_v; + } + + // mask == mask + { + auto v = m == m1; + USE_v; + } + // mask == SCALAR + { + auto v = m == 5; + USE_v; + } + // SCALAR == mask + { + auto v = 5 == m; + USE_v; + } + + // simd_view == simd_view + { + auto v = x1.template select<8, 1>() == y1.template select<8, 1>(); + USE_v; + } + // simd == simd_view + { + auto v = x == y1.template select<8, 1>(); + USE_v; + } + // simd_view == simd + { + auto v = x1.template select<8, 1>() == y; + USE_v; + } + + // simd_view == simd_view + { + auto v = m1.select<8, 1>() == m1.select<8, 1>(); + USE_v; + } + // simd_mask == simd_view + { + auto v = m == m1.select<8, 1>(); + USE_v; + } + // simd_view == simd_mask + { + auto v = m1.select<8, 1>() == m; + USE_v; + } + + // simd_view == SCALAR + { + auto v = x1.template select<8, 1>() == (T2)5; + USE_v; + } + // SCALAR == simd_view + { + auto v = (T1)5 == y1.template select<8, 1>(); + USE_v; + } + + // simd_view == SCALAR + int a = 1; + { + auto v = m1.select<8, 1>() == a; + USE_v; + } + // SCALAR == simd_view + { + auto v = a == m1.select<8, 1>(); + USE_v; + } +} + +template +[[intel::sycl_explicit_simd]] void equ_cmp_test(simd *out) { + simd x((T1)10); + simd x1((T1)11); + const simd y((T2)17); + simd y1((T2)19); + const simd_mask<8> m(1); + simd_mask<8> m1(0); + + equ_cmp_test_impl(x, x1, y, y1, m, m1, out); +} + +[[intel::sycl_explicit_simd]] void equ_cmp_tests(simd *out) { + equ_cmp_test(out); + equ_cmp_test(out); + equ_cmp_test(out); + equ_cmp_test(out); + equ_cmp_test(out); +} + +// --- comparison + +template +[[intel::sycl_explicit_simd]] auto +lt_cmp_test_impl(const simd &x, simd &x1, const simd &y, + simd &y1, const simd_mask<8> &m, simd_mask<8> &m1, + simd *out) { + // simd < simd + { + auto v = x < y; + USE_v; + } + // simd < SCALAR + { + auto v = x < (T2)5; + USE_v; + } + // SCALAR < simd + { + auto v = (T1)5 == y; + USE_v; + } + + // simd_view < simd_view + { + auto v = x1.template select<8, 1>() < y1.template select<8, 1>(); + USE_v; + } + // simd < simd_view + { + auto v = x < y1.template select<8, 1>(); + USE_v; + } + // simd_view < simd + { + auto v = x1.template select<8, 1>() < y; + USE_v; + } + + // simd_view < SCALAR + { + auto v = x1.template select<8, 1>() < (T2)5; + USE_v; + } + // SCALAR == simd_view + { + auto v = (T1)5 < y1.template select<8, 1>(); + USE_v; + } +} + +template +[[intel::sycl_explicit_simd]] void lt_cmp_test(simd *out) { + simd x((T1)10); + simd x1((T1)11); + const simd y((T2)17); + simd y1((T2)19); + const simd_mask<8> m(1); + simd_mask<8> m1(0); + + lt_cmp_test_impl(x, x1, y, y1, m, m1, out); +} + +[[intel::sycl_explicit_simd]] void lt_cmp_tests(simd *out) { + lt_cmp_test(out); + lt_cmp_test(out); + lt_cmp_test(out); + lt_cmp_test(out); + lt_cmp_test(out); +} diff --git a/sycl/test/esimd/regression/simd_wrapper.cpp b/sycl/test/esimd/regression/simd_wrapper.cpp index 0655e220a6e73..dee22b23fa5b3 100644 --- a/sycl/test/esimd/regression/simd_wrapper.cpp +++ b/sycl/test/esimd/regression/simd_wrapper.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// RUN: %clangxx -fsycl -fsyntax-only -fsycl-device-only -Xclang -verify %s #include #include diff --git a/sycl/test/esimd/simd.cpp b/sycl/test/esimd/simd.cpp index 5b19035211d15..a38b732751312 100644 --- a/sycl/test/esimd/simd.cpp +++ b/sycl/test/esimd/simd.cpp @@ -43,6 +43,7 @@ void test_conversion() SYCL_ESIMD_FUNCTION { simd f = v; simd c = f; simd c1 = f.select<16, 1>(0); + c.select<32, 1>(0) = f; f = v + static_cast>(c); } @@ -64,16 +65,24 @@ bool test_simd_format() SYCL_ESIMD_FUNCTION { (decltype(ref3)::getSizeX() == 4) && (decltype(ref3)::getSizeY() == 8); } -bool test_simd_select() SYCL_ESIMD_FUNCTION { - simd v(0, 1); - auto ref0 = v.select<4, 2>(1); // r{1, 3, 5, 7} - auto ref1 = v.bit_cast_view(); // 0,1,2,3; - // 4,5,6,7; - // 8,9,10,11; - // 12,13,14,15 - auto ref2 = ref1.select<2, 1, 2, 2>(0, 1); - return ref0[0] == 1 && decltype(ref2)::getSizeX() == 2 && - decltype(ref2)::getStrideY() == 1; +bool test_simd_select(int a) SYCL_ESIMD_FUNCTION { + { + simd f = a; + simd c1 = 2; + c1.select<16, 1>(0) = f.select<16, 1>(0); + c1.select<16, 1>(0).select<16, 1>(0) = f.select<16, 1>(0).select<16, 1>(0); + } + { + simd v(0, 1); + auto ref0 = v.select<4, 2>(1); // r{1, 3, 5, 7} + auto ref1 = v.bit_cast_view(); // 0,1,2,3; + // 4,5,6,7; + // 8,9,10,11; + // 12,13,14,15 + auto ref2 = ref1.select<2, 1, 2, 2>(0, 1); + return ref0[0] == 1 && decltype(ref2)::getSizeX() == 2 && + decltype(ref2)::getStrideY() == 1; + } } bool test_2d_offset() SYCL_ESIMD_FUNCTION { @@ -111,7 +120,6 @@ bool test_simd_unary_ops() SYCL_ESIMD_FUNCTION { v0 <<= v1; v1 = -v0; v0 = ~v1; - v1 = !v0; return v1[0] == 1; } @@ -259,7 +267,7 @@ bool test_simd_iselect() SYCL_ESIMD_FUNCTION { simd a(0, 2); auto data = v.iselect(a); data += 16; - v.iupdate(a, data, 1); + v.iupdate(a, data, simd_mask<8>(1)); auto ref = v.select<8, 2>(0); return ref[0] == 16 && ref[14] == 32; } diff --git a/sycl/test/esimd/simd_mask.cpp b/sycl/test/esimd/simd_mask.cpp new file mode 100644 index 0000000000000..be1d36af6b646 --- /dev/null +++ b/sycl/test/esimd/simd_mask.cpp @@ -0,0 +1,71 @@ +// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note %s +// RUN: %clangxx -fsycl -fsyntax-only %s + +// This test checks that both host and device compilers can +// successfully compile simd_mask APIs. + +#include +#include +#include +#include + +using namespace sycl::ext::intel::experimental::esimd; +using namespace cl::sycl; + +#define DEFINE_BIN_OP_TEST(op, name) \ + template \ + SYCL_EXTERNAL SYCL_ESIMD_FUNCTION simd_mask test_impl_##name( \ + simd_mask &m1, simd_mask &m2) { \ + return m1 op m2; \ + } \ + \ + simd_mask<1> test_impl_1_##name(simd_mask<1> &m1, simd_mask<1> &m2) { \ + return test_impl_##name(m1, m2); \ + } \ + \ + simd_mask<17> test_impl_17_##name(simd_mask<17> &m1, simd_mask<17> &m2) { \ + return test_impl_##name(m1, m2); \ + } \ + \ + simd_mask<32> test_impl_32_##name(simd_mask<32> &m1, simd_mask<32> &m2) { \ + return test_impl_##name(m1, m2); \ + } + +DEFINE_BIN_OP_TEST(&&, and) +DEFINE_BIN_OP_TEST(||, or) +DEFINE_BIN_OP_TEST(&, bit_and) +DEFINE_BIN_OP_TEST(|, bit_or) +DEFINE_BIN_OP_TEST(^, xor) +DEFINE_BIN_OP_TEST(==, eq) +DEFINE_BIN_OP_TEST(!=, ne) +DEFINE_BIN_OP_TEST(&=, bit_and_eq) +DEFINE_BIN_OP_TEST(|=, bit_or_eq) +DEFINE_BIN_OP_TEST(^=, xor_eq) + +SYCL_EXTERNAL SYCL_ESIMD_FUNCTION simd_mask<8> misc_tests(bool val) { + simd_mask<8> m1(val); // broadcast constructor + simd_mask<8> m2; // default constructor + simd_mask<8> m3(m1[4]); // operator[] + simd_mask<8> m4 = !m3; // operator! + static_assert(m4.length == 8, "size() failed"); + simd ch1(1); + simd ch2(2); + simd_mask<8> m5 = ch1 > ch2; + return m5; +} + +SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void compat_test(float *ptr) { + simd pred(1); + simd offsets; + + // expected-warning@+1 {{deprecated}} + auto x1 = gather(ptr, offsets, pred); + // expected-warning@+1 {{deprecated}} + auto x2 = gather(ptr, offsets, simd{}); + simd_mask<16> m1(0); + // expected-warning@+1 {{deprecated}} + m1 = pred; + simd_mask<16> m2(0); + // expected-warning@+1 {{deprecated}} + m2 = std::move(pred); +} diff --git a/sycl/test/esimd/simd_merge.cpp b/sycl/test/esimd/simd_merge.cpp index 5fed799cf90c8..bd52b914df74c 100644 --- a/sycl/test/esimd/simd_merge.cpp +++ b/sycl/test/esimd/simd_merge.cpp @@ -10,7 +10,7 @@ using namespace sycl::ext::intel::experimental::esimd; bool test_simd_merge1() __attribute__((sycl_device)) { simd v0 = 1; simd v1 = 2; - simd mask = 0; + simd_mask<16> mask = 0; mask.select<4, 4>(0) = 1; v0.merge(v1, mask); return v0[0] == 2 && v0[4] == 2 && v0[8] == 2 && v0[12] == 2; @@ -20,7 +20,7 @@ bool test_simd_merge2() __attribute__((sycl_device)) { simd v0 = 1; simd v1 = 2; simd v2 = 3; - simd mask = 0; + simd_mask<16> mask = 0; mask.select<4, 4>(0) = 1; v0.merge(v1, v2, (v1 < v2) & mask); return v0[0] == 2 && v0[4] == 2 && v0[8] == 2 && v0[12] == 2 && v0[3] == 3 && @@ -30,7 +30,7 @@ bool test_simd_merge2() __attribute__((sycl_device)) { bool test_simd_merge2d1() __attribute__((sycl_device)) { simd v0 = 1; simd v1 = 2; - simd mask = 0; + simd_mask<16> mask = 0; mask.select<4, 4>(0) = 1; auto v0_2d = v0.bit_cast_view(); v0_2d.merge(v1, mask); @@ -41,7 +41,7 @@ bool test_simd_merge2d2() __attribute__((sycl_device)) { simd v0 = 1; simd v1 = 2; simd v2 = 3; - simd mask = 0; + simd_mask<16> mask = 0; mask.select<4, 4>(0) = 1; auto v0_2d = v0.bit_cast_view(); v0_2d.merge(v1, v2, mask); diff --git a/sycl/test/esimd/simd_replicate_deprecated.cpp b/sycl/test/esimd/simd_replicate_deprecated.cpp index 52adc19670b16..07d6d05b54b30 100644 --- a/sycl/test/esimd/simd_replicate_deprecated.cpp +++ b/sycl/test/esimd/simd_replicate_deprecated.cpp @@ -9,8 +9,8 @@ using namespace sycl::ext::intel::experimental::esimd; bool test_replicate1() { simd v0(0, 1); // expected-warning@+3 2 {{deprecated}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{has been explicitly marked deprecated here}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{has been explicitly marked deprecated here}} auto v0_rep = v0.replicate<4, 2>(2); return v0[2] == v0_rep[2] && v0[3] == v0_rep[5]; @@ -19,8 +19,8 @@ bool test_replicate1() { bool test_replicate2() { simd v0(0, 1); // expected-warning@+3 2 {{deprecated}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{has been explicitly marked deprecated here}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{has been explicitly marked deprecated here}} auto v0_rep = v0.replicate<2, 4, 2>(1); return v0_rep[0] == v0[1] && v0_rep[1] == v0[2] && v0_rep[2] == v0[5]; @@ -29,8 +29,8 @@ bool test_replicate2() { bool test_replicate3() { simd v0(0, 1); // expected-warning@+3 2 {{deprecated}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{has been explicitly marked deprecated here}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{has been explicitly marked deprecated here}} auto v0_rep = v0.replicate<2, 4, 2, 2>(1); return v0_rep[0] == v0[1] && v0_rep[1] == v0[3] && v0_rep[2] == v0[5]; diff --git a/sycl/test/esimd/simd_view.cpp b/sycl/test/esimd/simd_view.cpp index ffdb3b6f4f791..e107ca45b65f7 100644 --- a/sycl/test/esimd/simd_view.cpp +++ b/sycl/test/esimd/simd_view.cpp @@ -20,8 +20,28 @@ SYCL_ESIMD_FUNCTION bool test_simd_view_bin_ops() { ref0 *= 2; ref0 /= ref1; ref0 /= 2; - return v0[0] == 1; -} + if (v0[0] == 1) + return ref0 + (short)3; + else + return ref0 + ref1; +} + +// auto test_simd_view_bitwise_ops() __attribute__((sycl_device)) { +// simd v0 = 1; +// simd v1 = 2; +// auto ref0 = v0.select<8, 2>(0); +// auto ref1 = v1.select<8, 2>(0); +// simd v2 = (ref0 | ref1) & (ref0 | 3); +// ref0 |= 3; +// ref0 |= ref1; +// simd v3 = (ref0 ^ ref1) & (ref0 ^ 3); +// ref0 ^= 3; +// ref0 ^= ref1; +// simd v4 = (ref0 & ref1) | (ref0 & 3); +// ref0 &= 3; +// ref0 &= ref1; +// return ref0; +// } SYCL_ESIMD_FUNCTION bool test_simd_view_unary_ops() { simd v0 = 1; @@ -54,10 +74,10 @@ SYCL_ESIMD_FUNCTION bool test_simd_view_assign3() { simd v1 = 1; auto mask = (v0.select<16, 1>(0) > v1.select<16, 1>(0)); auto mask2 = (v0 > v1); - simd s = 0; + simd_mask<64> s = 0; auto g4 = s.bit_cast_view(); - simd val = (g4.row(2) & mask); - simd val1 = + simd_mask<16> val = (g4.row(2) & mask); + simd_mask<16> val1 = (g4.row(2) & mask2.bit_cast_view().row(0)); return val[0] == 0 && val1[0] == 0; } From 841ea9cdc33b53a0683d43692e449342dd7460e5 Mon Sep 17 00:00:00 2001 From: kbobrovs Date: Thu, 16 Sep 2021 18:03:47 -0700 Subject: [PATCH 2/8] Fix test failures, address review comments. Signed-off-by: kbobrovs --- .../experimental/esimd/detail/operators.hpp | 42 ++++++++++++------- .../esimd/detail/simd_obj_impl.hpp | 3 +- .../esimd/detail/simd_view_impl.hpp | 20 +++++---- .../intel/experimental/esimd/detail/types.hpp | 7 +++- .../intel/experimental/esimd/simd_view.hpp | 7 +++- sycl/test/esimd/esimd-util-compiler-eval.cpp | 13 +++--- sycl/test/esimd/lane_id.cpp | 2 +- sycl/test/esimd/simd_subscript.cpp | 4 +- sycl/test/esimd/simd_view.cpp | 25 ++++++----- sycl/test/esimd/simd_view_ret_warn.cpp | 2 +- 10 files changed, 74 insertions(+), 51 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/operators.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/operators.hpp index 1f332a1b46220..039d27ca95874 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/operators.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/operators.hpp @@ -235,16 +235,21 @@ namespace __SEIEE { template ::element_type, \ class T2 = typename __SEIEE::shape_type::element_type, \ + auto N1 = __SEIEE::shape_type::length, \ + auto N2 = __SEIEE::shape_type::length, \ class = \ std::enable_if_t<__SEIEED::is_simd_type_v == \ __SEIEED::is_simd_type_v && \ - (__SEIEE::shape_type::length == \ - __SEIEE::shape_type::length) && \ - COND>> \ + (N1 == N2 || N1 == 1 || N2 == 1) && COND>> \ inline auto operator BINOP( \ const __SEIEE::simd_view &LHS, \ const __SEIEE::simd_view &RHS) { \ - return LHS.read() BINOP RHS.read(); \ + if constexpr (N1 == 1) \ + return (T1)LHS.read()[0] BINOP RHS.read(); \ + else if constexpr (N2 == 1) \ + return LHS.read() BINOP(T2) RHS.read()[0]; \ + else \ + return LHS.read() BINOP RHS.read(); \ } \ \ /* simd* BINOP simd_view */ \ @@ -337,20 +342,27 @@ __ESIMD_DEF_SIMD_VIEW_BIN_OP(||, __SEIEED::is_simd_mask_type_v) \ /* simd_view CMPOP simd_view */ \ template == \ - __SEIEED::is_simd_type_v< \ - SimdT2>)&&/* the length of the views \ - must match as well: */ \ - (__SEIEE::shape_type::length == \ - __SEIEE::shape_type::length) && \ - COND>> \ + auto N1 = __SEIEE::shape_type::length, \ + auto N2 = __SEIEE::shape_type::length, \ + class = std::enable_if_t == \ + __SEIEED::is_simd_type_v< \ + SimdT2>)&&/* the length of the views \ + must match as well: */ \ + (N1 == N2 || N1 == 1 || N2 == 1) && \ + COND>> \ inline auto operator CMPOP( \ const __SEIEE::simd_view &LHS, \ const __SEIEE::simd_view &RHS) { \ - return LHS.read() CMPOP RHS.read(); \ + using T1 = typename __SEIEE::shape_type::element_type; \ + using T2 = typename __SEIEE::shape_type::element_type; \ + if constexpr (N1 == 1) \ + return (T1)LHS.read()[0] CMPOP RHS.read(); \ + else if constexpr (N2 == 1) \ + return LHS.read() CMPOP(T2) RHS.read()[0]; \ + else \ + return LHS.read() CMPOP RHS.read(); \ } \ \ /* simd_view CMPOP simd_obj_impl */ \ diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp index fe65f64dff832..9f86353f14830 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp @@ -1,5 +1,4 @@ -//==------------ - simd_obj_impl.hpp - DPC++ Explicit SIMD API -//--------------------==// +//==------------ - simd_obj_impl.hpp - DPC++ Explicit SIMD API -------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp index da1f3f5d901f5..a64d9cd8e13fc 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp @@ -286,12 +286,16 @@ template class simd_view_impl { /// @{ /// Assignment operators. - Derived &operator=(const Derived &Other) { return write(Other.read()); } + simd_view_impl &operator=(const simd_view_impl &Other) { + return write(Other.read()); + } Derived &operator=(const value_type &Val) { return write(Val); } /// Move assignment operator. - Derived &operator=(Derived &&Other) { return write(Other.read()); } + simd_view_impl &operator=(simd_view_impl &&Other) { + return write(Other.read()); + } template == @@ -342,7 +346,7 @@ template class simd_view_impl { template > auto row(int i) { - return select<1, 0, getSizeX(), 1>(i, 0) + return select<1, 1, getSizeX(), 1>(i, 0) .template bit_cast_view(); } @@ -351,7 +355,7 @@ template class simd_view_impl { template > auto column(int i) { - return select(0, i); + return select(0, i); } /// Read a single element from a 1D region, by value only. @@ -375,7 +379,7 @@ template class simd_view_impl { template > auto operator[](int i) { - return select<1, 0>(i); + return select<1, 1>(i); } /// Return a writeable view of a single element. @@ -383,7 +387,7 @@ template class simd_view_impl { typename = sycl::detail::enable_if_t> __SYCL_DEPRECATED("use operator[] form.") auto operator()(int i) { - return select<1, 0>(i); + return select<1, 1>(i); } /// \name Replicate @@ -402,7 +406,7 @@ template class simd_view_impl { /// \return replicated simd instance. template get_simd_t replicate(uint16_t OffsetX) { - return replicate(0, OffsetX); + return replicate(0, OffsetX); } /// \tparam Rep is number of times region has to be replicated. @@ -413,7 +417,7 @@ template class simd_view_impl { template get_simd_t replicate(uint16_t OffsetY, uint16_t OffsetX) { - return replicate(OffsetY, OffsetX); + return replicate(OffsetY, OffsetX); } /// \tparam Rep is number of times region has to be replicated. diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp index d5096e5f00e24..2746886c8253b 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp @@ -329,13 +329,16 @@ static inline constexpr bool is_simd_mask_type_v = is_simd_mask_type::value; // @{ // Checks if given type is a view of the simd type. -template struct is_simd_view_type : std::false_type {}; +template struct is_simd_view_type_impl : std::false_type {}; template -struct is_simd_view_type> +struct is_simd_view_type_impl> : std::conditional_t, std::true_type, std::false_type> {}; +template +struct is_simd_view_type : is_simd_view_type_impl> {}; + template static inline constexpr bool is_simd_view_type_v = is_simd_view_type::value; // @} diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp index 5263ab72031fd..a6d18fcde31a4 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp @@ -70,6 +70,11 @@ class simd_view : public detail::simd_view_impl { simd_view(const simd_view &Other) = default; simd_view(simd_view &&Other) = default; + simd_view &operator=(const simd_view &Other) { + BaseClass::operator=(Other); + return *this; + } + using BaseClass::operator--; using BaseClass::operator++; using BaseClass::operator=; @@ -162,7 +167,7 @@ class simd_view, NestedRegion>> : public detail::simd_view_impl< BaseTy, std::pair, NestedRegion>> { template friend class simd; - template friend class detail::simd_view_impl; + template friend class detail::simd_view_impl; public: using RegionTy = std::pair, NestedRegion>; diff --git a/sycl/test/esimd/esimd-util-compiler-eval.cpp b/sycl/test/esimd/esimd-util-compiler-eval.cpp index ed03ea3ce81d1..52cac182d32b3 100644 --- a/sycl/test/esimd/esimd-util-compiler-eval.cpp +++ b/sycl/test/esimd/esimd-util-compiler-eval.cpp @@ -19,11 +19,8 @@ static_assert(log2<1024 * 1024>() == 20, ""); using BaseTy = simd; using RegionTy = region1d_t; -using RegionTy1 = region1d_scalar_t; -static_assert( - !is_simd_view_v< - simd_view_impl>>::value, - ""); -static_assert(is_simd_view_v>::value, ""); -static_assert(is_simd_view_v>::value, ""); -static_assert(!is_simd_view_v::value, ""); +using RegionTy1 = region1d_scalar_t; +static_assert(!is_simd_view_type_v>, ""); +static_assert(is_simd_view_type_v>, ""); +static_assert(is_simd_view_type_v>, ""); +static_assert(!is_simd_view_type_v, ""); diff --git a/sycl/test/esimd/lane_id.cpp b/sycl/test/esimd/lane_id.cpp index df6031ac1810f..0f25e7cc4c6e6 100644 --- a/sycl/test/esimd/lane_id.cpp +++ b/sycl/test/esimd/lane_id.cpp @@ -26,7 +26,7 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo(int x) { SIMT_BEGIN(16, lane) //CHECK: define internal spir_func void @_ZZ3fooiENKUlvE_clEv({{.*}}) {{.*}} #[[ATTR:[0-9]+]] //CHECK: %{{[0-9a-zA-Z_.]+}} = tail call spir_func i32 @_Z15__esimd_lane_idv() - v.select<1, 0>(lane) = x++; + v.select<1, 1>(lane) = x++; SIMT_END return v; } diff --git a/sycl/test/esimd/simd_subscript.cpp b/sycl/test/esimd/simd_subscript.cpp index 5e1ae1b1f56fa..bf41f2d8b895b 100644 --- a/sycl/test/esimd/simd_subscript.cpp +++ b/sycl/test/esimd/simd_subscript.cpp @@ -65,7 +65,7 @@ void test_simd_writable_subscript() SYCL_ESIMD_FUNCTION { v[1] = 0; // returns simd_view // CHECK: simd_subscript.cpp:69{{.*}}warning: {{.*}} deprecated - // CHECK: sycl/ext/intel/experimental/esimd/simd.hpp:{{.*}} note: {{.*}} has been explicitly marked deprecated here + // CHECK: sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:{{.*}} note: {{.*}} has been explicitly marked deprecated here v(1) = 0; } @@ -76,7 +76,7 @@ void test_simd_const_subscript() SYCL_ESIMD_FUNCTION { cv[1] = 0; // CHECK: simd_subscript.cpp:80{{.*}}warning: {{.*}} deprecated - // CHECK: sycl/ext/intel/experimental/esimd/simd.hpp:{{.*}} note: {{.*}} has been explicitly marked deprecated here + // CHECK: sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:{{.*}} note: {{.*}} has been explicitly marked deprecated here int val3 = cv(0); } diff --git a/sycl/test/esimd/simd_view.cpp b/sycl/test/esimd/simd_view.cpp index e107ca45b65f7..56436312db5f3 100644 --- a/sycl/test/esimd/simd_view.cpp +++ b/sycl/test/esimd/simd_view.cpp @@ -6,7 +6,7 @@ using namespace sycl::ext::intel::experimental::esimd; -SYCL_ESIMD_FUNCTION bool test_simd_view_bin_ops() { +SYCL_ESIMD_FUNCTION auto test_simd_view_bin_ops() { simd v0 = 1; simd v1 = 2; auto ref0 = v0.select<8, 2>(0); @@ -51,7 +51,7 @@ SYCL_ESIMD_FUNCTION bool test_simd_view_unary_ops() { ref0 <<= ref1; ref1 = -ref0; ref0 = ~ref1; - auto mask = !ref0; + auto mask = !(ref0 < ref1); return v1[0] == 1; } @@ -143,21 +143,21 @@ void test_simd_view_impl_api_ret_types() SYCL_ESIMD_FUNCTION { simd x = 0; auto v1 = x.select<2, 1>(0); // simd_view, region1d_t> - static_assert(detail::is_simd_view_v::value, ""); + static_assert(detail::is_simd_view_type_v, ""); auto v2 = v1.select<1, 1>( 0); // simd_view, std::pair, region_base>> - static_assert(detail::is_simd_view_v::value, ""); + static_assert(detail::is_simd_view_type_v, ""); auto v2_int = v2.bit_cast_view(); - static_assert(detail::is_simd_view_v::value, ""); + static_assert(detail::is_simd_view_type_v, ""); auto v2_int_2D = v2.bit_cast_view(); - static_assert(detail::is_simd_view_v::value, ""); + static_assert(detail::is_simd_view_type_v, ""); auto v3 = x.select<2, 1>(2); auto &v4 = (v1 += v3); - static_assert(detail::is_simd_view_v::value, ""); - static_assert(detail::is_simd_view_v::value, ""); + static_assert(detail::is_simd_view_type_v, ""); + static_assert(detail::is_simd_view_type_v, ""); } void test_simd_view_subscript() SYCL_ESIMD_FUNCTION { @@ -189,9 +189,12 @@ void test_simd_view_writeable_subscript() SYCL_ESIMD_FUNCTION { void test_simd_view_binop_with_conv_to_scalar() SYCL_ESIMD_FUNCTION { simd s = 0; auto g = s.bit_cast_view(); - auto x = g.row(1) - (g.row(1))[0]; // binary op - auto y = g.row(1) & (g.row(1))[0]; // bitwise op - auto z = g.row(1) < (g.row(1))[0]; // relational op + auto x1 = g.row(1) - (g.row(1))[0]; // binary op + auto x2 = (g.row(1))[0] - g.row(1); // binary op + auto y1 = g.row(1) & (g.row(1))[0]; // bitwise op + auto y2 = (g.row(1))[0] & g.row(1); // bitwise op + auto z1 = g.row(1) < (g.row(1))[0]; // relational op + auto z2 = (g.row(1))[0] < g.row(1); // relational op } // This code is OK. The result of bit_cast_view should be mapped diff --git a/sycl/test/esimd/simd_view_ret_warn.cpp b/sycl/test/esimd/simd_view_ret_warn.cpp index 072c6299331ba..97324bc7bbecf 100644 --- a/sycl/test/esimd/simd_view_ret_warn.cpp +++ b/sycl/test/esimd/simd_view_ret_warn.cpp @@ -7,7 +7,7 @@ using namespace sycl::ext::intel::experimental::esimd; // and it should be programmers fault, similar to string_view. // However, sometimes we could return simd_view from a function // implicitly. This test checks that users will see a warning in such situation. -simd_view, region1d_t> f1(simd x) { +simd_view, region1d_t> f1(simd x) { // expected-warning@+1 {{address of stack memory associated with parameter 'x' returned}} return x[0]; } From eb578e522b93880ad98fc3e5e2bd252ae7eb97f2 Mon Sep 17 00:00:00 2001 From: kbobrovs Date: Sat, 18 Sep 2021 14:04:32 -0700 Subject: [PATCH 3/8] Signed-off-by: kbobrovs Fix test failures: - fix vertical stride in replicate - allow unary logical negation for simd with integer element type - use data() to access M_data in non-constructors --- .../experimental/esimd/detail/simd_obj_impl.hpp | 14 +++++++++++--- .../experimental/esimd/detail/simd_view_impl.hpp | 16 +++++++++++----- .../sycl/ext/intel/experimental/esimd/simd.hpp | 7 ------- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp index 9f86353f14830..70f488fa80146 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp @@ -534,9 +534,17 @@ template class simd_obj_impl { /// @} // Memory operations /// Bitwise inversion, available in all subclasses. - template Derived operator~() { - static_assert(std::is_integral_v, "'~' applies only to integral types"); - return Derived(~M_data); + template >> + Derived operator~() { + return Derived(~data()); + } + + /// Unary logical negation operator, available in all subclasses. + template >> + simd_mask operator!() { + using MaskVecT = typename simd_mask::vector_type; + auto R = data() == vector_type(0); + return simd_mask{__builtin_convertvector(R, MaskVecT) & MaskVecT(1)}; } #define __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(BINOP, OPASSIGN, COND) \ diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp index a64d9cd8e13fc..af498e887952d 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp @@ -277,11 +277,14 @@ template class simd_view_impl { #undef __ESIMD_DEF_UNARY_OP - template >> + /// Unary logical negeation operator. Applies only to integer element types. + template >> auto operator!() { + using MaskVecT = typename simd_mask_type::vector_type; auto V = read().data() == 0; - return get_simd_t(V); + return simd_mask_type{__builtin_convertvector(V, MaskVecT) & + MaskVecT(1)}; } /// @{ @@ -290,9 +293,12 @@ template class simd_view_impl { return write(Other.read()); } + Derived &operator=(const Derived &Other) { return write(Other.read()); } + Derived &operator=(const value_type &Val) { return write(Val); } /// Move assignment operator. + Derived &operator=(Derived &&Other) { return write(Other.read()); } simd_view_impl &operator=(simd_view_impl &&Other) { return write(Other.read()); } @@ -406,7 +412,7 @@ template class simd_view_impl { /// \return replicated simd instance. template get_simd_t replicate(uint16_t OffsetX) { - return replicate(0, OffsetX); + return replicate(0, OffsetX); } /// \tparam Rep is number of times region has to be replicated. @@ -417,7 +423,7 @@ template class simd_view_impl { template get_simd_t replicate(uint16_t OffsetY, uint16_t OffsetX) { - return replicate(OffsetY, OffsetX); + return replicate(OffsetY, OffsetX); } /// \tparam Rep is number of times region has to be replicated. diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp index 70065d7142e58..b85883fe36a52 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp @@ -200,13 +200,6 @@ class simd_mask_impl operator bool() { return base_type::data()[0] != 0; } - - /// Unary logical negation operator. - simd_mask_impl operator!() { - auto R = base_type::data() == vector_type(0); - return simd_mask_impl{__builtin_convertvector(R, vector_type) & - vector_type(1)}; - } }; } // namespace detail From b01eb88becc2a8fc9a53924382ee3cbbdd2d7a98 Mon Sep 17 00:00:00 2001 From: kbobrovs Date: Sun, 19 Sep 2021 16:52:52 -0700 Subject: [PATCH 4/8] Remove redundant code; add more test cases for simd_view. Signed-off-by: kbobrovs --- .../esimd/detail/simd_view_impl.hpp | 12 +- sycl/test/esimd/simd_view.cpp | 111 +++++++++++++++--- 2 files changed, 98 insertions(+), 25 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp index af498e887952d..72f9b9381ad70 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp @@ -311,15 +311,6 @@ template class simd_view_impl { return write(convert(reinterpret_cast(Other))); } - template == is_simd_type_v)&&( - __SEIEE::shape_type::length == - __SEIEE::shape_type::length)>> - Derived &operator=(const simd_view &Other) { - return write(convert(Other.read())); - } - template >> Derived &operator=(T1 RHS) { return write(value_type((element_type)RHS)); @@ -332,15 +323,18 @@ template class simd_view_impl { *this += 1; return cast_this_to_derived(); } + value_type operator++(int) { value_type Ret(read()); operator++(); return Ret; } + Derived &operator--() { *this -= 1; return cast_this_to_derived(); } + value_type operator--(int) { value_type Ret(read()); operator--(); diff --git a/sycl/test/esimd/simd_view.cpp b/sycl/test/esimd/simd_view.cpp index 56436312db5f3..d7cbb2ed2c35a 100644 --- a/sycl/test/esimd/simd_view.cpp +++ b/sycl/test/esimd/simd_view.cpp @@ -26,22 +26,39 @@ SYCL_ESIMD_FUNCTION auto test_simd_view_bin_ops() { return ref0 + ref1; } -// auto test_simd_view_bitwise_ops() __attribute__((sycl_device)) { -// simd v0 = 1; -// simd v1 = 2; -// auto ref0 = v0.select<8, 2>(0); -// auto ref1 = v1.select<8, 2>(0); -// simd v2 = (ref0 | ref1) & (ref0 | 3); -// ref0 |= 3; -// ref0 |= ref1; -// simd v3 = (ref0 ^ ref1) & (ref0 ^ 3); -// ref0 ^= 3; -// ref0 ^= ref1; -// simd v4 = (ref0 & ref1) | (ref0 & 3); -// ref0 &= 3; -// ref0 &= ref1; -// return ref0; -// } +SYCL_ESIMD_FUNCTION auto test_simd_view_bitwise_ops() { + simd v0 = 1; + simd v1 = 2; + auto ref0 = v0.select<8, 2>(0); + auto ref1 = v1.select<8, 2>(0); + simd v2 = (ref0 | ref1) & (ref0 | 3); + ref0 |= 3; + ref0 |= ref1; + simd v3 = (ref0 ^ ref1) & (ref0 ^ 3); + ref0 ^= 3; + ref0 ^= ref1; + simd v4 = (ref0 & ref1) | (ref0 & 3); + ref0 &= 3; + ref0 &= ref1; + return ref0; +} + +SYCL_ESIMD_FUNCTION auto test_simd_mask_view_bitwise_ops() { + simd_mask<16> v0 = 1; + simd_mask<16> v1 = 2; + auto ref0 = v0.select<8, 2>(0); + auto ref1 = v1.select<8, 2>(0); + simd_mask<8> v2 = (ref0 | ref1) & (ref0 | 3); + ref0 |= 3; + ref0 |= ref1; + simd_mask<8> v3 = (ref0 ^ ref1) & (ref0 ^ 3); + ref0 ^= 3; + ref0 ^= ref1; + simd_mask<8> v4 = (ref0 & ref1) | (ref0 & 3); + ref0 &= 3; + ref0 &= ref1; + return ref0; +} SYCL_ESIMD_FUNCTION bool test_simd_view_unary_ops() { simd v0 = 1; @@ -224,3 +241,65 @@ void test_simd_view_len1_binop() SYCL_ESIMD_FUNCTION { auto v2 = s.select<2, 1>(0); auto x = v1 * v2; } + +void test_simd_view_assign_op() SYCL_ESIMD_FUNCTION { + // multiple elements + { +#define N 4 + // simd - assign views of different element type + simd v1 = 0; + simd v2 = 0; + // - region is a region type (top-level region) + v1.select(0) = v2.select(0); + v2.select(0) = v1.select(0); + // - region is a std::pair (nested region) + v1.select<8, 2>(0).select(1) = v2.select<8, 2>(0).select(1); + v2.select<8, 2>(0).select(1) = v1.select<8, 2>(0).select(1); + // - first region is top-level, second - nested + v1.select<4, 2>(0) = v2.select<8, 2>(0).select<4, 1>(1); + // - first region is nested, second - top-level + v2.select<8, 2>(0).select<4, 1>(1) = v1.select<4, 2>(0); + + // simd_mask + simd_mask<32> m1 = 0; + simd_mask<16> m2 = 0; + // - region is a region type (top-level region) + m1.select<4, 2>(0) = m2.select<4, 2>(0); + m2.select<4, 2>(0) = m1.select<4, 2>(0); + // - region is a std::pair (nested region) + m1.select<8, 2>(0).select(1) = m2.select<8, 2>(0).select(1); + m2.select<8, 2>(0).select(1) = m1.select<8, 2>(0).select(1); + // - first region is top-level, second - nested + m1.select<4, 2>(0) = m2.select<8, 2>(0).select<4, 1>(1); + // - first region is nested, second - top-level + m2.select<8, 2>(0).select<4, 1>(1) = m1.select<4, 2>(0); +#undef N + } + // single element + { +#define N 1 + // simd - assign views of different element type + simd v1 = 0; + simd v2 = 0; + // - region is a region type (top-level region) + v1.select(0) = v2.select(0); + v2[0] = v1[0]; + v2[1] = v1.select(1); + // - region is a std::pair (nested region) + v1.select<4, 2>(0).select(1) = v2.select<4, 2>(0).select(1); + v2.select<4, 2>(0).select(1) = v1.select<4, 2>(0).select(1); + + // simd_mask + simd_mask<16> m1 = 0; + simd_mask<8> m2 = 0; + // - region is a region type (top-level region) + m1.select(0) = m2.select(0); + m2[0] = m1[0]; + m2[1] = m1.select(1); + // - region is a std::pair (nested region) + m1.select<4, 2>(0).select(1) = m2.select<4, 2>(0).select(1); + m2.select<4, 2>(0)[1] = m1.select<4, 2>(0)[1]; + m2.select<4, 2>(0)[2] = m1.select<4, 2>(0).select(2); +#undef N + } +} From ce590380600015c62d0f3741f0bb9d06ca828d0a Mon Sep 17 00:00:00 2001 From: kbobrovs Date: Mon, 20 Sep 2021 18:02:29 -0700 Subject: [PATCH 5/8] Use simd<...>(val) instead of simd<...>{val} to avoid costly init list ctor. Signed-off-by: kbobrovs --- .../sycl/ext/intel/experimental/esimd/math.hpp | 12 ++++++------ .../sycl/ext/intel/experimental/esimd/memory.hpp | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp index dc2fba53bba9e..c4b42655d7456 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp @@ -693,17 +693,17 @@ esimd_max(simd src0, simd src1, int flag = saturation_off) { if constexpr (std::is_floating_point::value) { auto Result = __esimd_fmax(src0.data(), src1.data()); Result = (flag == saturation_off) ? Result : __esimd_satf(Result); - return simd{Result}; + return simd(Result); } else if constexpr (std::is_unsigned::value) { auto Result = __esimd_umax(src0.data(), src1.data()); Result = (flag == saturation_off) ? Result : __esimd_uutrunc_sat(Result); - return simd{Result}; + return simd(Result); } else { auto Result = __esimd_smax(src0.data(), src1.data()); Result = (flag == saturation_off) ? Result : __esimd_sstrunc_sat(Result); - return simd{Result}; + return simd(Result); } } @@ -780,17 +780,17 @@ esimd_min(simd src0, simd src1, int flag = saturation_off) { if constexpr (std::is_floating_point::value) { auto Result = __esimd_fmin(src0.data(), src1.data()); Result = (flag == saturation_off) ? Result : __esimd_satf(Result); - return simd{Result}; + return simd(Result); } else if constexpr (std::is_unsigned::value) { auto Result = __esimd_umin(src0.data(), src1.data()); Result = (flag == saturation_off) ? Result : __esimd_uutrunc_sat(Result); - return simd{Result}; + return simd(Result); } else { auto Result = __esimd_smin(src0.data(), src1.data()); Result = (flag == saturation_off) ? Result : __esimd_sstrunc_sat(Result); - return simd{Result}; + return simd(Result); } } diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp index 606844a790027..96b6e7bbfbcdb 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp @@ -317,7 +317,7 @@ ESIMD_INLINE ESIMD_NODEBUG !std::is_pointer::value, void> scatter(AccessorTy acc, simd vals, simd offsets, - uint32_t glob_offset = 0, simd_mask pred = simd_mask{1}) { + uint32_t glob_offset = 0, simd_mask pred = simd_mask(1)) { constexpr int TypeSizeLog2 = detail::ElemsPerAddrEncoding(); // TODO (performance) use hardware-supported scale once BE supports it @@ -362,7 +362,7 @@ ESIMD_INLINE ESIMD_NODEBUG template ESIMD_INLINE ESIMD_NODEBUG T scalar_load(AccessorTy acc, uint32_t offset) { - const simd Res = gather(acc, simd{offset}); + const simd Res = gather(acc, simd(offset)); return Res[0]; } @@ -372,7 +372,7 @@ template ESIMD_INLINE ESIMD_NODEBUG void scalar_store(AccessorTy acc, uint32_t offset, T val) { - scatter(acc, simd{val}, simd{offset}); + scatter(acc, simd(val), simd(offset)); } /// Gathering read for the given starting pointer \p p and \p offsets. From a732cc8da8a98aac670d598def6ef63a4eae1598 Mon Sep 17 00:00:00 2001 From: kbobrovs Date: Tue, 21 Sep 2021 10:53:22 -0700 Subject: [PATCH 6/8] moved simd_mask_impl to a separate header. Signed-off-by: kbobrovs --- .../esimd/detail/simd_mask_impl.hpp | 111 ++++++++++++++++++ .../ext/intel/experimental/esimd/simd.hpp | 83 +------------ 2 files changed, 112 insertions(+), 82 deletions(-) create mode 100644 sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_mask_impl.hpp diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_mask_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_mask_impl.hpp new file mode 100644 index 0000000000000..2fb14989d4369 --- /dev/null +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_mask_impl.hpp @@ -0,0 +1,111 @@ +//==------------ - simd_mask_impl.hpp - DPC++ Explicit SIMD API ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Implementation detail of Explicit SIMD mask class. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace ext { +namespace intel { +namespace experimental { +namespace esimd { +namespace detail { + +#define __ESIMD_MASK_DEPRECATION_MSG \ + "Use of 'simd' class to represent predicate or mask is deprecated. Use " \ + "'simd_mask' instead." + +template +class simd_mask_impl + : public detail::simd_obj_impl< + T, N, simd_mask_impl, + std::enable_if_t>> { + using base_type = detail::simd_obj_impl>; + +public: + using element_type = T; + using vector_type = typename base_type::vector_type; + static_assert(std::is_same_v> && + "mask impl type mismatch"); + + simd_mask_impl() = default; + simd_mask_impl(const simd_mask_impl &other) : base_type(other) {} + + /// Broadcast constructor with conversion. + template >> + simd_mask_impl(T1 Val) : base_type((T)Val) {} + + /// Implicit conversion constructor from a raw vector object. + // TODO this should be made inaccessible from user code. + simd_mask_impl(const vector_type &Val) : base_type(Val) {} + + /// Initializer list constructor. + __SYCL_DEPRECATED("use constructor from array, e.g: simd_mask<3> x({0,1,1});") + simd_mask_impl(std::initializer_list Ilist) : base_type(Ilist) {} + + /// Construct from an array. To allow e.g. simd_mask m({1,0,0,1,...}). + template > + simd_mask_impl(const element_type(&&Arr)[N1]) { + base_type::template init_from_array(std::move(Arr)); + } + + /// Implicit conversion from simd. + __SYCL_DEPRECATED(__ESIMD_MASK_DEPRECATION_MSG) + simd_mask_impl(const simd &Val) : base_type(Val.data()) {} + +private: + static inline constexpr bool mask_size_ok_for_mem_io() { + constexpr unsigned Sz = sizeof(element_type) * N; + return (Sz >= detail::OperandSize::OWORD) && + (Sz % detail::OperandSize::OWORD == 0) && + detail::isPowerOf2(Sz / detail::OperandSize::OWORD) && + (Sz <= 8 * detail::OperandSize::OWORD); + } + +public: + // TODO add accessor-based mask memory operations. + + /// Load constructor. + // Implementation note: use SFINAE to avoid overload ambiguity: + // 1) with 'simd_mask(element_type v)' in 'simd_mask m(0)' + // 2) with 'simd_mask(const T1(&&arr)[N])' in simd_mask + // m((element_type*)p)' + template >> + explicit simd_mask_impl(const T1 *ptr) { + base_type::copy_from(ptr); + } + + /// Broadcast assignment operator to support simd_mask_impl n = a > b; + simd_mask_impl &operator=(element_type val) noexcept { + base_type::set(val); + return *this; + } + + template > + operator bool() { + return base_type::data()[0] != 0; + } +}; + +#undef __ESIMD_MASK_DEPRECATION_MSG + +} // namespace detail +} // namespace esimd +} // namespace experimental +} // namespace intel +} // namespace ext +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp index b85883fe36a52..6b6e3ab0d72f9 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp @@ -10,6 +10,7 @@ #pragma once +#include #include #include @@ -22,10 +23,6 @@ #include #endif // __SYCL_DEVICE_ONLY__ -#define __ESIMD_MASK_DEPRECATION_MSG \ - "Use of 'simd' class to represent predicate or mask is deprecated. Use " \ - "'simd_mask' instead." - __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace ext { @@ -126,84 +123,6 @@ ESIMD_INLINE simd convert(const simd &val) { return __builtin_convertvector(val.data(), detail::vector_type_t); } -namespace detail { -template -class simd_mask_impl - : public detail::simd_obj_impl< - T, N, simd_mask_impl, - std::enable_if_t>> { - using base_type = detail::simd_obj_impl>; - -public: - using element_type = T; - using vector_type = typename base_type::vector_type; - static_assert(std::is_same_v> && - "mask impl type mismatch"); - - simd_mask_impl() = default; - simd_mask_impl(const simd_mask_impl &other) : base_type(other) {} - - /// Broadcast constructor with conversion. - template >> - simd_mask_impl(T1 Val) : base_type((T)Val) {} - - /// Implicit conversion constructor from a raw vector object. - // TODO this should be made inaccessible from user code. - simd_mask_impl(const vector_type &Val) : base_type(Val) {} - - /// Initializer list constructor. - __SYCL_DEPRECATED("use constructor from array, e.g: simd_mask<3> x({0,1,1});") - simd_mask_impl(std::initializer_list Ilist) : base_type(Ilist) {} - - /// Construct from an array. To allow e.g. simd_mask m({1,0,0,1,...}). - template > - simd_mask_impl(const element_type(&&Arr)[N1]) { - base_type::template init_from_array(std::move(Arr)); - } - - /// Implicit conversion from simd. - __SYCL_DEPRECATED(__ESIMD_MASK_DEPRECATION_MSG) - simd_mask_impl(const simd &Val) : base_type(Val.data()) {} - -private: - static inline constexpr bool mask_size_ok_for_mem_io() { - constexpr unsigned Sz = sizeof(element_type) * N; - return (Sz >= detail::OperandSize::OWORD) && - (Sz % detail::OperandSize::OWORD == 0) && - detail::isPowerOf2(Sz / detail::OperandSize::OWORD) && - (Sz <= 8 * detail::OperandSize::OWORD); - } - -public: - // TODO add accessor-based mask memory operations. - - /// Load constructor. - // Implementation note: use SFINAE to avoid overload ambiguity: - // 1) with 'simd_mask(element_type v)' in 'simd_mask m(0)' - // 2) with 'simd_mask(const T1(&&arr)[N])' in simd_mask - // m((element_type*)p)' - template >> - explicit simd_mask_impl(const T1 *ptr) { - base_type::copy_from(ptr); - } - - /// Broadcast assignment operator to support simd_mask_impl n = a > b; - simd_mask_impl &operator=(element_type val) noexcept { - base_type::set(val); - return *this; - } - - template > - operator bool() { - return base_type::data()[0] != 0; - } -}; - -} // namespace detail - #undef __ESIMD_DEF_RELOP #undef __ESIMD_DEF_BITWISE_OP From 4f26aa59dc884242a51bd7790e18b43ee698703f Mon Sep 17 00:00:00 2001 From: kbobrovs Date: Tue, 21 Sep 2021 10:56:36 -0700 Subject: [PATCH 7/8] Removed init list-based ctor usage in __esimd_abs_common_internal. Signed-off-by: kbobrovs --- sycl/include/sycl/ext/intel/experimental/esimd/math.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp index c4b42655d7456..04d93061e02b5 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp @@ -63,7 +63,7 @@ namespace detail { template ESIMD_NODEBUG ESIMD_INLINE simd __esimd_abs_common_internal(simd src0, int flag = saturation_off) { - simd Result = simd{__esimd_abs(src0.data())}; + simd Result = simd(__esimd_abs(src0.data())); if (flag != saturation_on) return Result; From ff3248143cbb3a314ad651f102dc1223d1ba3193 Mon Sep 17 00:00:00 2001 From: kbobrovs Date: Tue, 21 Sep 2021 11:37:13 -0700 Subject: [PATCH 8/8] Added simd_view -> simd_mask implicit conversion. Improved tests per review commetns. Signed-off-by: kbobrovs --- .../esimd/detail/simd_mask_impl.hpp | 21 +++++++++++++++++-- .../esimd/detail/simd_obj_impl.hpp | 17 +++++++-------- .../ext/intel/experimental/esimd/simd.hpp | 3 +++ sycl/test/esimd/simd_mask.cpp | 7 +++++++ 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_mask_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_mask_impl.hpp index 2fb14989d4369..407d592cf780c 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_mask_impl.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_mask_impl.hpp @@ -22,8 +22,9 @@ namespace esimd { namespace detail { #define __ESIMD_MASK_DEPRECATION_MSG \ - "Use of 'simd' class to represent predicate or mask is deprecated. Use " \ - "'simd_mask' instead." + "Use of 'simd'/'simd_view' class to represent predicate or mask " \ + "is deprecated. Use " \ + "'simd_mask'/'simd_view' instead." template class simd_mask_impl @@ -63,6 +64,22 @@ class simd_mask_impl __SYCL_DEPRECATED(__ESIMD_MASK_DEPRECATION_MSG) simd_mask_impl(const simd &Val) : base_type(Val.data()) {} + /// Implicit conversion from simd_view. + template < + // viewed simd class parameters + int N1, class T1, + // view region + class RegionT2, + // view element type + class T2 = typename __SEIEE::shape_type::element_type, + // view size in elements + int N2 = __SEIEE::shape_type::length, + // enable only if view length and element type match this object + class = std::enable_if_t>> + __SYCL_DEPRECATED(__ESIMD_MASK_DEPRECATION_MSG) + simd_mask_impl(const simd_view, RegionT2> &Val) + : base_type(Val.read().data()) {} + private: static inline constexpr bool mask_size_ok_for_mem_io() { constexpr unsigned Sz = sizeof(element_type) * N; diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp index 70f488fa80146..e7bb5ff37eace 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp @@ -22,10 +22,6 @@ namespace ext { namespace intel { namespace experimental { namespace esimd { - -/// Represents a simd mask. -template using simd_mask = detail::simd_mask_type; - namespace detail { /// The simd_obj_impl vector class. @@ -200,12 +196,12 @@ template class simd_obj_impl { } /// Whole region update with predicates. - void merge(const Derived &Val, const simd_mask &Mask) { + void merge(const Derived &Val, const simd_mask_type &Mask) { set(__esimd_wrregion(data(), Val.data(), 0, Mask.data())); } - void merge(const Derived &Val1, Derived Val2, const simd_mask &Mask) { + void merge(const Derived &Val1, Derived Val2, const simd_mask_type &Mask) { Val2.merge(Val1, Mask); set(Val2.data()); } @@ -308,7 +304,7 @@ template class simd_obj_impl { template void iupdate(const simd &Indices, const resize_a_simd_type_t &Val, - const simd_mask &Mask) { + const simd_mask_type &Mask) { vector_type_t Offsets = Indices.data() * sizeof(Ty); set(__esimd_wrindirect(data(), Val.data(), Offsets, Mask.data())); @@ -541,10 +537,11 @@ template class simd_obj_impl { /// Unary logical negation operator, available in all subclasses. template >> - simd_mask operator!() { - using MaskVecT = typename simd_mask::vector_type; + simd_mask_type operator!() { + using MaskVecT = typename simd_mask_type::vector_type; auto R = data() == vector_type(0); - return simd_mask{__builtin_convertvector(R, MaskVecT) & MaskVecT(1)}; + return simd_mask_type{__builtin_convertvector(R, MaskVecT) & + MaskVecT(1)}; } #define __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(BINOP, OPASSIGN, COND) \ diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp index 6b6e3ab0d72f9..bbab78bdd103f 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp @@ -126,6 +126,9 @@ ESIMD_INLINE simd convert(const simd &val) { #undef __ESIMD_DEF_RELOP #undef __ESIMD_DEF_BITWISE_OP +/// Represents a simd mask. +template using simd_mask = detail::simd_mask_type; + } // namespace esimd } // namespace experimental } // namespace intel diff --git a/sycl/test/esimd/simd_mask.cpp b/sycl/test/esimd/simd_mask.cpp index be1d36af6b646..a83c1f53545f9 100644 --- a/sycl/test/esimd/simd_mask.cpp +++ b/sycl/test/esimd/simd_mask.cpp @@ -51,16 +51,23 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION simd_mask<8> misc_tests(bool val) { simd ch1(1); simd ch2(2); simd_mask<8> m5 = ch1 > ch2; + m1[3] ^= 1; // binop on writable single-element view + ch1.merge(ch2, m1.select<8, 1>(0)); // simd_view used as mask + return m5; } SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void compat_test(float *ptr) { simd pred(1); simd offsets; + simd pred1(1); + auto pred2 = pred1.bit_cast_view(); // expected-warning@+1 {{deprecated}} auto x1 = gather(ptr, offsets, pred); // expected-warning@+1 {{deprecated}} + auto x11 = gather(ptr, offsets, pred2); + // expected-warning@+1 {{deprecated}} auto x2 = gather(ptr, offsets, simd{}); simd_mask<16> m1(0); // expected-warning@+1 {{deprecated}}