From 6242227fe32ea0c3117def90b0d331fe959bfa1d Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Mon, 1 Apr 2019 23:03:00 -0700 Subject: [PATCH] [SYCL] Common Reference Semantics for accessor class Signed-off-by: Vyacheslav N Klochkov --- clang/lib/Sema/SemaSYCL.cpp | 50 ++-- clang/test/CodeGenSYCL/Inputs/sycl.hpp | 7 +- clang/test/CodeGenSYCL/integration_header.cpp | 20 +- .../test/CodeGenSYCL/struct_kernel_param.cpp | 5 +- clang/test/SemaSYCL/Inputs/sycl.hpp | 4 +- sycl/include/CL/sycl/accessor.hpp | 253 +++++++++++++++--- .../CL/sycl/detail/scheduler/commands.cpp | 105 +++++++- .../CL/sycl/detail/scheduler/commands.h | 12 + 8 files changed, 352 insertions(+), 104 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index ab656707086c3..c6ebb7e1d8882 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -650,21 +650,20 @@ static void buildArgTys(ASTContext &Context, CXXRecordDecl *KernelObj, CreateAndAddPrmDsc(Fld, PointerType); FieldDecl *AccessRangeFld = - getFieldDeclByName(RecordDecl, {"__impl", "AccessRange"}); + getFieldDeclByName(RecordDecl, {"impl", "AccessRange"}); assert(AccessRangeFld && - "The accessor must contain the AccessRange from the __impl field"); + "The accessor.impl must contain the AccessRange field"); CreateAndAddPrmDsc(AccessRangeFld, AccessRangeFld->getType()); FieldDecl *MemRangeFld = - getFieldDeclByName(RecordDecl, {"__impl", "MemRange"}); + getFieldDeclByName(RecordDecl, {"impl", "MemRange"}); assert(MemRangeFld && - "The accessor must contain the MemRange from the __impl field"); + "The accessor.impl must contain the MemRange field"); CreateAndAddPrmDsc(MemRangeFld, MemRangeFld->getType()); FieldDecl *OffsetFld = - getFieldDeclByName(RecordDecl, {"__impl", "Offset"}); - assert(OffsetFld && - "The accessor must contain the Offset from the __impl field"); + getFieldDeclByName(RecordDecl, {"impl", "Offset"}); + assert(OffsetFld && "The accessor.impl must contain the Offset field"); CreateAndAddPrmDsc(OffsetFld, OffsetFld->getType()); } else if (Util::isSyclStreamType(ArgTy)) { // the parameter is a SYCL stream object @@ -710,37 +709,18 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name, uint64_t Offset = Layout.getFieldOffset(Fld->getFieldIndex()) / 8; if (Util::isSyclAccessorType(ArgTy)) { - // The parameter is a SYCL accessor object - split into three - // parameters, so need to generate three descriptors. - // ... first descriptor (translated to pointer kernel parameter): + // The parameter is a SYCL accessor object. + // The Info field of the parameter descriptor for accessor contains + // two template parameters packed into thid integer field: + // - target (e.g. global_buffer, constant_buffer, local); + // - dimension of the accessor. const auto *AccTy = ArgTy->getAsCXXRecordDecl(); assert(AccTy && "accessor must be of a record type"); const auto *AccTmplTy = cast(AccTy); - H.addParamDesc(SYCLIntegrationHeader::kind_accessor, - getAccessTarget(AccTmplTy), Offset); - // ... second descriptor (translated to access range kernel parameter): - FieldDecl *AccessRngFld = - getFieldDeclByName(AccTy, {"__impl", "AccessRange"}, &Offset); - uint64_t Sz = - Ctx.getTypeSizeInChars(AccessRngFld->getType()).getQuantity(); - H.addParamDesc(SYCLIntegrationHeader::kind_std_layout, - static_cast(Sz), static_cast(Offset)); - // ... third descriptor (translated to mem range kernel parameter): - // Get offset in bytes - Offset = Layout.getFieldOffset(Fld->getFieldIndex()) / 8; - FieldDecl *MemRngFld = - getFieldDeclByName(AccTy, {"__impl", "MemRange"}, &Offset); - Sz = Ctx.getTypeSizeInChars(MemRngFld->getType()).getQuantity(); - H.addParamDesc(SYCLIntegrationHeader::kind_std_layout, - static_cast(Sz), static_cast(Offset)); - // ... fourth descriptor (translated to id kernel parameter): - // Get offset in bytes - Offset = Layout.getFieldOffset(Fld->getFieldIndex()) / 8; - FieldDecl *OffstFld = - getFieldDeclByName(AccTy, {"__impl", "Offset"}, &Offset); - Sz = Ctx.getTypeSizeInChars(OffstFld->getType()).getQuantity(); - H.addParamDesc(SYCLIntegrationHeader::kind_std_layout, - static_cast(Sz), static_cast(Offset)); + int Dims = static_cast( + AccTmplTy->getTemplateArgs()[1].getAsIntegral().getExtValue()); + int Info = getAccessTarget(AccTmplTy) | (Dims << 11); + H.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, Offset); } else if (Util::isSyclStreamType(ArgTy)) { // the parameter is a SYCL stream object llvm_unreachable("streams not supported yet"); diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index cffc9804eb507..8f7bdbc21ff4a 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -106,14 +106,15 @@ template AccessRange, - range MemRange, id Offset) {} void use(void) const {} template void use(T... args) {} template void use(T... args) const {} - _ImplT __impl; + _ImplT impl; +private: + void __init(__global dataT *Ptr, range AccessRange, + range MemRange, id Offset) {} }; class kernel {}; diff --git a/clang/test/CodeGenSYCL/integration_header.cpp b/clang/test/CodeGenSYCL/integration_header.cpp index 729861e544cac..6c98780ac0d36 100644 --- a/clang/test/CodeGenSYCL/integration_header.cpp +++ b/clang/test/CodeGenSYCL/integration_header.cpp @@ -22,28 +22,16 @@ // CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { // CHECK-NEXT: //--- _ZTSZ4mainE12first_kernel // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 2014, 4 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 4 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 5 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 6 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 2016, 7 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 7 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 8 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 9 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 4 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 6112, 7 }, // CHECK-EMPTY: // CHECK-NEXT: //--- _ZTSN16second_namespace13second_kernelIcEE // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 2016, 4 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 4 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 5 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 6 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 6112, 4 }, // CHECK-EMPTY: // CHECK-NEXT: //--- _ZTS12third_kernelILi1Ei5pointIZ4mainE1XEE // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 2016, 4 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 4 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 5 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 6 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 6112, 4 }, // CHECK-EMPTY: // CHECK-NEXT: }; // diff --git a/clang/test/CodeGenSYCL/struct_kernel_param.cpp b/clang/test/CodeGenSYCL/struct_kernel_param.cpp index 089c3ab1d987e..4143c5d045558 100644 --- a/clang/test/CodeGenSYCL/struct_kernel_param.cpp +++ b/clang/test/CodeGenSYCL/struct_kernel_param.cpp @@ -3,10 +3,7 @@ // CHECK: const kernel_param_desc_t kernel_signatures[] = { // CHECK-NEXT: //--- _ZTSZZ5test0vENK3$_0clERN2cl4sycl7handlerEE8MyKernel -// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 2014, 0 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 0 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 1 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 2 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 }, // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 24, 4 }, // CHECK-EMPTY: // CHECK-NEXT:}; diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp index 8015e3ecd9210..3f780317e1534 100644 --- a/clang/test/SemaSYCL/Inputs/sycl.hpp +++ b/clang/test/SemaSYCL/Inputs/sycl.hpp @@ -64,10 +64,10 @@ class accessor { public: void use(void) const {} void use(void*) const {} + _ImplT impl; +private: void __init(__global dataT *Ptr, range AccessRange, range MemRange, id Offset) {} - - _ImplT __impl; }; } // namespace sycl diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 9bdc641371b14..6ab337be1d075 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -16,10 +16,11 @@ #include #include #include +#include +#include namespace cl { namespace sycl { -// TODO: 4.3.2 Implement common reference semantics namespace detail { template ) - sizeof(dataT *)]; accessor_impl(dataT *Data) : Data(Data) {} // Returns the number of accessed elements. size_t get_count() const { return 1; } + +#ifdef __SYCL_DEVICE_ONLY__ + bool operator==(const accessor_impl &Rhs) const { return Data == Rhs.Data; } +#endif }; /// Implementation of host accessor. @@ -137,6 +145,13 @@ SYCL_ACCESSOR_IMPL(isTargetHostAccess(accessTarget) && dimensions > 0) { // Returns the number of accessed elements. size_t get_count() const { return AccessRange.size(); } + +#ifdef __SYCL_DEVICE_ONLY__ + bool operator==(const accessor_impl &Rhs) const { + return Data == Rhs.Data && AccessRange == Rhs.AccessRange && + MemRange == Rhs.MemRange && Offset == Rhs.Offset; + } +#endif }; /// Implementation of device (kernel) accessor providing access to a single @@ -167,6 +182,10 @@ SYCL_ACCESSOR_IMPL(!isTargetHostAccess(accessTarget) && // Returns the number of accessed elements. size_t get_count() const { return 1; } +#ifdef __SYCL_DEVICE_ONLY__ + bool operator==(const accessor_impl &Rhs) const { return Data == Rhs.Data; } +#endif + static_assert( std::is_same::type, dataT>::value, @@ -209,6 +228,13 @@ SYCL_ACCESSOR_IMPL(!isTargetHostAccess(accessTarget) && // Returns the number of accessed elements. size_t get_count() const { return AccessRange.size(); } +#ifdef __SYCL_DEVICE_ONLY__ + bool operator==(const accessor_impl &Rhs) const { + return Data == Rhs.Data && AccessRange == Rhs.AccessRange && + MemRange == Rhs.MemRange && Offset == Rhs.Offset; + } +#endif + static_assert( std::is_same::type, dataT>::value, @@ -248,6 +274,12 @@ SYCL_ACCESSOR_IMPL(accessTarget == access::target::local && // Returns the number of accessed elements. size_t get_count() const { return 1; } +#ifdef __SYCL_DEVICE_ONLY__ + bool operator==(const accessor_impl &Rhs) const { + return ByteSize == Rhs.ByteSize && Data == Rhs.Data; + } +#endif + static_assert( std::is_same::type, dataT>::value, @@ -298,6 +330,14 @@ SYCL_ACCESSOR_IMPL(accessTarget == access::target::local && // Returns the number of accessed elements. size_t get_count() const { return AccessRange.size(); } +#ifdef __SYCL_DEVICE_ONLY__ + bool operator==(const accessor_impl &Rhs) const { + return ByteSize == Rhs.ByteSize && Data == Rhs.Data && + AccessRange == Rhs.AccessRange && MemRange == Rhs.MemRange && + Offset == Rhs.Offset; + } +#endif + static_assert( std::is_same::type, dataT>::value, @@ -319,10 +359,24 @@ class accessor_base { accessor_impl; const _ImplT *__get_impl() const { +#ifdef __SYCL_DEVICE_ONLY__ return reinterpret_cast(this); +#else + auto ImplPtrPtr = reinterpret_cast *>(this); + const _ImplT* I = &**ImplPtrPtr; + return I; +#endif } - _ImplT *__get_impl() { return reinterpret_cast<_ImplT *>(this); } + _ImplT *__get_impl() { +#ifdef __SYCL_DEVICE_ONLY__ + return reinterpret_cast<_ImplT *>(this); +#else + auto ImplPtrPtr = reinterpret_cast *>(this); + _ImplT* I = &**ImplPtrPtr; + return I; +#endif + } static_assert( std::is_same::type, @@ -569,15 +623,77 @@ class accessor // Make sure Impl field is the first in the class, so that it is // safe to reinterpret a pointer to accessor as a pointer to the // implementation. - _ImplT __impl; +#ifdef __SYCL_DEVICE_ONLY__ + _ImplT impl; +#else + std::shared_ptr<_ImplT> impl; + char padding[sizeof(_ImplT) - sizeof(std::shared_ptr<_ImplT>)]; +#endif +#ifdef __SYCL_DEVICE_ONLY__ void __init(_ValueType *Ptr, range AccessRange, range MemRange, id Offset) { - __impl.Data = Ptr; - __impl.AccessRange = AccessRange; - __impl.MemRange = MemRange; - __impl.Offset = Offset; + impl.Data = Ptr; + impl.AccessRange = AccessRange; + impl.MemRange = MemRange; + impl.Offset = Offset; } +#endif + +#ifndef __SYCL_DEVICE_ONLY__ + detail::buffer_impl *getBufImpl() const { + return impl->m_Buf; + } +#endif + + range getAccessRange() const { +#ifdef __SYCL_DEVICE_ONLY__ + return impl.AccessRange; +#else + return impl->AccessRange; +#endif + }; + + range getMemRange() const { +#ifdef __SYCL_DEVICE_ONLY__ + return impl.MemRange; +#else + return impl->MemRange; +#endif + }; + + id getOffset() const { +#ifdef __SYCL_DEVICE_ONLY__ + return impl.Offset; +#else + return impl->Offset; +#endif + }; + + size_t getByteSize() const { +#ifdef __SYCL_DEVICE_ONLY__ + return impl.ByteSize; +#else + return impl->ByteSize; +#endif + }; + + template + friend class cl::sycl::simple_scheduler::ExecuteKernelCommand; + + template + friend uint cl::sycl::simple_scheduler::passGlobalAccessorAsArg( + uint I, int LambdaOffset, cl_kernel ClKernel, + const KernelType &HostKernel); + + template + friend uint cl::sycl::simple_scheduler::passLocalAccessorAsArg( + uint I, int LambdaOffset, cl_kernel ClKernel, + const KernelType &HostKernel); + + template + friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); public: using value_type = dataT; @@ -606,7 +722,12 @@ class accessor AccessTarget == access::target::constant_buffer))) && Dimensions == 0), buffer>::type &bufferRef) - : __impl(detail::getSyclObjImpl(bufferRef)->BufPtr) { +#ifdef __SYCL_DEVICE_ONLY__ + : impl((dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr) { +#else + : impl(std::make_shared<_ImplT>( + (dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr)) { +#endif auto BufImpl = detail::getSyclObjImpl(bufferRef); if (AccessTarget == access::target::host_buffer) { if (BufImpl->OpenCLInterop) { @@ -644,11 +765,16 @@ class accessor buffer>::type &bufferRef, handler &commandGroupHandlerRef) #ifdef __SYCL_DEVICE_ONLY__ - ; // This ctor can't be used in device code, so no need to define it. + // Even though this ctor can not be used in device code, some + // dummy implementation is still needed. + // Pass nullptr as a pointer to mem and use buffers from the ctor + // arguments to avoid the need in adding utility functions for + // dummy/default initialization of range fields. + : impl(nullptr, (handler *)nullptr) {} #else // !__SYCL_DEVICE_ONLY__ - : __impl((dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, - bufferRef.get_range(), bufferRef.get_range(), - &commandGroupHandlerRef) { + : impl(std::make_shared<_ImplT>( + (dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, + &commandGroupHandlerRef)) { auto BufImpl = detail::getSyclObjImpl(bufferRef); if (BufImpl->OpenCLInterop && !BufImpl->isValidAccessToMem(accessMode)) { throw cl::sycl::runtime_error( @@ -656,7 +782,7 @@ class accessor "interoperability buffer"); } commandGroupHandlerRef.AddBufDep(*BufImpl); - __impl.m_Buf = BufImpl.get(); + impl->m_Buf = BufImpl.get(); } #endif // !__SYCL_DEVICE_ONLY__ @@ -682,8 +808,14 @@ class accessor AccessTarget == access::target::constant_buffer))) && Dimensions > 0), buffer>::type &bufferRef) - : __impl((dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, - bufferRef.get_range(), bufferRef.get_range()) { +#ifdef __SYCL_DEVICE_ONLY__ + : impl((dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, + bufferRef.get_range(), bufferRef.get_range()) { +#else + : impl(std::make_shared<_ImplT>( + (dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, + bufferRef.get_range(), bufferRef.get_range())) { +#endif auto BufImpl = detail::getSyclObjImpl(bufferRef); if (AccessTarget == access::target::host_buffer) { if (BufImpl->OpenCLInterop) { @@ -721,11 +853,18 @@ class accessor buffer>::type &bufferRef, handler &commandGroupHandlerRef) #ifdef __SYCL_DEVICE_ONLY__ - ; // This ctor can't be used in device code, so no need to define it. + // Even though this ctor can not be used in device code, some + // dummy implementation is still needed. + // Pass nullptr as a pointer to mem and use buffers from the ctor + // arguments to avoid the need in adding utility functions for + // dummy/default initialization of range fields. + : impl(nullptr, bufferRef.get_range(), bufferRef.get_range(), + &commandGroupHandlerRef) {} #else - : __impl((dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, - bufferRef.get_range(), bufferRef.get_range(), - &commandGroupHandlerRef) { + : impl(std::make_shared<_ImplT>( + (dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, + bufferRef.get_range(), bufferRef.get_range(), + &commandGroupHandlerRef)) { auto BufImpl = detail::getSyclObjImpl(bufferRef); if (BufImpl->OpenCLInterop && !BufImpl->isValidAccessToMem(accessMode)) { throw cl::sycl::runtime_error( @@ -733,7 +872,7 @@ class accessor "interoperability buffer"); } commandGroupHandlerRef.AddBufDep(*BufImpl); - __impl.m_Buf = BufImpl.get(); + impl->m_Buf = BufImpl.get(); } #endif @@ -761,10 +900,17 @@ class accessor buffer>::type &bufferRef, range Range, id Offset = {}) #ifdef __SYCL_DEVICE_ONLY__ - ; // This ctor can't be used in device code, so no need to define it. + // Even though this ctor can not be used in device code, some + // dummy implementation is still needed. + // Pass nullptr as a pointer to mem and use buffers from the ctor + // arguments to avoid the need in adding utility functions for + // dummy/default initialization of range and + // id fields. + : impl(nullptr, Range, bufferRef.get_range(), Offset) {} #else // !__SYCL_DEVICE_ONLY__ - : __impl((dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, Range, - bufferRef.get_range(), Offset) { + : impl(std::make_shared<_ImplT>( + (dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, Range, + bufferRef.get_range(), Offset)) { auto BufImpl = detail::getSyclObjImpl(bufferRef); if (AccessTarget == access::target::host_buffer) { if (BufImpl->OpenCLInterop) { @@ -781,7 +927,7 @@ class accessor "interoperability buffer"); } } -#endif // !__SYCL_DEVICE_ONLY__ +#endif // !__SYCL_DEVICE_ONLY__ // buffer ctor #6: // accessor(buffer &, handler &, range Range, id Offset); @@ -804,10 +950,18 @@ class accessor handler &commandGroupHandlerRef, range Range, id Offset = {}) #ifdef __SYCL_DEVICE_ONLY__ - ; // This ctor can't be used in device code, so no need to define it. + // Even though this ctor can not be used in device code, some + // dummy implementation is still needed. + // Pass nullptr as a pointer to mem and use buffers from the ctor + // arguments to avoid the need in adding utility functions for + // dummy/default initialization of range and + // id fields. + : impl(nullptr, Range, bufferRef.get_range(), + &commandGroupHandlerRef, Offset) {} #else // !__SYCL_DEVICE_ONLY__ - : __impl((dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, Range, - bufferRef.get_range(), &commandGroupHandlerRef, Offset) { + : impl(std::make_shared<_ImplT>( + (dataT *)detail::getSyclObjImpl(bufferRef)->BufPtr, Range, + bufferRef.get_range(), &commandGroupHandlerRef, Offset)) { auto BufImpl = detail::getSyclObjImpl(bufferRef); if (BufImpl->OpenCLInterop && !BufImpl->isValidAccessToMem(accessMode)) { throw cl::sycl::runtime_error( @@ -815,9 +969,9 @@ class accessor "interoperability buffer"); } commandGroupHandlerRef.AddBufDep(*BufImpl); - __impl.m_Buf = BufImpl.get(); + impl->m_Buf = BufImpl.get(); } -#endif // !__SYCL_DEVICE_ONLY__ +#endif // !__SYCL_DEVICE_ONLY__ // TODO: // local accessor ctor #1 @@ -845,7 +999,23 @@ class accessor Dimensions > 0), range>::type allocationSize, handler &commandGroupHandlerRef) - : __impl(allocationSize, &commandGroupHandlerRef) {} +#ifdef __SYCL_DEVICE_ONLY__ + : impl(allocationSize, &commandGroupHandlerRef) {} +#else + : impl(std::make_shared<_ImplT>(allocationSize, + &commandGroupHandlerRef)) {} +#endif + + accessor(const accessor &rhs) = default; + accessor(accessor &&rhs) = default; + + accessor &operator=(const accessor &rhs) = default; + accessor &operator=(accessor &&rhs) = default; + + ~accessor() = default; + + bool operator==(const accessor &rhs) const { return impl == rhs.impl; } + bool operator!=(const accessor &rhs) const { return !(*this == rhs); } }; } // namespace sycl @@ -854,4 +1024,25 @@ class accessor #undef SYCL_ACCESSOR_IMPL #undef SYCL_ACCESSOR_SUBCLASS -//TODO hash for accessor +namespace std { +template +struct hash> { + using AccType = cl::sycl::accessor< + T, Dimensions, AccessMode, AccessTarget, IsPlaceholder>; + using ImplType = cl::sycl::detail::accessor_impl< + T, Dimensions, AccessMode, AccessTarget, IsPlaceholder>; + + size_t operator()(const AccType &A) const { +#ifdef __SYCL_DEVICE_ONLY__ + // Hash is not supported on DEVICE. Just return 0 here. + return 0; +#else + std::shared_ptr ImplPtr = cl::sycl::detail::getSyclObjImpl(A); + return hash>()(ImplPtr); +#endif + } +}; +} // namespace std diff --git a/sycl/include/CL/sycl/detail/scheduler/commands.cpp b/sycl/include/CL/sycl/detail/scheduler/commands.cpp index 59d4dcf91cabe..6030be7f20c3f 100644 --- a/sycl/include/CL/sycl/detail/scheduler/commands.cpp +++ b/sycl/include/CL/sycl/detail/scheduler/commands.cpp @@ -27,6 +27,52 @@ const Dst *getParamAddress(const Src *ptr, uint64_t Offset) { return reinterpret_cast((const char *)ptr + Offset); } +template +uint passGlobalAccessorAsArg(uint I, int LambdaOffset, cl_kernel ClKernel, + const KernelType &HostKernel) { + using AccType = accessor; + const AccType *Acc = getParamAddress(&HostKernel, LambdaOffset); + cl_mem CLBuf = Acc->getBufImpl()->getOpenCLMem(); + CHECK_OCL_CODE(clSetKernelArg(ClKernel, I, sizeof(cl_mem), &CLBuf)); + + range AccessRange = Acc->getAccessRange(); + CHECK_OCL_CODE(clSetKernelArg(ClKernel, I + 1, + sizeof(range), + &AccessRange)); + range MemRange = Acc->getMemRange(); + CHECK_OCL_CODE(clSetKernelArg(ClKernel, I + 2, + sizeof(range), &MemRange)); + id Offset = Acc->getOffset(); + CHECK_OCL_CODE(clSetKernelArg(ClKernel, I + 3, + sizeof(id), &Offset)); + return 4; +} + +template +uint passLocalAccessorAsArg(uint I, int LambdaOffset, cl_kernel ClKernel, + const KernelType &HostKernel) { + using AccType = accessor; + const AccType *Acc = getParamAddress(&HostKernel, LambdaOffset); + size_t ByteSize = Acc->getByteSize(); + CHECK_OCL_CODE(clSetKernelArg(ClKernel, I, ByteSize, nullptr)); + + range AccessRange = Acc->getAccessRange(); + CHECK_OCL_CODE(clSetKernelArg(ClKernel, I + 1, + sizeof(range), + &AccessRange)); + range MemRange = Acc->getMemRange(); + CHECK_OCL_CODE(clSetKernelArg(ClKernel, I + 2, + sizeof(range), &MemRange)); + id Offset = Acc->getOffset(); + CHECK_OCL_CODE(clSetKernelArg(ClKernel, I + 3, + sizeof(id), &Offset)); + return 4; +} + template void ExecuteKernelCommand< @@ -46,30 +92,62 @@ void ExecuteKernelCommand< } if (m_KernelArgs != nullptr) { + unsigned ArgumentID = 0; for (unsigned I = 0; I < m_KernelArgsNum; ++I) { switch (m_KernelArgs[I].kind) { case csd::kernel_param_kind_t::kind_std_layout: { const void *Ptr = getParamAddress(&m_HostKernel, m_KernelArgs[I].offset); CHECK_OCL_CODE( - clSetKernelArg(m_ClKernel, I, m_KernelArgs[I].info, Ptr)); + clSetKernelArg(m_ClKernel, ArgumentID, m_KernelArgs[I].info, Ptr)); + ArgumentID++; break; } case csd::kernel_param_kind_t::kind_accessor: { - switch (static_cast(m_KernelArgs[I].info)) { - case cl::sycl::access::target::global_buffer: - case cl::sycl::access::target::constant_buffer: { - auto *Ptr = *(getParamAddress< - cl::sycl::detail::buffer_impl> *>( - &m_HostKernel, m_KernelArgs[I].offset)); - cl_mem CLBuf = Ptr->getOpenCLMem(); - CHECK_OCL_CODE(clSetKernelArg(m_ClKernel, I, sizeof(cl_mem), &CLBuf)); + int AccDims = m_KernelArgs[I].info >> 11; + int AccTarget = m_KernelArgs[I].info & 0x7ff; + switch (static_cast(AccTarget)) { + case access::target::global_buffer: + case access::target::constant_buffer: { + switch (AccDims) { + case 1: + ArgumentID += passGlobalAccessorAsArg<1, KernelType>( + ArgumentID, m_KernelArgs[I].offset, m_ClKernel, m_HostKernel); + break; + case 2: + ArgumentID += passGlobalAccessorAsArg<2, KernelType>( + ArgumentID, m_KernelArgs[I].offset, m_ClKernel, m_HostKernel); + break; + case 3: + ArgumentID += passGlobalAccessorAsArg<3, KernelType>( + ArgumentID, m_KernelArgs[I].offset, m_ClKernel, m_HostKernel); + break; + case 0: + default: + assert(0 && "Passing accessor with dimensions=0 is unsupported"); + break; + } break; } - case cl::sycl::access::target::local: { - auto *Ptr = - getParamAddress(&m_HostKernel, m_KernelArgs[I].offset); - CHECK_OCL_CODE(clSetKernelArg(m_ClKernel, I, *Ptr, nullptr)); + case access::target::local: { + switch (AccDims) { + case 1: + ArgumentID += passLocalAccessorAsArg<1, KernelType>( + ArgumentID, m_KernelArgs[I].offset, m_ClKernel, m_HostKernel); + break; + case 2: + ArgumentID += passLocalAccessorAsArg<2, KernelType>( + ArgumentID, m_KernelArgs[I].offset, m_ClKernel, m_HostKernel); + break; + case 3: + ArgumentID += passLocalAccessorAsArg<3, KernelType>( + ArgumentID, m_KernelArgs[I].offset, m_ClKernel, m_HostKernel); + break; + case 0: + default: + assert(0 && "Passing accessor with dimensions=0 is unsupported"); + break; + } break; } // TODO handle these cases @@ -87,6 +165,7 @@ void ExecuteKernelCommand< } } } + for (const auto &Arg : m_InteropArgs) { if (Arg.m_Ptr.get() != nullptr) { CHECK_OCL_CODE(clSetKernelArg(m_ClKernel, Arg.m_ArgIndex, Arg.m_Size, diff --git a/sycl/include/CL/sycl/detail/scheduler/commands.h b/sycl/include/CL/sycl/detail/scheduler/commands.h index dd6785eda228c..85ba82aad881c 100644 --- a/sycl/include/CL/sycl/detail/scheduler/commands.h +++ b/sycl/include/CL/sycl/detail/scheduler/commands.h @@ -402,6 +402,18 @@ template class CopyCommand : public Command { range m_BuffDestRange; }; +// The next two functions pass global/local accessor as a parameter +// to the kernel. The paramter 'I' defines the current argument index +// being passed to the kernel. 'LambdaOffset' gives the offset of the passed +// accessor in lambda function. 'ClKernel' is the kernel. 'HostKernel' +// is the pointer to the lambda function. +template +uint passGlobalAccessorAsArg(uint I, int LambdaOffset, cl_kernel ClKernel, + const KernelType &HostKernel); +template +uint passLocalAccessorAsArg(uint I, int LambdaOffset, cl_kernel ClKernel, + const KernelType &HostKernel); + } // namespace simple_scheduler } // namespace sycl } // namespace cl