diff --git a/sycl/include/CL/sycl/detail/buffer_impl.hpp b/sycl/include/CL/sycl/detail/buffer_impl.hpp index 09595d31bae52..8ac49277d25df 100644 --- a/sycl/include/CL/sycl/detail/buffer_impl.hpp +++ b/sycl/include/CL/sycl/detail/buffer_impl.hpp @@ -169,6 +169,7 @@ class __SYCL_EXPORT buffer_impl final : public SYCLMemObjT { ~buffer_impl() { try { BaseT::updateHostMemory(); + BaseT::detachResources(); } catch (...) { } destructorNotification(this); diff --git a/sycl/include/CL/sycl/detail/image_impl.hpp b/sycl/include/CL/sycl/detail/image_impl.hpp index b708e29e9f674..4f89f536f2732 100644 --- a/sycl/include/CL/sycl/detail/image_impl.hpp +++ b/sycl/include/CL/sycl/detail/image_impl.hpp @@ -229,6 +229,7 @@ class __SYCL_EXPORT image_impl final : public SYCLMemObjT { ~image_impl() { try { BaseT::updateHostMemory(); + BaseT::detachResources(); } catch (...) { } } diff --git a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp index 09639d011f923..61af7a9630e2d 100644 --- a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp +++ b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp @@ -203,6 +203,9 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { // members must be alive. void updateHostMemory(); + // Detach additional resources associated with the memory object. + void detachResources() const; + public: __SYCL_DLL_LOCAL bool useHostPtr() { return has_property() || diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 341a46ca9679d..c758013ad1057 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -473,6 +473,7 @@ class __SYCL_EXPORT handler { /// They are then forwarded to command group and destroyed only after /// the command group finishes the work on device/host. /// The 'MSharedPtrStorage' suits that need. + /// NOTE: This is no longer in use and should be removed with next ABI break. /// /// @param ReduObj is a pointer to object that must be stored. void addReduction(const std::shared_ptr &ReduObj) { diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 13e1c312fcc89..329118e43b25c 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -366,6 +366,16 @@ template struct AreAllButLastReductions { static constexpr bool value = !std::is_base_of::value; }; +/// Helper for attaching a resource to the lifetime of the memory associated +/// with accessor. +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, + detail::AccessorBaseHost &AttachTo); + +/// Helper for attaching a resource to the lifetime of USM memory. +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Queue, + std::shared_ptr &Resource, + void *AttachTo); + /// This class encapsulates the reduction variable/accessor, /// the reduction operator and an optional operator identity. template getReadAccToPreviousPartialReds(handler &CGH) const { - CGH.addReduction(MOutBufPtr); + attachResourceLifetimeToMem(CGH, MOutBufPtr); return {*MOutBufPtr, CGH}; } @@ -673,7 +683,7 @@ class reduction_impl : private reduction_impl_base { std::enable_if_t getWriteMemForPartialReds(size_t Size, handler &CGH) { MOutBufPtr = std::make_shared>(range<1>(Size)); - CGH.addReduction(MOutBufPtr); + attachResourceLifetimeToMem(CGH, MOutBufPtr); return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); } @@ -691,7 +701,7 @@ class reduction_impl : private reduction_impl_base { // Create a new output buffer and return an accessor to it. MOutBufPtr = std::make_shared>(range<1>(Size)); - CGH.addReduction(MOutBufPtr); + attachResourceLifetimeToMem(CGH, MOutBufPtr); return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); } @@ -707,9 +717,9 @@ class reduction_impl : private reduction_impl_base { return *MRWAcc; auto RWReduVal = std::make_shared(MIdentity); - CGH.addReduction(RWReduVal); + attachResourceLifetimeToMem(CGH, RWReduVal); MOutBufPtr = std::make_shared>(RWReduVal.get(), range<1>(1)); - CGH.addReduction(MOutBufPtr); + attachResourceLifetimeToMem(CGH, MOutBufPtr); return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); } @@ -717,9 +727,9 @@ class reduction_impl : private reduction_impl_base { access::placeholder::false_t> getReadWriteAccessorToInitializedGroupsCounter(handler &CGH) { auto CounterMem = std::make_shared(0); - CGH.addReduction(CounterMem); + attachResourceLifetimeToMem(CGH, CounterMem); auto CounterBuf = std::make_shared>(CounterMem.get(), 1); - CGH.addReduction(CounterBuf); + attachResourceLifetimeToMem(CGH, CounterBuf); return {*CounterBuf, CGH}; } @@ -767,6 +777,23 @@ class reduction_impl : private reduction_impl_base { return Acc; } + /// Attaches the resource to the lifetime of the associated memory of the + /// reduction. + void attachResourceLifetimeToMem(handler &CGH, + std::shared_ptr Resource) const { +#ifndef __SYCL_DEVICE_ONLY__ + if (is_usm) + detail::attachLifetime(CGH.MQueue, Resource, MUSMPointer); + else if (MDWAcc != nullptr) + detail::attachLifetime(Resource, *MDWAcc); + else + detail::attachLifetime(Resource, *MRWAcc); +#else + (void)CGH; + (void)Resource; +#endif + } + /// Identity of the BinaryOperation. /// The result of BinaryOperation(X, MIdentity) is equal to X for any X. const T MIdentity; diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 18390606df128..7decaff889b3c 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -115,6 +115,17 @@ cl_context context_impl::get() const { bool context_impl::is_host() const { return MHostContext; } context_impl::~context_impl() { + // In case a user is leaking a memory object we may still have attached + // resources. These resources will be released with the context, but we need + // to do it before we release the backend context to avoid confusing errors. + std::unordered_map>> + USMLTARes; + { + std::lock_guard lock(MUSMLifetimeAttachedResourcesMutex); + std::swap(USMLTARes, MUSMLifetimeAttachedResources); + } + USMLTARes.clear(); + for (auto LibProg : MCachedLibPrograms) { assert(LibProg.second && "Null program must not be kept in the cache"); getPlugin().call(LibProg.second); @@ -206,6 +217,31 @@ pi_native_handle context_impl::getNative() const { return Handle; } +void context_impl::attachLifetimeToUSM(std::shared_ptr &Resource, + const void *AttachTo) { + std::lock_guard lock(MUSMLifetimeAttachedResourcesMutex); + auto AttachedResourcesIt = MUSMLifetimeAttachedResources.find(AttachTo); + if (AttachedResourcesIt != MUSMLifetimeAttachedResources.end()) + AttachedResourcesIt->second.push_back(Resource); + else + MUSMLifetimeAttachedResources.insert({AttachTo, {Resource}}); +} + +void context_impl::detachUSMLifetimeResources(const void *AttachedTo) { + // Swap the attached resources and let them go out of scope without the lock. + // This is required as they could potentially have their own attached + // resources they need to detach. + std::vector> AttachedResources; + { + std::lock_guard lock(MUSMLifetimeAttachedResourcesMutex); + auto AttachedResourcesIt = MUSMLifetimeAttachedResources.find(AttachedTo); + if (AttachedResourcesIt == MUSMLifetimeAttachedResources.end()) + return; + std::swap(AttachedResourcesIt->second, AttachedResources); + MUSMLifetimeAttachedResources.erase(AttachedResourcesIt); + } +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 722d2c0789d03..d015d36c66e06 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -167,6 +167,18 @@ class context_impl { /// \return a native handle. pi_native_handle getNative() const; + /// Attach a resource to a USM pointer. + /// + /// \param Resource is the resource to attach to the USM pointer + /// \param AttachTo is the USM pointer to attach the resource to + void attachLifetimeToUSM(std::shared_ptr &Resource, + const void *AttachTo); + + /// Detach all resources attached to a USM pointer. + /// + /// \param AttachedTo is the USM pointer to detach resources from + void detachUSMLifetimeResources(const void *AttachedTo); + private: async_handler MAsyncHandler; std::vector MDevices; @@ -177,6 +189,13 @@ class context_impl { std::map, RT::PiProgram> MCachedLibPrograms; mutable KernelProgramCache MKernelProgramCache; + + /// Matches USM pointers to attached resources. + std::unordered_map>> + MUSMLifetimeAttachedResources; + + /// Protects m_USMLifetimeAttachedResources. + std::mutex MUSMLifetimeAttachedResourcesMutex; }; } // namespace detail diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index c0174f81d6d80..e156237fc9e1d 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -98,6 +98,16 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() { return TP; } +std::unordered_map>> & +GlobalHandler::getMemObjLifetimeAttachedResources() { + return getOrCreate(MMemObjLifetimeAttachedResources); +} + +std::mutex &GlobalHandler::getMemObjLifetimeAttachedResourcesMutex() { + return getOrCreate(MMemObjLifetimeAttachedResourcesMutex); +} + void releaseDefaultContexts() { // Release shared-pointers to SYCL objects. #ifndef _WIN32 @@ -121,6 +131,12 @@ void GlobalHandler::registerDefaultContextReleaseHandler() { } void shutdown() { + // In case a user is leaking a memory object we may still have attached + // resources. These resources will be released with the context, but we need + // to do it before we release the backend context to avoid confusing errors. + if (GlobalHandler::instance().MMemObjLifetimeAttachedResources.Inst) + GlobalHandler::instance().MMemObjLifetimeAttachedResources.Inst->clear(); + // Ensure neither host task is working so that no default context is accessed // upon its release if (GlobalHandler::instance().MHostTaskThreadPool.Inst) diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index b3d6357686e0c..f574bece151f8 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -26,6 +26,7 @@ class plugin; class device_filter_list; class XPTIRegistry; class ThreadPool; +class SYCLMemObjI; using PlatformImplPtr = std::shared_ptr; using ContextImplPtr = std::shared_ptr; @@ -70,6 +71,11 @@ class GlobalHandler { std::mutex &getHandlerExtendedMembersMutex(); ThreadPool &getHostTaskThreadPool(); + std::unordered_map>> & + getMemObjLifetimeAttachedResources(); + std::mutex &getMemObjLifetimeAttachedResourcesMutex(); + static void registerDefaultContextReleaseHandler(); private: @@ -105,6 +111,12 @@ class GlobalHandler { InstWithLock MHandlerExtendedMembersMutex; // Thread pool for host task and event callbacks execution InstWithLock MHostTaskThreadPool; + + /// TODO: On ABI break this should be made part of SYCLMemObjT. + InstWithLock>>> + MMemObjLifetimeAttachedResources; + InstWithLock MMemObjLifetimeAttachedResourcesMutex; }; } // namespace detail } // namespace sycl diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index 508600b796e83..41f9903094bdf 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -110,6 +110,27 @@ reduGetMaxWGSize(std::shared_ptr Queue, return WGSize; } +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, + detail::AccessorBaseHost &AttachTo) { + SYCLMemObjI *MemObj = getSyclObjImpl(AttachTo)->MSYCLMemObj; + // On ABI break this should attach directly to the memory object. + std::lock_guard lock( + GlobalHandler::instance().getMemObjLifetimeAttachedResourcesMutex()); + auto &AttachedResourcesMap = + GlobalHandler::instance().getMemObjLifetimeAttachedResources(); + auto AttachedResourcesIt = AttachedResourcesMap.find(MemObj); + if (AttachedResourcesIt != AttachedResourcesMap.end()) + AttachedResourcesIt->second.push_back(Resource); + else + AttachedResourcesMap.insert({MemObj, {Resource}}); +} + +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Queue, + std::shared_ptr &Resource, + void *AttachTo) { + Queue->getContextImplPtr()->attachLifetimeToUSM(Resource, AttachTo); +} + } // namespace detail } // namespace oneapi } // namespace ext diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 191a15c0d5b8e..d3956e0d0a5db 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -90,6 +91,28 @@ void SYCLMemObjT::updateHostMemory() { pi::cast(MInteropMemObject)); } } + +// TODO: With ABI break the attached resources can be held by this type. When +// that happens this will be obsolete as the resources will automatically be +// destroyed with the object. +void SYCLMemObjT::detachResources() const { + // Swap the attached resources and let them go out of scope without the lock. + // This is required as they could potentially have their own attached + // resources they need to detach. + std::vector> AttachedResources; + { + std::lock_guard lock( + GlobalHandler::instance().getMemObjLifetimeAttachedResourcesMutex()); + auto &AttachedResourcesMap = + GlobalHandler::instance().getMemObjLifetimeAttachedResources(); + auto AttachedResourcesIt = AttachedResourcesMap.find(this); + if (AttachedResourcesIt == AttachedResourcesMap.end()) + return; + std::swap(AttachedResourcesIt->second, AttachedResources); + AttachedResourcesMap.erase(AttachedResourcesIt); + } +} + const plugin &SYCLMemObjT::getPlugin() const { assert((MInteropContext != nullptr) && "Trying to get Plugin from SYCLMemObjT with nullptr ContextImpl."); diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 0650877131c50..897a7fb641b72 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -164,6 +164,8 @@ void free(void *Ptr, const context &Ctxt, const detail::code_location &CL) { const detail::plugin &Plugin = CtxImpl->getPlugin(); Plugin.call(C, Ptr); } + // Detach resources. + detail::getSyclObjImpl(Ctxt)->detachUSMLifetimeResources(Ptr); } // For ABI compatibility diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 2681453647fb6..f7edfd3cfaa7d 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3697,6 +3697,8 @@ _ZN2cl4sycl3ext6oneapi10level_zero12make_programERKNS0_7contextEm _ZN2cl4sycl3ext6oneapi10level_zero13make_platformEm _ZN2cl4sycl3ext6oneapi15filter_selectorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN2cl4sycl3ext6oneapi15filter_selectorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE +_ZN2cl4sycl3ext6oneapi6detail14attachLifetimeERSt10shared_ptrIKvERNS0_6detail16AccessorBaseHostE +_ZN2cl4sycl3ext6oneapi6detail14attachLifetimeERSt10shared_ptrINS0_6detail10queue_implEERS4_IKvEPv _ZN2cl4sycl3ext6oneapi6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm _ZN2cl4sycl3ext6oneapi6detail17reduComputeWGSizeEmmRm _ZN2cl4sycl3ext6oneapi6detail33reduGetMaxNumConcurrentWorkGroupsESt10shared_ptrINS0_6detail10queue_implEE @@ -4148,6 +4150,7 @@ _ZNK2cl4sycl6detail10image_implILi3EE4sizeEv _ZNK2cl4sycl6detail10image_implILi3EE7getTypeEv _ZNK2cl4sycl6detail10image_implILi3EE9get_countEv _ZNK2cl4sycl6detail10image_implILi3EE9get_rangeEv +_ZNK2cl4sycl6detail11SYCLMemObjT15detachResourcesEv _ZNK2cl4sycl6detail11SYCLMemObjT9getPluginEv _ZNK2cl4sycl6detail11SYCLMemObjT9isInteropEv _ZNK2cl4sycl6detail11stream_impl22get_max_statement_sizeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 2359db7857240..4f8a1a21bed10 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1313,6 +1313,8 @@ ?atanpi@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V34562@@Z ?atanpi@__host_std@cl@@YAMM@Z ?atanpi@__host_std@cl@@YANN@Z +?attachLifetime@detail@oneapi@ext@sycl@cl@@YAXAEAV?$shared_ptr@$$CBX@std@@AEAVAccessorBaseHost@145@@Z +?attachLifetime@detail@oneapi@ext@sycl@cl@@YAXAEAV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@AEAV?$shared_ptr@$$CBX@7@PEAX@Z ?barrier@handler@sycl@cl@@QEAAXAEBV?$vector@Vevent@sycl@cl@@V?$allocator@Vevent@sycl@cl@@@std@@@std@@@Z ?barrier@handler@sycl@cl@@QEAAXXZ ?begin@exception_list@sycl@cl@@QEBA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@Vexception_ptr@std@@@std@@@std@@@std@@XZ @@ -1689,6 +1691,7 @@ ?depends_on@handler@sycl@cl@@QEAAXAEBV?$vector@Vevent@sycl@cl@@V?$allocator@Vevent@sycl@cl@@@std@@@std@@@Z ?depends_on@handler@sycl@cl@@QEAAXVevent@23@@Z ?destructorNotification@buffer_impl@detail@sycl@cl@@QEAAXPEAX@Z +?detachResources@SYCLMemObjT@detail@sycl@cl@@IEBAXXZ ?determineHostPtr@SYCLMemObjT@detail@sycl@cl@@IEAAXAEBV?$shared_ptr@Vcontext_impl@detail@sycl@cl@@@std@@_NAEAPEAXAEA_N@Z ?device_has@queue@sycl@cl@@QEBA_NW4aspect@23@@Z ?die@pi@detail@sycl@cl@@YAXPEBD@Z