From 041f7ee20b08323ed25f41cf4478606302fb25c2 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Tue, 15 Feb 2022 21:43:32 +0300 Subject: [PATCH 1/6] [SYCL] Attach auxiliary resources to memory objects Some reductions may require additional resources. These resources can be buffers, so to avoid introducing unintended synchronization points these resources must be stored until later. Previously however, storing these resources were done as part of the associated command, which could mean that there would be a circular reference keeping both alive, consequently causing a memory leak. These changes introduce a new strategy for these auxiliary resources as being "attached" to memory objects (USM and SYCLMemObjI). Attachments are tracked by the scheduler and when a memory object is being freed it will detach - and for USM defer the deletion of - the resources. Signed-off-by: Steffen Larsen --- sycl/include/CL/sycl/detail/buffer_impl.hpp | 1 + sycl/include/CL/sycl/detail/image_impl.hpp | 1 + .../include/CL/sycl/detail/sycl_mem_obj_t.hpp | 3 + sycl/include/CL/sycl/handler.hpp | 1 + sycl/include/sycl/ext/oneapi/reduction.hpp | 38 ++++- sycl/source/detail/reduction.cpp | 11 ++ sycl/source/detail/scheduler/scheduler.cpp | 133 +++++++++++++++--- sycl/source/detail/scheduler/scheduler.hpp | 45 ++++++ sycl/source/detail/sycl_mem_obj_t.cpp | 8 ++ sycl/source/detail/usm/usm_impl.cpp | 2 + sycl/test/abi/sycl_symbols_linux.dump | 3 + 11 files changed, 218 insertions(+), 28 deletions(-) diff --git a/sycl/include/CL/sycl/detail/buffer_impl.hpp b/sycl/include/CL/sycl/detail/buffer_impl.hpp index 09595d31bae52..8ac49277d25df 100644 --- a/sycl/include/CL/sycl/detail/buffer_impl.hpp +++ b/sycl/include/CL/sycl/detail/buffer_impl.hpp @@ -169,6 +169,7 @@ class __SYCL_EXPORT buffer_impl final : public SYCLMemObjT { ~buffer_impl() { try { BaseT::updateHostMemory(); + BaseT::detachResources(); } catch (...) { } destructorNotification(this); diff --git a/sycl/include/CL/sycl/detail/image_impl.hpp b/sycl/include/CL/sycl/detail/image_impl.hpp index b708e29e9f674..4f89f536f2732 100644 --- a/sycl/include/CL/sycl/detail/image_impl.hpp +++ b/sycl/include/CL/sycl/detail/image_impl.hpp @@ -229,6 +229,7 @@ class __SYCL_EXPORT image_impl final : public SYCLMemObjT { ~image_impl() { try { BaseT::updateHostMemory(); + BaseT::detachResources(); } catch (...) { } } diff --git a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp index 09639d011f923..61af7a9630e2d 100644 --- a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp +++ b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp @@ -203,6 +203,9 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { // members must be alive. void updateHostMemory(); + // Detach additional resources associated with the memory object. + void detachResources() const; + public: __SYCL_DLL_LOCAL bool useHostPtr() { return has_property() || diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 341a46ca9679d..c758013ad1057 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -473,6 +473,7 @@ class __SYCL_EXPORT handler { /// They are then forwarded to command group and destroyed only after /// the command group finishes the work on device/host. /// The 'MSharedPtrStorage' suits that need. + /// NOTE: This is no longer in use and should be removed with next ABI break. /// /// @param ReduObj is a pointer to object that must be stored. void addReduction(const std::shared_ptr &ReduObj) { diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 13e1c312fcc89..02cef9b712325 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -366,6 +366,15 @@ template struct AreAllButLastReductions { static constexpr bool value = !std::is_base_of::value; }; +/// Helper for attaching a resource to the lifetime of the memory associated +/// with accessor. +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, + detail::AccessorBaseHost &AttachTo); + +/// Helper for attaching a resource to the lifetime of USM memory. +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, + void *AttachTo); + /// This class encapsulates the reduction variable/accessor, /// the reduction operator and an optional operator identity. template getReadAccToPreviousPartialReds(handler &CGH) const { - CGH.addReduction(MOutBufPtr); + attachResourceLifetimeToMem(MOutBufPtr); return {*MOutBufPtr, CGH}; } @@ -673,7 +682,7 @@ class reduction_impl : private reduction_impl_base { std::enable_if_t getWriteMemForPartialReds(size_t Size, handler &CGH) { MOutBufPtr = std::make_shared>(range<1>(Size)); - CGH.addReduction(MOutBufPtr); + attachResourceLifetimeToMem(MOutBufPtr); return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); } @@ -691,7 +700,7 @@ class reduction_impl : private reduction_impl_base { // Create a new output buffer and return an accessor to it. MOutBufPtr = std::make_shared>(range<1>(Size)); - CGH.addReduction(MOutBufPtr); + attachResourceLifetimeToMem(MOutBufPtr); return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); } @@ -707,9 +716,9 @@ class reduction_impl : private reduction_impl_base { return *MRWAcc; auto RWReduVal = std::make_shared(MIdentity); - CGH.addReduction(RWReduVal); + attachResourceLifetimeToMem(RWReduVal); MOutBufPtr = std::make_shared>(RWReduVal.get(), range<1>(1)); - CGH.addReduction(MOutBufPtr); + attachResourceLifetimeToMem(MOutBufPtr); return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); } @@ -717,9 +726,9 @@ class reduction_impl : private reduction_impl_base { access::placeholder::false_t> getReadWriteAccessorToInitializedGroupsCounter(handler &CGH) { auto CounterMem = std::make_shared(0); - CGH.addReduction(CounterMem); + attachResourceLifetimeToMem(CounterMem); auto CounterBuf = std::make_shared>(CounterMem.get(), 1); - CGH.addReduction(CounterBuf); + attachResourceLifetimeToMem(CounterBuf); return {*CounterBuf, CGH}; } @@ -767,6 +776,21 @@ class reduction_impl : private reduction_impl_base { return Acc; } + /// Attaches the resource to the lifetime of the associated memory of the + /// reduction. + void attachResourceLifetimeToMem(std::shared_ptr Resource) const { +#ifndef __SYCL_DEVICE_ONLY__ + if (is_usm) + detail::attachLifetime(Resource, MUSMPointer); + else if (MDWAcc != nullptr) + detail::attachLifetime(Resource, *MDWAcc); + else + detail::attachLifetime(Resource, *MRWAcc); +#else + (void)Resource; +#endif + } + /// Identity of the BinaryOperation. /// The result of BinaryOperation(X, MIdentity) is equal to X for any X. const T MIdentity; diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index 508600b796e83..ca3d04bd9021c 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -110,6 +110,17 @@ reduGetMaxWGSize(std::shared_ptr Queue, return WGSize; } +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, + detail::AccessorBaseHost &AttachTo) { + Scheduler::getInstance().attachLifetimeToMemObj( + Resource, getSyclObjImpl(AttachTo)->MSYCLMemObj); +} + +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, + void *AttachTo) { + Scheduler::getInstance().attachLifetimeToUSM(Resource, AttachTo); +} + } // namespace detail } // namespace oneapi } // namespace ext diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 232ee0a5d6e47..d802468f62ef3 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -371,6 +371,60 @@ void Scheduler::deallocateStreamBuffers(stream_impl *Impl) { StreamBuffersPool.erase(Impl); } +void Scheduler::attachLifetimeToMemObj(std::shared_ptr &Resource, + const SYCLMemObjI *AttachTo) { + std::lock_guard lock(m_MemObjLifetimeAttachedResourcesMutex); + auto AttachedResourcesIt = m_MemObjLifetimeAttachedResources.find(AttachTo); + if (AttachedResourcesIt != m_MemObjLifetimeAttachedResources.end()) + AttachedResourcesIt->second.push_back(Resource); + else + m_MemObjLifetimeAttachedResources.insert({AttachTo, {Resource}}); +} + +void Scheduler::detachMemObjLifetimeResources(const SYCLMemObjI *AttachedTo) { + // Swap the attached resources and let them go out of scope without the lock. + // This is required as they could potentially have their own attached + // resources they need to detach. + std::vector> AttachedResources; + { + std::lock_guard lock(m_MemObjLifetimeAttachedResourcesMutex); + auto AttachedResourcesIt = + m_MemObjLifetimeAttachedResources.find(AttachedTo); + if (AttachedResourcesIt == m_MemObjLifetimeAttachedResources.end()) + return; + std::swap(AttachedResourcesIt->second, AttachedResources); + m_MemObjLifetimeAttachedResources.erase(AttachedResourcesIt); + } +} + +void Scheduler::attachLifetimeToUSM(std::shared_ptr &Resource, + const void *AttachTo) { + std::lock_guard lock(m_USMLifetimeAttachedResourcesMutex); + auto AttachedResourcesIt = m_USMLifetimeAttachedResources.find(AttachTo); + if (AttachedResourcesIt != m_USMLifetimeAttachedResources.end()) + AttachedResourcesIt->second.push_back(Resource); + else + m_USMLifetimeAttachedResources.insert({AttachTo, {Resource}}); +} + +void Scheduler::deferredDetachUSMLifetimeResources(const void *AttachedTo) { + std::vector> AttachedResources; + { + std::lock_guard lock(m_USMLifetimeAttachedResourcesMutex); + auto AttachedResourcesIt = m_USMLifetimeAttachedResources.find(AttachedTo); + if (AttachedResourcesIt == m_USMLifetimeAttachedResources.end()) + return; + std::swap(AttachedResourcesIt->second, AttachedResources); + m_USMLifetimeAttachedResources.erase(AttachedResourcesIt); + } + { + std::lock_guard Lock{MDeferredCleanupMutex}; + MDeferredCleanupResources.insert(MDeferredCleanupResources.begin(), + AttachedResources.begin(), + AttachedResources.end()); + } +} + Scheduler::Scheduler() { sycl::device HostDevice; sycl::context HostContext{HostDevice}; @@ -396,10 +450,10 @@ Scheduler::~Scheduler() { "not all resources were released. Please be sure that all kernels " "have synchronization points.\n\n"); } - // There might be some commands scheduled for post enqueue cleanup that - // haven't been freed because of the graph mutex being locked at the time, - // clean them up now. - cleanupCommands({}); + // There might be some commands and resources scheduled for post enqueue + // cleanup that haven't been freed because of the graph mutex being locked at + // the time, clean them up now. + cleanupDeferred(); } void Scheduler::acquireWriteLock(WriteLockT &Lock) { @@ -427,29 +481,66 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { return Req->MSYCLMemObj->MRecord.get(); } -void Scheduler::cleanupCommands(const std::vector &Cmds) { - if (Cmds.empty()) - return; - WriteLockT Lock(MGraphLock, std::try_to_lock); - // In order to avoid deadlocks related to blocked commands, defer cleanup if - // the lock wasn't acquired. - if (Lock.owns_lock()) { - for (Command *Cmd : Cmds) { - MGraphBuilder.cleanupCommand(Cmd); - } - std::vector DeferredCleanupCommands; +void Scheduler::cleanupDeferred() { + std::vector DeferredCleanupCommands; + std::vector> DeferredCleanupResources; + // Cleaning up commands and resources may create more deferred commands and + // resources to clean up. + while (true) { { + // Note: Operations acquiring the graph lock are prohibited here as they + // may lead to dead-locks. std::lock_guard Lock{MDeferredCleanupMutex}; + + // Cleanup is done when there are no more deferred commands and resources. + if (MDeferredCleanupCommands.empty() && MDeferredCleanupResources.empty()) + return; + std::swap(DeferredCleanupCommands, MDeferredCleanupCommands); + std::swap(DeferredCleanupResources, MDeferredCleanupResources); } - for (Command *Cmd : DeferredCleanupCommands) { - MGraphBuilder.cleanupCommand(Cmd); + { + WriteLockT Lock(MGraphLock); + for (Command *Cmd : DeferredCleanupCommands) + MGraphBuilder.cleanupCommand(Cmd); } + DeferredCleanupCommands.clear(); + // Release resources without holding the graph-lock. + DeferredCleanupResources.clear(); + } +} - } else { - std::lock_guard Lock{MDeferredCleanupMutex}; - MDeferredCleanupCommands.insert(MDeferredCleanupCommands.end(), - Cmds.begin(), Cmds.end()); +void Scheduler::cleanupCommands(const std::vector &Cmds) { + if (Cmds.empty()) + return; + + // Create holder for cleaning up resources outside the scope that acquires the + // graph-lock. Once it goes out of scope it may cause destruction of resources + // that will acquire the lock themselves. + std::vector> DeferredCleanupResources; + { + WriteLockT Lock(MGraphLock, std::try_to_lock); + // In order to avoid deadlocks related to blocked commands, defer cleanup if + // the lock wasn't acquired. + if (Lock.owns_lock()) { + for (Command *Cmd : Cmds) { + MGraphBuilder.cleanupCommand(Cmd); + } + std::vector DeferredCleanupCommands; + { + std::lock_guard Lock{MDeferredCleanupMutex}; + std::swap(DeferredCleanupCommands, MDeferredCleanupCommands); + std::swap(DeferredCleanupResources, MDeferredCleanupResources); + } + for (Command *Cmd : DeferredCleanupCommands) { + MGraphBuilder.cleanupCommand(Cmd); + } + + } else { + std::lock_guard Lock{MDeferredCleanupMutex}; + MDeferredCleanupCommands.insert(MDeferredCleanupCommands.end(), + Cmds.begin(), Cmds.end()); + } } } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 18ed2f5004c06..9370528e248da 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -443,6 +443,31 @@ class Scheduler { static MemObjRecord *getMemObjRecord(const Requirement *const Req); + /// Attach a resource to a memory object. + /// + /// \param Resource is the resource to attach to the memory object + /// \param AttachTo is the memory object to attach the resource to + void attachLifetimeToMemObj(std::shared_ptr &Resource, + const SYCLMemObjI *AttachTo); + + /// Detach all resources attached to a memory object. + /// + /// \param AttachedTo is the memory object to detach resources from + void detachMemObjLifetimeResources(const SYCLMemObjI *AttachedTo); + + /// Attach a resource to a USM pointer. + /// + /// \param Resource is the resource to attach to the USM pointer + /// \param AttachTo is the USM pointer to attach the resource to + void attachLifetimeToUSM(std::shared_ptr &Resource, + const void *AttachTo); + + /// Detach all resources attached to a USM pointer. Release of these resources + /// is deferred. + /// + /// \param AttachedTo is the USM pointer to detach resources from + void deferredDetachUSMLifetimeResources(const void *AttachedTo); + Scheduler(); ~Scheduler(); @@ -459,6 +484,9 @@ class Scheduler { /// \param Lock is an instance of WriteLockT, created with \c std::defer_lock void acquireWriteLock(WriteLockT &Lock); + /// Forces a cleanup of all deferred commands and resources. + void cleanupDeferred(); + void cleanupCommands(const std::vector &Cmds); static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, @@ -766,6 +794,7 @@ class Scheduler { RWLockT MGraphLock; std::vector MDeferredCleanupCommands; + std::vector> MDeferredCleanupResources; std::mutex MDeferredCleanupMutex; QueueImplPtr DefaultHostQueue; @@ -820,6 +849,22 @@ class Scheduler { // scheduler. If program is not correct and doesn't have necessary sync point // then warning will be issued. std::unordered_map StreamBuffersPool; + + /// Matches SYCL memory objects to attached resources. + /// TODO: On ABI break this could be made part of SYCLMemObjT instead. + std::unordered_map>> + m_MemObjLifetimeAttachedResources; + + /// Protects m_MemObjLifetimeAttachedResources. + std::mutex m_MemObjLifetimeAttachedResourcesMutex; + + /// Matches USM pointers to attached resources. + std::unordered_map>> + m_USMLifetimeAttachedResources; + + /// Protects m_USMLifetimeAttachedResources. + std::mutex m_USMLifetimeAttachedResourcesMutex; }; } // namespace detail diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 191a15c0d5b8e..0f396d34bd471 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -90,6 +90,14 @@ void SYCLMemObjT::updateHostMemory() { pi::cast(MInteropMemObject)); } } + +// TODO: With ABI break the attached resources can be held by this type. When +// that happens this will be obsolete as the resources will automatically be +// destroyed with the object. +void SYCLMemObjT::detachResources() const { + Scheduler::getInstance().detachMemObjLifetimeResources(this); +} + const plugin &SYCLMemObjT::getPlugin() const { assert((MInteropContext != nullptr) && "Trying to get Plugin from SYCLMemObjT with nullptr ContextImpl."); diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 0650877131c50..502078c0b143c 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -164,6 +164,8 @@ void free(void *Ptr, const context &Ctxt, const detail::code_location &CL) { const detail::plugin &Plugin = CtxImpl->getPlugin(); Plugin.call(C, Ptr); } + // Detach resources and mark for deferred deletion. + Scheduler::getInstance().deferredDetachUSMLifetimeResources(Ptr); } // For ABI compatibility diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 2681453647fb6..686c2acc3fe05 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3697,6 +3697,8 @@ _ZN2cl4sycl3ext6oneapi10level_zero12make_programERKNS0_7contextEm _ZN2cl4sycl3ext6oneapi10level_zero13make_platformEm _ZN2cl4sycl3ext6oneapi15filter_selectorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN2cl4sycl3ext6oneapi15filter_selectorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE +_ZN2cl4sycl3ext6oneapi6detail14attachLifetimeERSt10shared_ptrIKvEPv +_ZN2cl4sycl3ext6oneapi6detail14attachLifetimeERSt10shared_ptrIKvERNS0_6detail16AccessorBaseHostE _ZN2cl4sycl3ext6oneapi6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm _ZN2cl4sycl3ext6oneapi6detail17reduComputeWGSizeEmmRm _ZN2cl4sycl3ext6oneapi6detail33reduGetMaxNumConcurrentWorkGroupsESt10shared_ptrINS0_6detail10queue_implEE @@ -4148,6 +4150,7 @@ _ZNK2cl4sycl6detail10image_implILi3EE4sizeEv _ZNK2cl4sycl6detail10image_implILi3EE7getTypeEv _ZNK2cl4sycl6detail10image_implILi3EE9get_countEv _ZNK2cl4sycl6detail10image_implILi3EE9get_rangeEv +_ZNK2cl4sycl6detail11SYCLMemObjT15detachResourcesEv _ZNK2cl4sycl6detail11SYCLMemObjT9getPluginEv _ZNK2cl4sycl6detail11SYCLMemObjT9isInteropEv _ZNK2cl4sycl6detail11stream_impl22get_max_statement_sizeEv From 6a7079bc55ece81e08fda5bf08702f6d7fba1ee7 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 16 Feb 2022 16:13:40 +0300 Subject: [PATCH 2/6] Don't defer USM resource release Signed-off-by: Steffen Larsen --- sycl/source/detail/scheduler/scheduler.cpp | 87 ++++++---------------- sycl/source/detail/scheduler/scheduler.hpp | 9 +-- sycl/source/detail/usm/usm_impl.cpp | 4 +- 3 files changed, 26 insertions(+), 74 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index d802468f62ef3..8df2241d9997a 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -407,7 +407,7 @@ void Scheduler::attachLifetimeToUSM(std::shared_ptr &Resource, m_USMLifetimeAttachedResources.insert({AttachTo, {Resource}}); } -void Scheduler::deferredDetachUSMLifetimeResources(const void *AttachedTo) { +void Scheduler::detachUSMLifetimeResources(const void *AttachedTo) { std::vector> AttachedResources; { std::lock_guard lock(m_USMLifetimeAttachedResourcesMutex); @@ -417,12 +417,6 @@ void Scheduler::deferredDetachUSMLifetimeResources(const void *AttachedTo) { std::swap(AttachedResourcesIt->second, AttachedResources); m_USMLifetimeAttachedResources.erase(AttachedResourcesIt); } - { - std::lock_guard Lock{MDeferredCleanupMutex}; - MDeferredCleanupResources.insert(MDeferredCleanupResources.begin(), - AttachedResources.begin(), - AttachedResources.end()); - } } Scheduler::Scheduler() { @@ -450,10 +444,10 @@ Scheduler::~Scheduler() { "not all resources were released. Please be sure that all kernels " "have synchronization points.\n\n"); } - // There might be some commands and resources scheduled for post enqueue - // cleanup that haven't been freed because of the graph mutex being locked at - // the time, clean them up now. - cleanupDeferred(); + // There might be some commands scheduled for post enqueue cleanup that + // haven't been freed because of the graph mutex being locked at the time, + // clean them up now. + cleanupCommands({}); } void Scheduler::acquireWriteLock(WriteLockT &Lock) { @@ -481,66 +475,29 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { return Req->MSYCLMemObj->MRecord.get(); } -void Scheduler::cleanupDeferred() { - std::vector DeferredCleanupCommands; - std::vector> DeferredCleanupResources; - // Cleaning up commands and resources may create more deferred commands and - // resources to clean up. - while (true) { +void Scheduler::cleanupCommands(const std::vector &Cmds) { + if (Cmds.empty()) + return; + WriteLockT Lock(MGraphLock, std::try_to_lock); + // In order to avoid deadlocks related to blocked commands, defer cleanup if + // the lock wasn't acquired. + if (Lock.owns_lock()) { + for (Command *Cmd : Cmds) { + MGraphBuilder.cleanupCommand(Cmd); + } + std::vector DeferredCleanupCommands; { - // Note: Operations acquiring the graph lock are prohibited here as they - // may lead to dead-locks. std::lock_guard Lock{MDeferredCleanupMutex}; - - // Cleanup is done when there are no more deferred commands and resources. - if (MDeferredCleanupCommands.empty() && MDeferredCleanupResources.empty()) - return; - std::swap(DeferredCleanupCommands, MDeferredCleanupCommands); - std::swap(DeferredCleanupResources, MDeferredCleanupResources); } - { - WriteLockT Lock(MGraphLock); - for (Command *Cmd : DeferredCleanupCommands) - MGraphBuilder.cleanupCommand(Cmd); + for (Command *Cmd : DeferredCleanupCommands) { + MGraphBuilder.cleanupCommand(Cmd); } - DeferredCleanupCommands.clear(); - // Release resources without holding the graph-lock. - DeferredCleanupResources.clear(); - } -} -void Scheduler::cleanupCommands(const std::vector &Cmds) { - if (Cmds.empty()) - return; - - // Create holder for cleaning up resources outside the scope that acquires the - // graph-lock. Once it goes out of scope it may cause destruction of resources - // that will acquire the lock themselves. - std::vector> DeferredCleanupResources; - { - WriteLockT Lock(MGraphLock, std::try_to_lock); - // In order to avoid deadlocks related to blocked commands, defer cleanup if - // the lock wasn't acquired. - if (Lock.owns_lock()) { - for (Command *Cmd : Cmds) { - MGraphBuilder.cleanupCommand(Cmd); - } - std::vector DeferredCleanupCommands; - { - std::lock_guard Lock{MDeferredCleanupMutex}; - std::swap(DeferredCleanupCommands, MDeferredCleanupCommands); - std::swap(DeferredCleanupResources, MDeferredCleanupResources); - } - for (Command *Cmd : DeferredCleanupCommands) { - MGraphBuilder.cleanupCommand(Cmd); - } - - } else { - std::lock_guard Lock{MDeferredCleanupMutex}; - MDeferredCleanupCommands.insert(MDeferredCleanupCommands.end(), - Cmds.begin(), Cmds.end()); - } + } else { + std::lock_guard Lock{MDeferredCleanupMutex}; + MDeferredCleanupCommands.insert(MDeferredCleanupCommands.end(), + Cmds.begin(), Cmds.end()); } } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 9370528e248da..1ee489d162c04 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -462,11 +462,10 @@ class Scheduler { void attachLifetimeToUSM(std::shared_ptr &Resource, const void *AttachTo); - /// Detach all resources attached to a USM pointer. Release of these resources - /// is deferred. + /// Detach all resources attached to a USM pointer. /// /// \param AttachedTo is the USM pointer to detach resources from - void deferredDetachUSMLifetimeResources(const void *AttachedTo); + void detachUSMLifetimeResources(const void *AttachedTo); Scheduler(); ~Scheduler(); @@ -484,9 +483,6 @@ class Scheduler { /// \param Lock is an instance of WriteLockT, created with \c std::defer_lock void acquireWriteLock(WriteLockT &Lock); - /// Forces a cleanup of all deferred commands and resources. - void cleanupDeferred(); - void cleanupCommands(const std::vector &Cmds); static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, @@ -794,7 +790,6 @@ class Scheduler { RWLockT MGraphLock; std::vector MDeferredCleanupCommands; - std::vector> MDeferredCleanupResources; std::mutex MDeferredCleanupMutex; QueueImplPtr DefaultHostQueue; diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 502078c0b143c..8c83bd9e8908e 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -164,8 +164,8 @@ void free(void *Ptr, const context &Ctxt, const detail::code_location &CL) { const detail::plugin &Plugin = CtxImpl->getPlugin(); Plugin.call(C, Ptr); } - // Detach resources and mark for deferred deletion. - Scheduler::getInstance().deferredDetachUSMLifetimeResources(Ptr); + // Detach resources. + Scheduler::getInstance().detachUSMLifetimeResources(Ptr); } // For ABI compatibility From 738d5ee909aac8226a4138bfccd1e035a749ab7a Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 16 Feb 2022 17:09:18 +0300 Subject: [PATCH 3/6] Add Windows symbols Signed-off-by: Steffen Larsen --- sycl/test/abi/sycl_symbols_windows.dump | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 2359db7857240..3371ec3b82691 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1313,6 +1313,8 @@ ?atanpi@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V34562@@Z ?atanpi@__host_std@cl@@YAMM@Z ?atanpi@__host_std@cl@@YANN@Z +?attachLifetime@detail@oneapi@ext@sycl@cl@@YAXAEAV?$shared_ptr@$$CBX@std@@AEAVAccessorBaseHost@145@@Z +?attachLifetime@detail@oneapi@ext@sycl@cl@@YAXAEAV?$shared_ptr@$$CBX@std@@PEAX@Z ?barrier@handler@sycl@cl@@QEAAXAEBV?$vector@Vevent@sycl@cl@@V?$allocator@Vevent@sycl@cl@@@std@@@std@@@Z ?barrier@handler@sycl@cl@@QEAAXXZ ?begin@exception_list@sycl@cl@@QEBA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@Vexception_ptr@std@@@std@@@std@@@std@@XZ @@ -1689,6 +1691,7 @@ ?depends_on@handler@sycl@cl@@QEAAXAEBV?$vector@Vevent@sycl@cl@@V?$allocator@Vevent@sycl@cl@@@std@@@std@@@Z ?depends_on@handler@sycl@cl@@QEAAXVevent@23@@Z ?destructorNotification@buffer_impl@detail@sycl@cl@@QEAAXPEAX@Z +?detachResources@SYCLMemObjT@detail@sycl@cl@@IEBAXXZ ?determineHostPtr@SYCLMemObjT@detail@sycl@cl@@IEAAXAEBV?$shared_ptr@Vcontext_impl@detail@sycl@cl@@@std@@_NAEAPEAXAEA_N@Z ?device_has@queue@sycl@cl@@QEBA_NW4aspect@23@@Z ?die@pi@detail@sycl@cl@@YAXPEBD@Z From 6e977a1468fbe36c0efaa70b2c973a3f9b8e74b9 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 17 Feb 2022 02:15:50 +0300 Subject: [PATCH 4/6] Moving resource tracking to context Signed-off-by: Steffen Larsen --- sycl/include/sycl/ext/oneapi/reduction.hpp | 30 +++++---- sycl/source/detail/context_impl.cpp | 71 ++++++++++++++++++++++ sycl/source/detail/context_impl.hpp | 41 +++++++++++++ sycl/source/detail/reduction.cpp | 12 ++-- sycl/source/detail/scheduler/scheduler.cpp | 48 --------------- sycl/source/detail/scheduler/scheduler.hpp | 40 ------------ sycl/source/detail/sycl_mem_obj_t.cpp | 3 +- sycl/source/detail/usm/usm_impl.cpp | 2 +- sycl/test/abi/sycl_symbols_linux.dump | 4 +- 9 files changed, 141 insertions(+), 110 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 02cef9b712325..f0cc01122049a 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -368,11 +368,13 @@ template struct AreAllButLastReductions { /// Helper for attaching a resource to the lifetime of the memory associated /// with accessor. -__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Context, + std::shared_ptr &Resource, detail::AccessorBaseHost &AttachTo); /// Helper for attaching a resource to the lifetime of USM memory. -__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Context, + std::shared_ptr &Resource, void *AttachTo); /// This class encapsulates the reduction variable/accessor, @@ -654,7 +656,7 @@ class reduction_impl : private reduction_impl_base { accessor getReadAccToPreviousPartialReds(handler &CGH) const { - attachResourceLifetimeToMem(MOutBufPtr); + attachResourceLifetimeToMem(CGH, MOutBufPtr); return {*MOutBufPtr, CGH}; } @@ -682,7 +684,7 @@ class reduction_impl : private reduction_impl_base { std::enable_if_t getWriteMemForPartialReds(size_t Size, handler &CGH) { MOutBufPtr = std::make_shared>(range<1>(Size)); - attachResourceLifetimeToMem(MOutBufPtr); + attachResourceLifetimeToMem(CGH, MOutBufPtr); return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); } @@ -700,7 +702,7 @@ class reduction_impl : private reduction_impl_base { // Create a new output buffer and return an accessor to it. MOutBufPtr = std::make_shared>(range<1>(Size)); - attachResourceLifetimeToMem(MOutBufPtr); + attachResourceLifetimeToMem(CGH, MOutBufPtr); return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); } @@ -716,9 +718,9 @@ class reduction_impl : private reduction_impl_base { return *MRWAcc; auto RWReduVal = std::make_shared(MIdentity); - attachResourceLifetimeToMem(RWReduVal); + attachResourceLifetimeToMem(CGH, RWReduVal); MOutBufPtr = std::make_shared>(RWReduVal.get(), range<1>(1)); - attachResourceLifetimeToMem(MOutBufPtr); + attachResourceLifetimeToMem(CGH, MOutBufPtr); return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); } @@ -726,9 +728,9 @@ class reduction_impl : private reduction_impl_base { access::placeholder::false_t> getReadWriteAccessorToInitializedGroupsCounter(handler &CGH) { auto CounterMem = std::make_shared(0); - attachResourceLifetimeToMem(CounterMem); + attachResourceLifetimeToMem(CGH, CounterMem); auto CounterBuf = std::make_shared>(CounterMem.get(), 1); - attachResourceLifetimeToMem(CounterBuf); + attachResourceLifetimeToMem(CGH, CounterBuf); return {*CounterBuf, CGH}; } @@ -778,15 +780,17 @@ class reduction_impl : private reduction_impl_base { /// Attaches the resource to the lifetime of the associated memory of the /// reduction. - void attachResourceLifetimeToMem(std::shared_ptr Resource) const { + void attachResourceLifetimeToMem(handler &CGH, + std::shared_ptr Resource) const { #ifndef __SYCL_DEVICE_ONLY__ if (is_usm) - detail::attachLifetime(Resource, MUSMPointer); + detail::attachLifetime(CGH.MQueue, Resource, MUSMPointer); else if (MDWAcc != nullptr) - detail::attachLifetime(Resource, *MDWAcc); + detail::attachLifetime(CGH.MQueue, Resource, *MDWAcc); else - detail::attachLifetime(Resource, *MRWAcc); + detail::attachLifetime(CGH.MQueue, Resource, *MRWAcc); #else + (void)CGH; (void)Resource; #endif } diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 18390606df128..ceabd6dffd3bf 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -115,6 +115,25 @@ cl_context context_impl::get() const { bool context_impl::is_host() const { return MHostContext; } context_impl::~context_impl() { + // In case a user is leaking a memory object we may still have attached + // resources. These resources will be released with the context, but we need + // to do it before we release the backend context to avoid confusing errors. + std::unordered_map>> + MemObjLTARes; + { + std::lock_guard lock(MMemObjLifetimeAttachedResourcesMutex); + std::swap(MemObjLTARes, MMemObjLifetimeAttachedResources); + } + MemObjLTARes.clear(); + std::unordered_map>> + USMLTARes; + { + std::lock_guard lock(MUSMLifetimeAttachedResourcesMutex); + std::swap(USMLTARes, MUSMLifetimeAttachedResources); + } + USMLTARes.clear(); + for (auto LibProg : MCachedLibPrograms) { assert(LibProg.second && "Null program must not be kept in the cache"); getPlugin().call(LibProg.second); @@ -206,6 +225,58 @@ pi_native_handle context_impl::getNative() const { return Handle; } +void context_impl::attachLifetimeToMemObj(std::shared_ptr &Resource, + const SYCLMemObjI *AttachTo) { + std::lock_guard lock(MMemObjLifetimeAttachedResourcesMutex); + auto AttachedResourcesIt = MMemObjLifetimeAttachedResources.find(AttachTo); + if (AttachedResourcesIt != MMemObjLifetimeAttachedResources.end()) + AttachedResourcesIt->second.push_back(Resource); + else + MMemObjLifetimeAttachedResources.insert({AttachTo, {Resource}}); +} + +void context_impl::detachMemObjLifetimeResources( + const SYCLMemObjI *AttachedTo) { + // Swap the attached resources and let them go out of scope without the lock. + // This is required as they could potentially have their own attached + // resources they need to detach. + std::vector> AttachedResources; + { + std::lock_guard lock(MMemObjLifetimeAttachedResourcesMutex); + auto AttachedResourcesIt = + MMemObjLifetimeAttachedResources.find(AttachedTo); + if (AttachedResourcesIt == MMemObjLifetimeAttachedResources.end()) + return; + std::swap(AttachedResourcesIt->second, AttachedResources); + MMemObjLifetimeAttachedResources.erase(AttachedResourcesIt); + } +} + +void context_impl::attachLifetimeToUSM(std::shared_ptr &Resource, + const void *AttachTo) { + std::lock_guard lock(MUSMLifetimeAttachedResourcesMutex); + auto AttachedResourcesIt = MUSMLifetimeAttachedResources.find(AttachTo); + if (AttachedResourcesIt != MUSMLifetimeAttachedResources.end()) + AttachedResourcesIt->second.push_back(Resource); + else + MUSMLifetimeAttachedResources.insert({AttachTo, {Resource}}); +} + +void context_impl::detachUSMLifetimeResources(const void *AttachedTo) { + // Swap the attached resources and let them go out of scope without the lock. + // This is required as they could potentially have their own attached + // resources they need to detach. + std::vector> AttachedResources; + { + std::lock_guard lock(MUSMLifetimeAttachedResourcesMutex); + auto AttachedResourcesIt = MUSMLifetimeAttachedResources.find(AttachedTo); + if (AttachedResourcesIt == MUSMLifetimeAttachedResources.end()) + return; + std::swap(AttachedResourcesIt->second, AttachedResources); + MUSMLifetimeAttachedResources.erase(AttachedResourcesIt); + } +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 722d2c0789d03..08677ee2654a1 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -167,6 +168,30 @@ class context_impl { /// \return a native handle. pi_native_handle getNative() const; + /// Attach a resource to a memory object. + /// + /// \param Resource is the resource to attach to the memory object + /// \param AttachTo is the memory object to attach the resource to + void attachLifetimeToMemObj(std::shared_ptr &Resource, + const SYCLMemObjI *AttachTo); + + /// Detach all resources attached to a memory object. + /// + /// \param AttachedTo is the memory object to detach resources from + void detachMemObjLifetimeResources(const SYCLMemObjI *AttachedTo); + + /// Attach a resource to a USM pointer. + /// + /// \param Resource is the resource to attach to the USM pointer + /// \param AttachTo is the USM pointer to attach the resource to + void attachLifetimeToUSM(std::shared_ptr &Resource, + const void *AttachTo); + + /// Detach all resources attached to a USM pointer. + /// + /// \param AttachedTo is the USM pointer to detach resources from + void detachUSMLifetimeResources(const void *AttachedTo); + private: async_handler MAsyncHandler; std::vector MDevices; @@ -177,6 +202,22 @@ class context_impl { std::map, RT::PiProgram> MCachedLibPrograms; mutable KernelProgramCache MKernelProgramCache; + + /// Matches SYCL memory objects to attached resources. + /// TODO: On ABI break this could be made part of SYCLMemObjT instead. + std::unordered_map>> + MMemObjLifetimeAttachedResources; + + /// Protects m_MemObjLifetimeAttachedResources. + std::mutex MMemObjLifetimeAttachedResourcesMutex; + + /// Matches USM pointers to attached resources. + std::unordered_map>> + MUSMLifetimeAttachedResources; + + /// Protects m_USMLifetimeAttachedResources. + std::mutex MUSMLifetimeAttachedResourcesMutex; }; } // namespace detail diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index ca3d04bd9021c..5e8f9388460a7 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -110,15 +110,17 @@ reduGetMaxWGSize(std::shared_ptr Queue, return WGSize; } -__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Queue, + std::shared_ptr &Resource, detail::AccessorBaseHost &AttachTo) { - Scheduler::getInstance().attachLifetimeToMemObj( - Resource, getSyclObjImpl(AttachTo)->MSYCLMemObj); + Queue->getContextImplPtr()->attachLifetimeToMemObj(Resource, + getSyclObjImpl(AttachTo)->MSYCLMemObj); } -__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Queue, + std::shared_ptr &Resource, void *AttachTo) { - Scheduler::getInstance().attachLifetimeToUSM(Resource, AttachTo); + Queue->getContextImplPtr()->attachLifetimeToUSM(Resource, AttachTo); } } // namespace detail diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 8df2241d9997a..232ee0a5d6e47 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -371,54 +371,6 @@ void Scheduler::deallocateStreamBuffers(stream_impl *Impl) { StreamBuffersPool.erase(Impl); } -void Scheduler::attachLifetimeToMemObj(std::shared_ptr &Resource, - const SYCLMemObjI *AttachTo) { - std::lock_guard lock(m_MemObjLifetimeAttachedResourcesMutex); - auto AttachedResourcesIt = m_MemObjLifetimeAttachedResources.find(AttachTo); - if (AttachedResourcesIt != m_MemObjLifetimeAttachedResources.end()) - AttachedResourcesIt->second.push_back(Resource); - else - m_MemObjLifetimeAttachedResources.insert({AttachTo, {Resource}}); -} - -void Scheduler::detachMemObjLifetimeResources(const SYCLMemObjI *AttachedTo) { - // Swap the attached resources and let them go out of scope without the lock. - // This is required as they could potentially have their own attached - // resources they need to detach. - std::vector> AttachedResources; - { - std::lock_guard lock(m_MemObjLifetimeAttachedResourcesMutex); - auto AttachedResourcesIt = - m_MemObjLifetimeAttachedResources.find(AttachedTo); - if (AttachedResourcesIt == m_MemObjLifetimeAttachedResources.end()) - return; - std::swap(AttachedResourcesIt->second, AttachedResources); - m_MemObjLifetimeAttachedResources.erase(AttachedResourcesIt); - } -} - -void Scheduler::attachLifetimeToUSM(std::shared_ptr &Resource, - const void *AttachTo) { - std::lock_guard lock(m_USMLifetimeAttachedResourcesMutex); - auto AttachedResourcesIt = m_USMLifetimeAttachedResources.find(AttachTo); - if (AttachedResourcesIt != m_USMLifetimeAttachedResources.end()) - AttachedResourcesIt->second.push_back(Resource); - else - m_USMLifetimeAttachedResources.insert({AttachTo, {Resource}}); -} - -void Scheduler::detachUSMLifetimeResources(const void *AttachedTo) { - std::vector> AttachedResources; - { - std::lock_guard lock(m_USMLifetimeAttachedResourcesMutex); - auto AttachedResourcesIt = m_USMLifetimeAttachedResources.find(AttachedTo); - if (AttachedResourcesIt == m_USMLifetimeAttachedResources.end()) - return; - std::swap(AttachedResourcesIt->second, AttachedResources); - m_USMLifetimeAttachedResources.erase(AttachedResourcesIt); - } -} - Scheduler::Scheduler() { sycl::device HostDevice; sycl::context HostContext{HostDevice}; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 1ee489d162c04..18ed2f5004c06 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -443,30 +443,6 @@ class Scheduler { static MemObjRecord *getMemObjRecord(const Requirement *const Req); - /// Attach a resource to a memory object. - /// - /// \param Resource is the resource to attach to the memory object - /// \param AttachTo is the memory object to attach the resource to - void attachLifetimeToMemObj(std::shared_ptr &Resource, - const SYCLMemObjI *AttachTo); - - /// Detach all resources attached to a memory object. - /// - /// \param AttachedTo is the memory object to detach resources from - void detachMemObjLifetimeResources(const SYCLMemObjI *AttachedTo); - - /// Attach a resource to a USM pointer. - /// - /// \param Resource is the resource to attach to the USM pointer - /// \param AttachTo is the USM pointer to attach the resource to - void attachLifetimeToUSM(std::shared_ptr &Resource, - const void *AttachTo); - - /// Detach all resources attached to a USM pointer. - /// - /// \param AttachedTo is the USM pointer to detach resources from - void detachUSMLifetimeResources(const void *AttachedTo); - Scheduler(); ~Scheduler(); @@ -844,22 +820,6 @@ class Scheduler { // scheduler. If program is not correct and doesn't have necessary sync point // then warning will be issued. std::unordered_map StreamBuffersPool; - - /// Matches SYCL memory objects to attached resources. - /// TODO: On ABI break this could be made part of SYCLMemObjT instead. - std::unordered_map>> - m_MemObjLifetimeAttachedResources; - - /// Protects m_MemObjLifetimeAttachedResources. - std::mutex m_MemObjLifetimeAttachedResourcesMutex; - - /// Matches USM pointers to attached resources. - std::unordered_map>> - m_USMLifetimeAttachedResources; - - /// Protects m_USMLifetimeAttachedResources. - std::mutex m_USMLifetimeAttachedResourcesMutex; }; } // namespace detail diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 0f396d34bd471..924e067d8d3d5 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -95,7 +95,8 @@ void SYCLMemObjT::updateHostMemory() { // that happens this will be obsolete as the resources will automatically be // destroyed with the object. void SYCLMemObjT::detachResources() const { - Scheduler::getInstance().detachMemObjLifetimeResources(this); + if (MInteropContext) + MInteropContext->detachMemObjLifetimeResources(this); } const plugin &SYCLMemObjT::getPlugin() const { diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 8c83bd9e8908e..897a7fb641b72 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -165,7 +165,7 @@ void free(void *Ptr, const context &Ctxt, const detail::code_location &CL) { Plugin.call(C, Ptr); } // Detach resources. - Scheduler::getInstance().detachUSMLifetimeResources(Ptr); + detail::getSyclObjImpl(Ctxt)->detachUSMLifetimeResources(Ptr); } // For ABI compatibility diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 686c2acc3fe05..6d8f200934da2 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3697,8 +3697,8 @@ _ZN2cl4sycl3ext6oneapi10level_zero12make_programERKNS0_7contextEm _ZN2cl4sycl3ext6oneapi10level_zero13make_platformEm _ZN2cl4sycl3ext6oneapi15filter_selectorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN2cl4sycl3ext6oneapi15filter_selectorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE -_ZN2cl4sycl3ext6oneapi6detail14attachLifetimeERSt10shared_ptrIKvEPv -_ZN2cl4sycl3ext6oneapi6detail14attachLifetimeERSt10shared_ptrIKvERNS0_6detail16AccessorBaseHostE +_ZN2cl4sycl3ext6oneapi6detail14attachLifetimeERSt10shared_ptrINS0_6detail10queue_implEERS4_IKvEPv +_ZN2cl4sycl3ext6oneapi6detail14attachLifetimeERSt10shared_ptrINS0_6detail10queue_implEERS4_IKvERNS5_16AccessorBaseHostE _ZN2cl4sycl3ext6oneapi6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm _ZN2cl4sycl3ext6oneapi6detail17reduComputeWGSizeEmmRm _ZN2cl4sycl3ext6oneapi6detail33reduGetMaxNumConcurrentWorkGroupsESt10shared_ptrINS0_6detail10queue_implEE From 97a90a5bebafb4769e668a03acd4089549adeb18 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 17 Feb 2022 16:32:38 +0300 Subject: [PATCH 5/6] Temporarily attach memory object map to global handler Signed-off-by: Steffen Larsen --- sycl/include/sycl/ext/oneapi/reduction.hpp | 9 +++--- sycl/source/detail/context_impl.cpp | 35 ---------------------- sycl/source/detail/context_impl.hpp | 22 -------------- sycl/source/detail/global_handler.cpp | 16 ++++++++++ sycl/source/detail/global_handler.hpp | 12 ++++++++ sycl/source/detail/reduction.cpp | 16 +++++++--- sycl/source/detail/sycl_mem_obj_t.cpp | 18 +++++++++-- sycl/test/abi/sycl_symbols_linux.dump | 2 +- 8 files changed, 61 insertions(+), 69 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index f0cc01122049a..329118e43b25c 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -368,12 +368,11 @@ template struct AreAllButLastReductions { /// Helper for attaching a resource to the lifetime of the memory associated /// with accessor. -__SYCL_EXPORT void attachLifetime(std::shared_ptr &Context, - std::shared_ptr &Resource, +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, detail::AccessorBaseHost &AttachTo); /// Helper for attaching a resource to the lifetime of USM memory. -__SYCL_EXPORT void attachLifetime(std::shared_ptr &Context, +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Queue, std::shared_ptr &Resource, void *AttachTo); @@ -786,9 +785,9 @@ class reduction_impl : private reduction_impl_base { if (is_usm) detail::attachLifetime(CGH.MQueue, Resource, MUSMPointer); else if (MDWAcc != nullptr) - detail::attachLifetime(CGH.MQueue, Resource, *MDWAcc); + detail::attachLifetime(Resource, *MDWAcc); else - detail::attachLifetime(CGH.MQueue, Resource, *MRWAcc); + detail::attachLifetime(Resource, *MRWAcc); #else (void)CGH; (void)Resource; diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index ceabd6dffd3bf..7decaff889b3c 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -118,14 +118,6 @@ context_impl::~context_impl() { // In case a user is leaking a memory object we may still have attached // resources. These resources will be released with the context, but we need // to do it before we release the backend context to avoid confusing errors. - std::unordered_map>> - MemObjLTARes; - { - std::lock_guard lock(MMemObjLifetimeAttachedResourcesMutex); - std::swap(MemObjLTARes, MMemObjLifetimeAttachedResources); - } - MemObjLTARes.clear(); std::unordered_map>> USMLTARes; { @@ -225,33 +217,6 @@ pi_native_handle context_impl::getNative() const { return Handle; } -void context_impl::attachLifetimeToMemObj(std::shared_ptr &Resource, - const SYCLMemObjI *AttachTo) { - std::lock_guard lock(MMemObjLifetimeAttachedResourcesMutex); - auto AttachedResourcesIt = MMemObjLifetimeAttachedResources.find(AttachTo); - if (AttachedResourcesIt != MMemObjLifetimeAttachedResources.end()) - AttachedResourcesIt->second.push_back(Resource); - else - MMemObjLifetimeAttachedResources.insert({AttachTo, {Resource}}); -} - -void context_impl::detachMemObjLifetimeResources( - const SYCLMemObjI *AttachedTo) { - // Swap the attached resources and let them go out of scope without the lock. - // This is required as they could potentially have their own attached - // resources they need to detach. - std::vector> AttachedResources; - { - std::lock_guard lock(MMemObjLifetimeAttachedResourcesMutex); - auto AttachedResourcesIt = - MMemObjLifetimeAttachedResources.find(AttachedTo); - if (AttachedResourcesIt == MMemObjLifetimeAttachedResources.end()) - return; - std::swap(AttachedResourcesIt->second, AttachedResources); - MMemObjLifetimeAttachedResources.erase(AttachedResourcesIt); - } -} - void context_impl::attachLifetimeToUSM(std::shared_ptr &Resource, const void *AttachTo) { std::lock_guard lock(MUSMLifetimeAttachedResourcesMutex); diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 08677ee2654a1..d015d36c66e06 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -168,18 +167,6 @@ class context_impl { /// \return a native handle. pi_native_handle getNative() const; - /// Attach a resource to a memory object. - /// - /// \param Resource is the resource to attach to the memory object - /// \param AttachTo is the memory object to attach the resource to - void attachLifetimeToMemObj(std::shared_ptr &Resource, - const SYCLMemObjI *AttachTo); - - /// Detach all resources attached to a memory object. - /// - /// \param AttachedTo is the memory object to detach resources from - void detachMemObjLifetimeResources(const SYCLMemObjI *AttachedTo); - /// Attach a resource to a USM pointer. /// /// \param Resource is the resource to attach to the USM pointer @@ -203,15 +190,6 @@ class context_impl { MCachedLibPrograms; mutable KernelProgramCache MKernelProgramCache; - /// Matches SYCL memory objects to attached resources. - /// TODO: On ABI break this could be made part of SYCLMemObjT instead. - std::unordered_map>> - MMemObjLifetimeAttachedResources; - - /// Protects m_MemObjLifetimeAttachedResources. - std::mutex MMemObjLifetimeAttachedResourcesMutex; - /// Matches USM pointers to attached resources. std::unordered_map>> MUSMLifetimeAttachedResources; diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index c0174f81d6d80..e156237fc9e1d 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -98,6 +98,16 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() { return TP; } +std::unordered_map>> & +GlobalHandler::getMemObjLifetimeAttachedResources() { + return getOrCreate(MMemObjLifetimeAttachedResources); +} + +std::mutex &GlobalHandler::getMemObjLifetimeAttachedResourcesMutex() { + return getOrCreate(MMemObjLifetimeAttachedResourcesMutex); +} + void releaseDefaultContexts() { // Release shared-pointers to SYCL objects. #ifndef _WIN32 @@ -121,6 +131,12 @@ void GlobalHandler::registerDefaultContextReleaseHandler() { } void shutdown() { + // In case a user is leaking a memory object we may still have attached + // resources. These resources will be released with the context, but we need + // to do it before we release the backend context to avoid confusing errors. + if (GlobalHandler::instance().MMemObjLifetimeAttachedResources.Inst) + GlobalHandler::instance().MMemObjLifetimeAttachedResources.Inst->clear(); + // Ensure neither host task is working so that no default context is accessed // upon its release if (GlobalHandler::instance().MHostTaskThreadPool.Inst) diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index b3d6357686e0c..f574bece151f8 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -26,6 +26,7 @@ class plugin; class device_filter_list; class XPTIRegistry; class ThreadPool; +class SYCLMemObjI; using PlatformImplPtr = std::shared_ptr; using ContextImplPtr = std::shared_ptr; @@ -70,6 +71,11 @@ class GlobalHandler { std::mutex &getHandlerExtendedMembersMutex(); ThreadPool &getHostTaskThreadPool(); + std::unordered_map>> & + getMemObjLifetimeAttachedResources(); + std::mutex &getMemObjLifetimeAttachedResourcesMutex(); + static void registerDefaultContextReleaseHandler(); private: @@ -105,6 +111,12 @@ class GlobalHandler { InstWithLock MHandlerExtendedMembersMutex; // Thread pool for host task and event callbacks execution InstWithLock MHostTaskThreadPool; + + /// TODO: On ABI break this should be made part of SYCLMemObjT. + InstWithLock>>> + MMemObjLifetimeAttachedResources; + InstWithLock MMemObjLifetimeAttachedResourcesMutex; }; } // namespace detail } // namespace sycl diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index 5e8f9388460a7..41f9903094bdf 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -110,11 +110,19 @@ reduGetMaxWGSize(std::shared_ptr Queue, return WGSize; } -__SYCL_EXPORT void attachLifetime(std::shared_ptr &Queue, - std::shared_ptr &Resource, +__SYCL_EXPORT void attachLifetime(std::shared_ptr &Resource, detail::AccessorBaseHost &AttachTo) { - Queue->getContextImplPtr()->attachLifetimeToMemObj(Resource, - getSyclObjImpl(AttachTo)->MSYCLMemObj); + SYCLMemObjI *MemObj = getSyclObjImpl(AttachTo)->MSYCLMemObj; + // On ABI break this should attach directly to the memory object. + std::lock_guard lock( + GlobalHandler::instance().getMemObjLifetimeAttachedResourcesMutex()); + auto &AttachedResourcesMap = + GlobalHandler::instance().getMemObjLifetimeAttachedResources(); + auto AttachedResourcesIt = AttachedResourcesMap.find(MemObj); + if (AttachedResourcesIt != AttachedResourcesMap.end()) + AttachedResourcesIt->second.push_back(Resource); + else + AttachedResourcesMap.insert({MemObj, {Resource}}); } __SYCL_EXPORT void attachLifetime(std::shared_ptr &Queue, diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 924e067d8d3d5..d3956e0d0a5db 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -95,8 +96,21 @@ void SYCLMemObjT::updateHostMemory() { // that happens this will be obsolete as the resources will automatically be // destroyed with the object. void SYCLMemObjT::detachResources() const { - if (MInteropContext) - MInteropContext->detachMemObjLifetimeResources(this); + // Swap the attached resources and let them go out of scope without the lock. + // This is required as they could potentially have their own attached + // resources they need to detach. + std::vector> AttachedResources; + { + std::lock_guard lock( + GlobalHandler::instance().getMemObjLifetimeAttachedResourcesMutex()); + auto &AttachedResourcesMap = + GlobalHandler::instance().getMemObjLifetimeAttachedResources(); + auto AttachedResourcesIt = AttachedResourcesMap.find(this); + if (AttachedResourcesIt == AttachedResourcesMap.end()) + return; + std::swap(AttachedResourcesIt->second, AttachedResources); + AttachedResourcesMap.erase(AttachedResourcesIt); + } } const plugin &SYCLMemObjT::getPlugin() const { diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6d8f200934da2..f7edfd3cfaa7d 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3697,8 +3697,8 @@ _ZN2cl4sycl3ext6oneapi10level_zero12make_programERKNS0_7contextEm _ZN2cl4sycl3ext6oneapi10level_zero13make_platformEm _ZN2cl4sycl3ext6oneapi15filter_selectorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN2cl4sycl3ext6oneapi15filter_selectorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE +_ZN2cl4sycl3ext6oneapi6detail14attachLifetimeERSt10shared_ptrIKvERNS0_6detail16AccessorBaseHostE _ZN2cl4sycl3ext6oneapi6detail14attachLifetimeERSt10shared_ptrINS0_6detail10queue_implEERS4_IKvEPv -_ZN2cl4sycl3ext6oneapi6detail14attachLifetimeERSt10shared_ptrINS0_6detail10queue_implEERS4_IKvERNS5_16AccessorBaseHostE _ZN2cl4sycl3ext6oneapi6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm _ZN2cl4sycl3ext6oneapi6detail17reduComputeWGSizeEmmRm _ZN2cl4sycl3ext6oneapi6detail33reduGetMaxNumConcurrentWorkGroupsESt10shared_ptrINS0_6detail10queue_implEE From 034c49bf4d8e051b56a7793ca5d3beb1fe26e2cb Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 17 Feb 2022 22:20:33 +0300 Subject: [PATCH 6/6] Fix Windows symbol Signed-off-by: Steffen Larsen --- sycl/test/abi/sycl_symbols_windows.dump | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 3371ec3b82691..4f8a1a21bed10 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1314,7 +1314,7 @@ ?atanpi@__host_std@cl@@YAMM@Z ?atanpi@__host_std@cl@@YANN@Z ?attachLifetime@detail@oneapi@ext@sycl@cl@@YAXAEAV?$shared_ptr@$$CBX@std@@AEAVAccessorBaseHost@145@@Z -?attachLifetime@detail@oneapi@ext@sycl@cl@@YAXAEAV?$shared_ptr@$$CBX@std@@PEAX@Z +?attachLifetime@detail@oneapi@ext@sycl@cl@@YAXAEAV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@AEAV?$shared_ptr@$$CBX@7@PEAX@Z ?barrier@handler@sycl@cl@@QEAAXAEBV?$vector@Vevent@sycl@cl@@V?$allocator@Vevent@sycl@cl@@@std@@@std@@@Z ?barrier@handler@sycl@cl@@QEAAXXZ ?begin@exception_list@sycl@cl@@QEBA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@Vexception_ptr@std@@@std@@@std@@@std@@XZ