From 92807c68a9c5e21135cae11ae00ad9d6c6e83e4f Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 15 Dec 2021 21:05:40 +0300 Subject: [PATCH 1/6] [SYCL] Fix memory leak in reduction resources Reductions that require additional resources, such as buffers, can currently create a circular dependency between the resources and the commands issued by the reductions. These changes clear up this dependence in a similar way to how streams are transferred by transferring ownership of the resources to the commands and ensuring release when cleaning up the commands. Signed-off-by: Steffen Larsen --- sycl/include/CL/sycl/detail/cg.hpp | 12 ++++- sycl/include/CL/sycl/handler.hpp | 6 +-- sycl/source/detail/handler_impl.hpp | 6 +++ sycl/source/detail/scheduler/commands.cpp | 20 +++++++- sycl/source/detail/scheduler/commands.hpp | 2 + .../source/detail/scheduler/graph_builder.cpp | 27 +++++++++- sycl/source/detail/scheduler/scheduler.cpp | 16 +++++- sycl/source/detail/scheduler/scheduler.hpp | 6 ++- sycl/source/handler.cpp | 51 ++++++++++++++----- sycl/test/abi/sycl_symbols_linux.dump | 2 + .../program_manager/EliminatedArgMask.cpp | 4 +- sycl/unittests/scheduler/Regression.cpp | 1 + .../scheduler/SchedulerTestUtils.hpp | 4 +- .../scheduler/StreamInitDependencyOnHost.cpp | 4 +- 14 files changed, 133 insertions(+), 28 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 0b2ebddbf3a24..ba70ddefcee2e 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -248,6 +248,7 @@ class CGExecKernel : public CG { std::string MKernelName; detail::OSModuleHandle MOSModuleHandle; std::vector> MStreams; + std::vector> MAuxiliaryResources; CGExecKernel(NDRDescT NDRDesc, std::unique_ptr HKernel, std::shared_ptr SyclKernel, @@ -259,6 +260,7 @@ class CGExecKernel : public CG { std::vector Args, std::string KernelName, detail::OSModuleHandle OSModuleHandle, std::vector> Streams, + std::vector> AuxiliaryResources, CGTYPE Type, detail::code_location loc = {}) : CG(Type, std::move(ArgsStorage), std::move(AccStorage), std::move(SharedPtrStorage), std::move(Requirements), @@ -266,7 +268,8 @@ class CGExecKernel : public CG { MNDRDesc(std::move(NDRDesc)), MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)), MArgs(std::move(Args)), MKernelName(std::move(KernelName)), MOSModuleHandle(OSModuleHandle), - MStreams(std::move(Streams)) { + MStreams(std::move(Streams)), + MAuxiliaryResources(std::move(AuxiliaryResources)) { assert((getType() == RunOnHostIntel || getType() == Kernel) && "Wrong type of exec kernel CG."); } @@ -277,6 +280,10 @@ class CGExecKernel : public CG { return MStreams; } + std::vector> getAuxiliaryResources() const { + return MAuxiliaryResources; + } + std::shared_ptr getKernelBundle() { const std::shared_ptr> &ExtendedMembers = getExtendedMembers(); @@ -291,6 +298,9 @@ class CGExecKernel : public CG { void clearStreams() { MStreams.clear(); } bool hasStreams() { return !MStreams.empty(); } + + void clearAuxiliaryResources() { MAuxiliaryResources.clear(); } + bool hasAuxiliaryResources() { return !MAuxiliaryResources.empty(); } }; /// "Copy memory" command group class. diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 341a46ca9679d..0e54abafc7e4f 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -472,12 +472,9 @@ class __SYCL_EXPORT handler { /// Saves buffers created by handling reduction feature in handler. /// They are then forwarded to command group and destroyed only after /// the command group finishes the work on device/host. - /// The 'MSharedPtrStorage' suits that need. /// /// @param ReduObj is a pointer to object that must be stored. - void addReduction(const std::shared_ptr &ReduObj) { - MSharedPtrStorage.push_back(ReduObj); - } + void addReduction(const std::shared_ptr &ReduObj); ~handler() = default; @@ -1267,6 +1264,7 @@ class __SYCL_EXPORT handler { } std::shared_ptr getHandlerImpl() const; + std::shared_ptr evictHandlerImpl() const; void setStateExplicitKernelBundle(); void setStateSpecConstSet(); diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index d4171e8d4d1d6..967784d0ddd64 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -65,6 +65,12 @@ class handler_impl { /// equal to the queue associated with the handler if the corresponding /// submission is a fallback from a previous submission. std::shared_ptr MSubmissionSecondaryQueue; + + // Protects MAuxiliaryResources. + std::mutex MAuxiliaryResourcesMutex; + + // Stores auxiliary resources used by internal operations. + std::vector> MAuxiliaryResources; }; } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 13c08efb63a6a..e0f35f1825c17 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1378,11 +1378,23 @@ std::vector ExecCGCommand::getStreams() const { return {}; } +std::vector> +ExecCGCommand::getAuxiliaryResources() const { + if (MCommandGroup->getType() == CG::Kernel) + return ((CGExecKernel *)MCommandGroup.get())->getAuxiliaryResources(); + return {}; +} + void ExecCGCommand::clearStreams() { if (MCommandGroup->getType() == CG::Kernel) ((CGExecKernel *)MCommandGroup.get())->clearStreams(); } +void ExecCGCommand::clearAuxiliaryResources() { + if (MCommandGroup->getType() == CG::Kernel) + ((CGExecKernel *)MCommandGroup.get())->clearAuxiliaryResources(); +} + cl_int UpdateHostRequirementCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; @@ -1673,7 +1685,9 @@ ExecCGCommand::ExecCGCommand(std::unique_ptr CommandGroup, static_cast(MCommandGroup.get())->MQueue; MEvent->setNeedsCleanupAfterWait(true); } else if (MCommandGroup->getType() == CG::CGTYPE::Kernel && - (static_cast(MCommandGroup.get()))->hasStreams()) + (static_cast(MCommandGroup.get())->hasStreams() || + static_cast(MCommandGroup.get()) + ->hasAuxiliaryResources())) MEvent->setNeedsCleanupAfterWait(true); emitInstrumentationDataProxy(); @@ -2482,7 +2496,9 @@ bool ExecCGCommand::supportsPostEnqueueCleanup() const { return Command::supportsPostEnqueueCleanup() && (MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask) && (MCommandGroup->getType() != CG::CGTYPE::Kernel || - !(static_cast(MCommandGroup.get()))->hasStreams()); + (!static_cast(MCommandGroup.get())->hasStreams() && + !static_cast(MCommandGroup.get()) + ->hasAuxiliaryResources())); } } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 82c42711b2da1..2b1a98cac1d35 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -541,8 +541,10 @@ class ExecCGCommand : public Command { ExecCGCommand(std::unique_ptr CommandGroup, QueueImplPtr Queue); std::vector getStreams() const; + std::vector> getAuxiliaryResources() const; void clearStreams(); + void clearAuxiliaryResources(); void printDot(std::ostream &Stream) const final; void emitInstrumentationData() final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index ed2ee3e6f78dc..5b44fd2cf8226 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1045,7 +1045,8 @@ void Scheduler::GraphBuilder::decrementLeafCountersForRecord( void Scheduler::GraphBuilder::cleanupCommandsForRecord( MemObjRecord *Record, - std::vector> &StreamsToDeallocate) { + std::vector> &StreamsToDeallocate, + std::vector> &ReduResourcesToDeallocate) { std::vector &AllocaCommands = Record->MAllocaCommands; if (AllocaCommands.empty()) return; @@ -1097,10 +1098,20 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord( // Collect stream objects for a visited command. if (Cmd->getType() == Command::CommandType::RUN_CG) { auto ExecCmd = static_cast(Cmd); + + // Transfer ownership of stream implementations. std::vector> Streams = ExecCmd->getStreams(); ExecCmd->clearStreams(); StreamsToDeallocate.insert(StreamsToDeallocate.end(), Streams.begin(), Streams.end()); + + // Transfer ownership of auxiliary resources. + std::vector> ReduResources = + ExecCmd->getAuxiliaryResources(); + ExecCmd->clearAuxiliaryResources(); + ReduResourcesToDeallocate.insert(ReduResourcesToDeallocate.end(), + ReduResources.begin(), + ReduResources.end()); } for (Command *UserCmd : Cmd->MUsers) @@ -1160,6 +1171,7 @@ void Scheduler::GraphBuilder::cleanupCommand(Command *Cmd) { if (ExecCGCmd->getCG().getType() == CG::CGTYPE::Kernel) { auto *ExecKernelCG = static_cast(&ExecCGCmd->getCG()); assert(!ExecKernelCG->hasStreams()); + assert(!ExecKernelCG->hasAuxiliaryResources()); } } #endif @@ -1191,7 +1203,8 @@ void Scheduler::GraphBuilder::cleanupCommand(Command *Cmd) { void Scheduler::GraphBuilder::cleanupFinishedCommands( Command *FinishedCmd, - std::vector> &StreamsToDeallocate) { + std::vector> &StreamsToDeallocate, + std::vector> &ReduResourcesToDeallocate) { assert(MCmdsToVisit.empty()); MCmdsToVisit.push(FinishedCmd); MVisitedCmds.clear(); @@ -1207,10 +1220,20 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands( // Collect stream objects for a visited command. if (Cmd->getType() == Command::CommandType::RUN_CG) { auto ExecCmd = static_cast(Cmd); + + // Transfer ownership of stream implementations. std::vector> Streams = ExecCmd->getStreams(); ExecCmd->clearStreams(); StreamsToDeallocate.insert(StreamsToDeallocate.end(), Streams.begin(), Streams.end()); + + // Transfer ownership of auxiliary resources. + std::vector> ReduResources = + ExecCmd->getAuxiliaryResources(); + ExecCmd->clearAuxiliaryResources(); + ReduResourcesToDeallocate.insert(ReduResourcesToDeallocate.end(), + ReduResources.begin(), + ReduResources.end()); } for (const DepDesc &Dep : Cmd->MDeps) { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 232ee0a5d6e47..c520e81f298d9 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -239,6 +239,11 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { // objects, this is needed to guarantee that streamed data is printed and // resources are released. std::vector> StreamsToDeallocate; + // Similar to streams, we also collect the auxiliary resources used by the + // commands. Cleanup will make sure the commands do not own the resources + // anymore, so we just need them to survive the graph lock then they can die + // as they go out of scope. + std::vector> ReduResourcesToDeallocate; { // Avoiding deadlock situation, where one thread is in the process of // enqueueing (with a locked mutex) a currently blocked task that waits for @@ -249,7 +254,8 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { // The command might have been cleaned up (and set to nullptr) by another // thread if (FinishedCmd) - MGraphBuilder.cleanupFinishedCommands(FinishedCmd, StreamsToDeallocate); + MGraphBuilder.cleanupFinishedCommands(FinishedCmd, StreamsToDeallocate, + ReduResourcesToDeallocate); } } deallocateStreams(StreamsToDeallocate); @@ -261,6 +267,11 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { // objects, this is needed to guarantee that streamed data is printed and // resources are released. std::vector> StreamsToDeallocate; + // Similar to streams, we also collect the auxiliary resources used by the + // commands. Cleanup will make sure the commands do not own the resources + // anymore, so we just need them to survive the graph lock then they can die + // as they go out of scope. + std::vector> ReduResourcesToDeallocate; { MemObjRecord *Record = nullptr; @@ -282,7 +293,8 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { WriteLockT Lock(MGraphLock, std::defer_lock); acquireWriteLock(Lock); MGraphBuilder.decrementLeafCountersForRecord(Record); - MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate); + MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate, + ReduResourcesToDeallocate); MGraphBuilder.removeRecordForMemObj(MemObj); } } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 18ed2f5004c06..39075dbcd2703 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -514,7 +514,8 @@ class Scheduler { /// (assuming that all its commands have been waited for). void cleanupFinishedCommands( Command *FinishedCmd, - std::vector> &); + std::vector> &, + std::vector> &); /// Reschedules the command passed using Queue provided. /// @@ -540,7 +541,8 @@ class Scheduler { /// Removes commands that use the given MemObjRecord from the graph. void cleanupCommandsForRecord( MemObjRecord *Record, - std::vector> &); + std::vector> &, + std::vector> &); /// Removes the MemObjRecord for the memory object passed. void removeRecordForMemObj(SYCLMemObjI *MemObject); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index e9a9993ae6ed6..44f246b8fd0ea 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -49,24 +49,40 @@ handler::handler(std::shared_ptr Queue, MSharedPtrStorage.push_back(std::move(ExtendedMembers)); } +static detail::ExtendedMemberT &getHandlerImplMember( + std::vector> &SharedPtrStorage) { + assert(!SharedPtrStorage.empty()); + std::shared_ptr> ExtendedMembersVec = + detail::convertToExtendedMembers(SharedPtrStorage[0]); + assert(ExtendedMembersVec->size() > 0); + auto &HandlerImplMember = (*ExtendedMembersVec)[0]; + assert(detail::ExtendedMembersType::HANDLER_IMPL == HandlerImplMember.MType); + return HandlerImplMember; +} + /// Gets the handler_impl at the start of the extended members. std::shared_ptr handler::getHandlerImpl() const { std::lock_guard Lock( detail::GlobalHandler::instance().getHandlerExtendedMembersMutex()); + return std::static_pointer_cast( + getHandlerImplMember(MSharedPtrStorage).MData); +} - assert(!MSharedPtrStorage.empty()); - - std::shared_ptr> ExtendedMembersVec = - detail::convertToExtendedMembers(MSharedPtrStorage[0]); - - assert(ExtendedMembersVec->size() > 0); - - auto HandlerImplMember = (*ExtendedMembersVec)[0]; +/// Gets the handler_impl at the start of the extended members and removes it. +std::shared_ptr handler::evictHandlerImpl() const { + std::lock_guard Lock( + detail::GlobalHandler::instance().getHandlerExtendedMembersMutex()); + auto &HandlerImplMember = getHandlerImplMember(MSharedPtrStorage); + auto Impl = + std::static_pointer_cast(HandlerImplMember.MData); - assert(detail::ExtendedMembersType::HANDLER_IMPL == HandlerImplMember.MType); + // Reset the data of the member. + // NOTE: We let it stay because removing the front can be expensive. This will + // be improved when the impl is made a member of handler. In fact eviction is + // likely to not be needed when that happens. + HandlerImplMember.MData.reset(); - return std::static_pointer_cast( - HandlerImplMember.MData); + return Impl; } // Sets the submission state to indicate that an explicit kernel bundle has been @@ -282,6 +298,10 @@ event handler::finalize() { return MLastEvent; } + // Evict handler_impl from extended members to make sure the command group + // does not keep it alive. + std::shared_ptr Impl = evictHandlerImpl(); + std::unique_ptr CommandGroup; switch (type) { case detail::CG::Kernel: @@ -294,7 +314,8 @@ event handler::finalize() { std::move(MArgsStorage), std::move(MAccStorage), std::move(MSharedPtrStorage), std::move(MRequirements), std::move(MEvents), std::move(MArgs), MKernelName, MOSModuleHandle, - std::move(MStreamStorage), MCGType, MCodeLoc)); + std::move(MStreamStorage), std::move(Impl->MAuxiliaryResources), + MCGType, MCodeLoc)); break; } case detail::CG::CodeplayInteropTask: @@ -383,6 +404,12 @@ event handler::finalize() { return MLastEvent; } +void handler::addReduction(const std::shared_ptr &ReduObj) { + std::shared_ptr Impl = getHandlerImpl(); + std::lock_guard Lock(Impl->MAuxiliaryResourcesMutex); + Impl->MAuxiliaryResources.push_back(ReduObj); +} + void handler::associateWithHandler(detail::AccessorBaseHost *AccBase, access::target AccTarget) { detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 79d58722b4f4f..5587ff14009b6 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3994,6 +3994,7 @@ _ZN2cl4sycl7handler10depends_onERKSt6vectorINS0_5eventESaIS3_EE _ZN2cl4sycl7handler10mem_adviseEPKvmi _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmb _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmbb +_ZN2cl4sycl7handler12addReductionERKSt10shared_ptrIKvE _ZN2cl4sycl7handler13getKernelNameB5cxx11Ev _ZN2cl4sycl7handler17use_kernel_bundleERKNS0_13kernel_bundleILNS0_12bundle_stateE2EEE _ZN2cl4sycl7handler18RangeRoundingTraceEv @@ -4390,6 +4391,7 @@ _ZNK2cl4sycl7context8get_infoILNS0_4info7contextE65552EEENS3_12param_traitsIS4_X _ZNK2cl4sycl7context8get_infoILNS0_4info7contextE65553EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl7context9getNativeEv _ZNK2cl4sycl7handler14getHandlerImplEv +_ZNK2cl4sycl7handler16evictHandlerImplEv _ZNK2cl4sycl7handler27isStateExplicitKernelBundleEv _ZNK2cl4sycl7handler30getOrInsertHandlerKernelBundleEb _ZNK2cl4sycl7program10get_kernelENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE diff --git a/sycl/unittests/program_manager/EliminatedArgMask.cpp b/sycl/unittests/program_manager/EliminatedArgMask.cpp index 5301ea986ad94..2fcb0750e13af 100644 --- a/sycl/unittests/program_manager/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/EliminatedArgMask.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include +#include #include #include #include @@ -126,6 +127,7 @@ class MockHandler : public sycl::handler { std::unique_ptr finalize() { auto CGH = static_cast(this); + std::shared_ptr Impl = evictHandlerImpl(); std::unique_ptr CommandGroup; switch (getType()) { case sycl::detail::CG::Kernel: { @@ -136,7 +138,7 @@ class MockHandler : public sycl::handler { std::move(CGH->MRequirements), std::move(CGH->MEvents), std::move(CGH->MArgs), std::move(CGH->MKernelName), std::move(CGH->MOSModuleHandle), std::move(CGH->MStreamStorage), - CGH->MCGType, CGH->MCodeLoc)); + std::move(Impl->MAuxiliaryResources), CGH->MCGType, CGH->MCodeLoc)); break; } default: diff --git a/sycl/unittests/scheduler/Regression.cpp b/sycl/unittests/scheduler/Regression.cpp index f0af4be0665eb..48f7f9c44bd21 100644 --- a/sycl/unittests/scheduler/Regression.cpp +++ b/sycl/unittests/scheduler/Regression.cpp @@ -86,6 +86,7 @@ TEST_F(SchedulerTest, CheckArgsBlobInPiEnqueueNativeKernelIsValid) { /*KernelName*/ "", /*OSModuleHandle*/ detail::OSUtil::ExeModuleHandle, /*Streams*/ {}, + /*AuxiliaryResources*/ {}, /*Type*/ detail::CG::RunOnHostIntel)}; context Ctx{Plt}; diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 2f6073d1d3672..f4dcc4e0f215f 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -124,7 +124,9 @@ class MockScheduler : public cl::sycl::detail::Scheduler { void cleanupCommandsForRecord(cl::sycl::detail::MemObjRecord *Rec) { std::vector> StreamsToDeallocate; - MGraphBuilder.cleanupCommandsForRecord(Rec, StreamsToDeallocate); + std::vector> AuxiliaryResourcesToDeallocate; + MGraphBuilder.cleanupCommandsForRecord(Rec, StreamsToDeallocate, + AuxiliaryResourcesToDeallocate); } void addNodeToLeaves(cl::sycl::detail::MemObjRecord *Rec, diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 478465603199a..ef8c4c0895df8 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -10,6 +10,7 @@ #include "SchedulerTestUtils.hpp" #include +#include #include #include @@ -44,6 +45,7 @@ class MockHandler : public sycl::handler { std::unique_ptr finalize() { auto CGH = static_cast(this); + std::shared_ptr Impl = evictHandlerImpl(); std::unique_ptr CommandGroup; switch (CGH->MCGType) { case detail::CG::Kernel: @@ -55,7 +57,7 @@ class MockHandler : public sycl::handler { std::move(CGH->MRequirements), std::move(CGH->MEvents), std::move(CGH->MArgs), std::move(CGH->MKernelName), std::move(CGH->MOSModuleHandle), std::move(CGH->MStreamStorage), - CGH->MCGType, CGH->MCodeLoc)); + std::move(Impl->MAuxiliaryResources), CGH->MCGType, CGH->MCodeLoc)); break; } default: From bd3070fd61bc9a9728abb073f473eacf99c68669 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 24 Feb 2022 23:33:04 +0300 Subject: [PATCH 2/6] Rename ReduResources* Signed-off-by: Steffen Larsen --- .../source/detail/scheduler/graph_builder.cpp | 20 +++++++++---------- sycl/source/detail/scheduler/scheduler.cpp | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 5b44fd2cf8226..4305e263a0090 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1046,7 +1046,7 @@ void Scheduler::GraphBuilder::decrementLeafCountersForRecord( void Scheduler::GraphBuilder::cleanupCommandsForRecord( MemObjRecord *Record, std::vector> &StreamsToDeallocate, - std::vector> &ReduResourcesToDeallocate) { + std::vector> &AuxResourcesToDeallocate) { std::vector &AllocaCommands = Record->MAllocaCommands; if (AllocaCommands.empty()) return; @@ -1106,12 +1106,12 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord( Streams.end()); // Transfer ownership of auxiliary resources. - std::vector> ReduResources = + std::vector> AuxResources = ExecCmd->getAuxiliaryResources(); ExecCmd->clearAuxiliaryResources(); - ReduResourcesToDeallocate.insert(ReduResourcesToDeallocate.end(), - ReduResources.begin(), - ReduResources.end()); + AuxResourcesToDeallocate.insert(AuxResourcesToDeallocate.end(), + AuxResources.begin(), + AuxResources.end()); } for (Command *UserCmd : Cmd->MUsers) @@ -1204,7 +1204,7 @@ void Scheduler::GraphBuilder::cleanupCommand(Command *Cmd) { void Scheduler::GraphBuilder::cleanupFinishedCommands( Command *FinishedCmd, std::vector> &StreamsToDeallocate, - std::vector> &ReduResourcesToDeallocate) { + std::vector> &AuxResourcesToDeallocate) { assert(MCmdsToVisit.empty()); MCmdsToVisit.push(FinishedCmd); MVisitedCmds.clear(); @@ -1228,12 +1228,12 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands( Streams.end()); // Transfer ownership of auxiliary resources. - std::vector> ReduResources = + std::vector> AuxResources = ExecCmd->getAuxiliaryResources(); ExecCmd->clearAuxiliaryResources(); - ReduResourcesToDeallocate.insert(ReduResourcesToDeallocate.end(), - ReduResources.begin(), - ReduResources.end()); + AuxResourcesToDeallocate.insert(AuxResourcesToDeallocate.end(), + AuxResources.begin(), + AuxResources.end()); } for (const DepDesc &Dep : Cmd->MDeps) { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index c520e81f298d9..38738f08736c9 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -243,7 +243,7 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { // commands. Cleanup will make sure the commands do not own the resources // anymore, so we just need them to survive the graph lock then they can die // as they go out of scope. - std::vector> ReduResourcesToDeallocate; + std::vector> AuxResourcesToDeallocate; { // Avoiding deadlock situation, where one thread is in the process of // enqueueing (with a locked mutex) a currently blocked task that waits for @@ -255,7 +255,7 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { // thread if (FinishedCmd) MGraphBuilder.cleanupFinishedCommands(FinishedCmd, StreamsToDeallocate, - ReduResourcesToDeallocate); + AuxResourcesToDeallocate); } } deallocateStreams(StreamsToDeallocate); @@ -271,7 +271,7 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { // commands. Cleanup will make sure the commands do not own the resources // anymore, so we just need them to survive the graph lock then they can die // as they go out of scope. - std::vector> ReduResourcesToDeallocate; + std::vector> AuxResourcesToDeallocate; { MemObjRecord *Record = nullptr; @@ -294,7 +294,7 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { acquireWriteLock(Lock); MGraphBuilder.decrementLeafCountersForRecord(Record); MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate, - ReduResourcesToDeallocate); + AuxResourcesToDeallocate); MGraphBuilder.removeRecordForMemObj(MemObj); } } From 40dc13d993b12ae8d2110bc16049a8d612e9ce7c Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 24 Feb 2022 23:34:05 +0300 Subject: [PATCH 3/6] Add missing Windows symbol Signed-off-by: Steffen Larsen --- sycl/test/abi/sycl_symbols_windows.dump | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index e6bb784eca0d8..0d7a9aa844742 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1754,6 +1754,7 @@ ?erfc@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V34562@@Z ?erfc@__host_std@cl@@YAMM@Z ?erfc@__host_std@cl@@YANN@Z +?evictHandlerImpl@handler@sycl@cl@@AEBA?AV?$shared_ptr@Vhandler_impl@detail@sycl@cl@@@std@@XZ ?exp10@__host_std@cl@@YA?AV?$vec@M$00@sycl@2@V342@@Z ?exp10@__host_std@cl@@YA?AV?$vec@M$01@sycl@2@V342@@Z ?exp10@__host_std@cl@@YA?AV?$vec@M$02@sycl@2@V342@@Z From 5182889323c1110184b76a9474d15c6372fe735d Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Fri, 25 Feb 2022 18:02:39 +0300 Subject: [PATCH 4/6] Remove unneeded mutex Signed-off-by: Steffen Larsen --- sycl/source/detail/handler_impl.hpp | 3 --- sycl/source/handler.cpp | 4 +--- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index 967784d0ddd64..673650874bf7d 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -66,9 +66,6 @@ class handler_impl { /// submission is a fallback from a previous submission. std::shared_ptr MSubmissionSecondaryQueue; - // Protects MAuxiliaryResources. - std::mutex MAuxiliaryResourcesMutex; - // Stores auxiliary resources used by internal operations. std::vector> MAuxiliaryResources; }; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 44f246b8fd0ea..e5a5d3386c046 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -405,9 +405,7 @@ event handler::finalize() { } void handler::addReduction(const std::shared_ptr &ReduObj) { - std::shared_ptr Impl = getHandlerImpl(); - std::lock_guard Lock(Impl->MAuxiliaryResourcesMutex); - Impl->MAuxiliaryResources.push_back(ReduObj); + getHandlerImpl()->MAuxiliaryResources.push_back(ReduObj); } void handler::associateWithHandler(detail::AccessorBaseHost *AccBase, From 82a3ded8e62b6f483380ca612d9a631d8fe514f0 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 7 Mar 2022 16:57:36 +0300 Subject: [PATCH 5/6] Fix formatting Signed-off-by: Steffen Larsen --- sycl/source/detail/scheduler/graph_builder.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 4305e263a0090..4899e079cbbc6 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1110,8 +1110,7 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord( ExecCmd->getAuxiliaryResources(); ExecCmd->clearAuxiliaryResources(); AuxResourcesToDeallocate.insert(AuxResourcesToDeallocate.end(), - AuxResources.begin(), - AuxResources.end()); + AuxResources.begin(), AuxResources.end()); } for (Command *UserCmd : Cmd->MUsers) @@ -1232,8 +1231,7 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands( ExecCmd->getAuxiliaryResources(); ExecCmd->clearAuxiliaryResources(); AuxResourcesToDeallocate.insert(AuxResourcesToDeallocate.end(), - AuxResources.begin(), - AuxResources.end()); + AuxResources.begin(), AuxResources.end()); } for (const DepDesc &Dep : Cmd->MDeps) { From 3fa5f46b0dfc7d033a7a5f94ce6384c5c53b199f Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 16 Mar 2022 18:57:53 +0300 Subject: [PATCH 6/6] Fix reduction copy-back bug Signed-off-by: Steffen Larsen --- sycl/include/sycl/ext/oneapi/reduction.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 2944c8832275b..1b052f26108cb 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -718,6 +718,7 @@ class reduction_impl : private reduction_impl_base { auto RWReduVal = std::make_shared(MIdentity); CGH.addReduction(RWReduVal); MOutBufPtr = std::make_shared>(RWReduVal.get(), range<1>(1)); + MOutBufPtr->set_final_data(); CGH.addReduction(MOutBufPtr); return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr); } @@ -728,6 +729,7 @@ class reduction_impl : private reduction_impl_base { auto CounterMem = std::make_shared(0); CGH.addReduction(CounterMem); auto CounterBuf = std::make_shared>(CounterMem.get(), 1); + CounterBuf->set_final_data(); CGH.addReduction(CounterBuf); return {*CounterBuf, CGH}; }