From ae32f24e0256aa12feeae82389dae4172c5b1bf8 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Fri, 10 Dec 2021 15:54:33 +0300 Subject: [PATCH 01/13] [SYCL] Fixes memory dependency leaks caused by failed kernel enqueue If a kernel enqueue fails the runtime will immediately try and clean it up. However, if it has any dependencies or users the cleanup will be skipped. This can cause the dependencies to stay alive and leak. These changes forces a full sub-graph cleanup of the command if enqueuing failed. Additionally, sub-graph cleanup is changed to account for failed kernel enqueues and will remove the failed command from its leaves. Signed-off-by: Steffen Larsen --- sycl/source/detail/scheduler/commands.hpp | 4 + .../source/detail/scheduler/graph_builder.cpp | 9 + sycl/source/detail/scheduler/scheduler.cpp | 33 ++-- sycl/unittests/scheduler/FailedCommands.cpp | 168 ++++++++++++++++++ 4 files changed, 193 insertions(+), 21 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 4a556f8a5567e..f1cd7b4ddc06e 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -136,6 +136,10 @@ class Command { return MEnqueueStatus == EnqueueResultT::SyclEnqueueBlocked; } + bool isEnqueueFailed() const { + return MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed; + } + const QueueImplPtr &getQueue() const { return MQueue; } const QueueImplPtr &getSubmittedQueue() const { return MSubmittedQueue; } diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 61f10b41845cd..547315a30d80c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1125,6 +1125,15 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands( MCmdsToVisit.push(Dep.MDepCommand); } + // If the command has failed to enqueue it must be removed from its leaves. + if (Cmd->isEnqueueFailed()) { + for (const DepDesc &Dep : Cmd->MDeps) { + const Requirement *Req = Dep.MDepRequirement; + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); + updateLeaves({Cmd}, Record, Req->MAccessMode); + } + } + // Do not clean up the node if it is a leaf for any memory object if (Cmd->MLeafCounter > 0) continue; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index c17beb8a3621d..7d6d2ad2abd3f 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -108,7 +108,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, NewEvent = NewCmd->getEvent(); } - { + try { ReadLockT Lock(MGraphLock); Command *NewCmd = static_cast(NewEvent->getCommand()); @@ -128,31 +128,17 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, for (Command *Cmd : AuxiliaryCmds) { Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); - try { - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Auxiliary enqueue process failed.", - PI_INVALID_OPERATION); - } catch (...) { - // enqueueCommand() func and if statement above may throw an exception, - // so destroy required resources to avoid memory leak - CleanUp(); - std::rethrow_exception(std::current_exception()); - } + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Auxiliary enqueue process failed.", + PI_INVALID_OPERATION); } if (NewCmd) { // TODO: Check if lazy mode. EnqueueResultT Res; - try { - bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); - } catch (...) { - // enqueueCommand() func and if statement above may throw an exception, - // so destroy required resources to avoid memory leak - CleanUp(); - std::rethrow_exception(std::current_exception()); - } + bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); // If there are no memory dependencies decouple and free the command. // Though, dismiss ownership of native kernel command group as it's @@ -160,6 +146,11 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, // at native kernel execution finish. CleanUp(); } + } catch (...) { + // If enqueuing has failed we need to clean up the command to remove it + // from the graph so it does not cause issues for other related commands. + cleanupFinishedCommands(NewEvent); + std::rethrow_exception(std::current_exception()); } for (auto StreamImplPtr : Streams) { diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 37a7a71a4afdc..21426bb173f52 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -9,8 +9,58 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" +#include +#include +#include + using namespace cl::sycl; +class TestKernel; + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { +template <> struct KernelInfo { + static constexpr unsigned getNumParams() { return 0; } + static const kernel_param_desc_t &getParamDesc(int) { + static kernel_param_desc_t Dummy; + return Dummy; + } + static constexpr const char *getName() { return "TestKernel"; } + static constexpr bool isESIMD() { return false; } + static constexpr bool callsThisItem() { return false; } + static constexpr bool callsAnyThisFreeFunction() { return false; } +}; + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) + +static sycl::unittest::PiImage generateDefaultImage() { + using namespace sycl::unittest; + + PiPropertySet PropSet; + + std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data + + PiArray Entries = makeEmptyKernels({"TestKernel"}); + + PiImage Img{PI_DEVICE_BINARY_TYPE_SPIRV, // Format + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec + "", // Compile options + "", // Link options + std::move(Bin), + std::move(Entries), + std::move(PropSet)}; + + return Img; +} + +static sycl::unittest::PiImage Img = generateDefaultImage(); +static sycl::unittest::PiImageArray<1> ImgArray{&Img}; + +using namespace sycl; + TEST_F(SchedulerTest, FailedDependency) { detail::Requirement MockReq = getMockRequirement(); MockCommand MDep(detail::getSyclObjImpl(MQueue)); @@ -35,3 +85,121 @@ TEST_F(SchedulerTest, FailedDependency) { ASSERT_EQ(MDep.MEnqueueStatus, detail::EnqueueResultT::SyclEnqueueFailed) << "MDep should be marked as failed\n"; } + +pi_result redefinedFailingEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, + const size_t *, const size_t *, + const size_t *, pi_uint32, + const pi_event *, pi_event *) { + throw sycl::runtime_error( + "Exception from redefinedFailingEnqueueKernelLaunch.", + PI_INVALID_OPERATION); +} + +size_t MemBufRefCount = 0u; + +pi_result redefinedMemBufferCreate(pi_context, pi_mem_flags, size_t, void *, + pi_mem *ret_mem, const pi_mem_properties *) { + *ret_mem = (pi_mem)0x1; + ++MemBufRefCount; + return PI_SUCCESS; +} + +pi_result redefinedMemBufferPartition(pi_mem, pi_mem_flags, + pi_buffer_create_type, void *, + pi_mem *ret_mem) { + *ret_mem = (pi_mem)0x1; + ++MemBufRefCount; + return PI_SUCCESS; +} + +pi_result redefinedMemRetain(pi_mem) { + ++MemBufRefCount; + return PI_SUCCESS; +} + +pi_result redefinedMemRelease(pi_mem) { + --MemBufRefCount; + return PI_SUCCESS; +} + +TEST_F(SchedulerTest, FailedCommandAccessorCleanup) { + default_selector Selector; + platform Plt{default_selector()}; + if (Plt.is_host()) { + std::cout << "Not run due to host-only environment\n"; + return; + } + + unittest::PiMock Mock{Plt}; + setupDefaultMockAPIs(Mock); + MemBufRefCount = 0u; + Mock.redefine( + redefinedFailingEnqueueKernelLaunch); + Mock.redefine(redefinedMemBufferCreate); + Mock.redefine(redefinedMemRetain); + Mock.redefine(redefinedMemRelease); + + { + context Ctx{Plt}; + queue Q{Ctx, Selector}; + + kernel_bundle KernelBundle = + sycl::get_kernel_bundle(Ctx); + auto ExecBundle = sycl::build(KernelBundle); + + buffer Buff{cl::sycl::range<1>(1)}; + + try { + Q.submit([&](sycl::handler &CGH) { + auto Acc = Buff.get_access(CGH); + CGH.use_kernel_bundle(ExecBundle); + CGH.single_task([=] {}); + }); + FAIL() << "No exception was thrown."; + } catch (sycl::runtime_error &e) { } + } + + ASSERT_EQ(MemBufRefCount, 0u) << "Memory leak detected."; +} + +TEST_F(SchedulerTest, FailedCommandStreamCleanup) { + default_selector Selector; + platform Plt{default_selector()}; + if (Plt.is_host()) { + std::cout << "Not run due to host-only environment\n"; + return; + } + + unittest::PiMock Mock{Plt}; + setupDefaultMockAPIs(Mock); + MemBufRefCount = 0u; + Mock.redefine( + redefinedFailingEnqueueKernelLaunch); + Mock.redefine(redefinedMemBufferCreate); + Mock.redefine( + redefinedMemBufferPartition); + Mock.redefine(redefinedMemRetain); + Mock.redefine(redefinedMemRelease); + + { + context Ctx{Plt}; + queue Q{Ctx, Selector}; + + kernel_bundle KernelBundle = + sycl::get_kernel_bundle(Ctx); + auto ExecBundle = sycl::build(KernelBundle); + + try { + Q.submit([&](sycl::handler &CGH) { + sycl::stream KernelStream(108 * 64 + 128, 64, CGH); + CGH.use_kernel_bundle(ExecBundle); + //CGH.set_args(KernelStream); + CGH.single_task([=] {}); + }); + FAIL() << "No exception was thrown."; + } catch (sycl::runtime_error &e) { } + Q.wait(); + } + + ASSERT_EQ(MemBufRefCount, 0u) << "Memory leak detected."; +} From ae4f2a8ac418fb23808c7c59dea1c9ef2cf94151 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 13 Dec 2021 11:46:41 +0300 Subject: [PATCH 02/13] Fix format and tests Signed-off-by: Steffen Larsen --- sycl/unittests/scheduler/FailedCommands.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 21426bb173f52..bc0db1c28b8ad 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -99,7 +99,7 @@ size_t MemBufRefCount = 0u; pi_result redefinedMemBufferCreate(pi_context, pi_mem_flags, size_t, void *, pi_mem *ret_mem, const pi_mem_properties *) { - *ret_mem = (pi_mem)0x1; + *ret_mem = reinterpret_cast(0x1); ++MemBufRefCount; return PI_SUCCESS; } @@ -107,7 +107,7 @@ pi_result redefinedMemBufferCreate(pi_context, pi_mem_flags, size_t, void *, pi_result redefinedMemBufferPartition(pi_mem, pi_mem_flags, pi_buffer_create_type, void *, pi_mem *ret_mem) { - *ret_mem = (pi_mem)0x1; + *ret_mem = reinterpret_cast(0x1); ++MemBufRefCount; return PI_SUCCESS; } @@ -129,6 +129,11 @@ TEST_F(SchedulerTest, FailedCommandAccessorCleanup) { std::cout << "Not run due to host-only environment\n"; return; } + if (Plt.get_backend() == sycl::backend::ext_oneapi_cuda && + Plt.get_backend() == sycl::backend::ext_oneapi_hip) { + std::cout << "CUDA and HIP backends do not currently support this test\n"; + return; + } unittest::PiMock Mock{Plt}; setupDefaultMockAPIs(Mock); @@ -156,7 +161,8 @@ TEST_F(SchedulerTest, FailedCommandAccessorCleanup) { CGH.single_task([=] {}); }); FAIL() << "No exception was thrown."; - } catch (sycl::runtime_error &e) { } + } catch (...) { + } } ASSERT_EQ(MemBufRefCount, 0u) << "Memory leak detected."; @@ -169,6 +175,11 @@ TEST_F(SchedulerTest, FailedCommandStreamCleanup) { std::cout << "Not run due to host-only environment\n"; return; } + if (Plt.get_backend() == sycl::backend::ext_oneapi_cuda && + Plt.get_backend() == sycl::backend::ext_oneapi_hip) { + std::cout << "CUDA and HIP backends do not currently support this test\n"; + return; + } unittest::PiMock Mock{Plt}; setupDefaultMockAPIs(Mock); @@ -193,11 +204,11 @@ TEST_F(SchedulerTest, FailedCommandStreamCleanup) { Q.submit([&](sycl::handler &CGH) { sycl::stream KernelStream(108 * 64 + 128, 64, CGH); CGH.use_kernel_bundle(ExecBundle); - //CGH.set_args(KernelStream); CGH.single_task([=] {}); }); FAIL() << "No exception was thrown."; - } catch (sycl::runtime_error &e) { } + } catch (...) { + } Q.wait(); } From 050b4f25ac2141fa1b6883a236af79a75d15c734 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 13 Dec 2021 11:47:35 +0300 Subject: [PATCH 03/13] Correct conditional Signed-off-by: Steffen Larsen --- sycl/unittests/scheduler/FailedCommands.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index bc0db1c28b8ad..e8d2752947078 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -129,7 +129,7 @@ TEST_F(SchedulerTest, FailedCommandAccessorCleanup) { std::cout << "Not run due to host-only environment\n"; return; } - if (Plt.get_backend() == sycl::backend::ext_oneapi_cuda && + if (Plt.get_backend() == sycl::backend::ext_oneapi_cuda || Plt.get_backend() == sycl::backend::ext_oneapi_hip) { std::cout << "CUDA and HIP backends do not currently support this test\n"; return; @@ -175,7 +175,7 @@ TEST_F(SchedulerTest, FailedCommandStreamCleanup) { std::cout << "Not run due to host-only environment\n"; return; } - if (Plt.get_backend() == sycl::backend::ext_oneapi_cuda && + if (Plt.get_backend() == sycl::backend::ext_oneapi_cuda || Plt.get_backend() == sycl::backend::ext_oneapi_hip) { std::cout << "CUDA and HIP backends do not currently support this test\n"; return; From 02021545fc369a95de39a5f6da8093a2642c022c Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Tue, 14 Dec 2021 21:06:41 +0300 Subject: [PATCH 04/13] Change cleanup strategy This commit adds a new strategy for cleaning up failed commands, namely to walk up the failed commands and its users and replace them by empty commands. This preserves the structure of the graph while replacing failed state. Signed-off-by: Steffen Larsen --- .../source/detail/scheduler/graph_builder.cpp | 83 +++++++++++++++++-- .../detail/scheduler/leaves_collection.cpp | 1 + sycl/source/detail/scheduler/scheduler.cpp | 48 +++++------ sycl/source/detail/scheduler/scheduler.hpp | 5 ++ sycl/unittests/scheduler/LeavesCollection.cpp | 1 + 5 files changed, 105 insertions(+), 33 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 547315a30d80c..60cbc46a4c175 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1125,15 +1125,6 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands( MCmdsToVisit.push(Dep.MDepCommand); } - // If the command has failed to enqueue it must be removed from its leaves. - if (Cmd->isEnqueueFailed()) { - for (const DepDesc &Dep : Cmd->MDeps) { - const Requirement *Req = Dep.MDepRequirement; - MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); - updateLeaves({Cmd}, Record, Req->MAccessMode); - } - } - // Do not clean up the node if it is a leaf for any memory object if (Cmd->MLeafCounter > 0) continue; @@ -1162,6 +1153,80 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands( handleVisitedNodes(MVisitedCmds); } +void Scheduler::GraphBuilder::cleanupFailedCommand( + Command *FailedCmd, + std::vector> + &StreamsToDeallocate) { + assert(MCmdsToVisit.empty()); + MCmdsToVisit.push(FailedCmd); + MVisitedCmds.clear(); + + // Traverse the graph using BFS + while (!MCmdsToVisit.empty()) { + Command *Cmd = MCmdsToVisit.front(); + MCmdsToVisit.pop(); + + if (!markNodeAsVisited(Cmd, MVisitedCmds)) + continue; + + // Skip replacing empty commands similar to the one we will create + if (Cmd->getType() == Command::EMPTY_TASK && + Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueReady) { + for (Command *UserCmd : Cmd->MUsers) + MCmdsToVisit.push(UserCmd); + continue; + } + + // Collect stream objects for a visited command. + if (Cmd->getType() == Command::CommandType::RUN_CG) { + auto ExecCmd = static_cast(Cmd); + std::vector> Streams = ExecCmd->getStreams(); + ExecCmd->clearStreams(); + StreamsToDeallocate.insert(StreamsToDeallocate.end(), Streams.begin(), + Streams.end()); + } + + // Create empty command that is "ready" for enqueuing. + EmptyCommand *EmptyCmd = new EmptyCommand(Cmd->getQueue()); + if (!EmptyCmd) + throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; + + // Mark new empty command as visited to avoid replacing it later. + markNodeAsVisited(EmptyCmd, MVisitedCmds); + + for (Command *UserCmd : Cmd->MUsers) { + // User dependencies cannot be satisfied as dependency failed. These are + // also considered as failed. + MCmdsToVisit.push(UserCmd); + + // Replace failed command in users with new empty command. + for (DepDesc &Dep : UserCmd->MDeps) { + if (Dep.MDepCommand == Cmd) { + Dep.MDepCommand = EmptyCmd; + EmptyCmd->MUsers.insert(UserCmd); + } + } + } + + for (DepDesc &Dep : Cmd->MDeps) { + // Replace failed command in dependency records. + const Requirement *Req = Dep.MDepRequirement; + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); + updateLeaves({Cmd}, Record, Req->MAccessMode); + std::vector ToEnqueue; + addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); + + // Replace failed command as a user. + if(Dep.MDepCommand->MUsers.erase(Cmd)) + Dep.MDepCommand->MUsers.insert(EmptyCmd); + } + + Cmd->MMarks.MToBeDeleted = true; + } + handleVisitedNodes(MVisitedCmds); +} + void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) { const auto It = std::find_if( MMemObjs.begin(), MMemObjs.end(), diff --git a/sycl/source/detail/scheduler/leaves_collection.cpp b/sycl/source/detail/scheduler/leaves_collection.cpp index 0ae0bcfbb9c0e..8bac1b95daeed 100644 --- a/sycl/source/detail/scheduler/leaves_collection.cpp +++ b/sycl/source/detail/scheduler/leaves_collection.cpp @@ -32,6 +32,7 @@ static inline bool doOverlap(const Requirement *LHS, const Requirement *RHS) { static inline bool isHostAccessorCmd(Command *Cmd) { return Cmd->getType() == Command::EMPTY_TASK && + Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueBlocked && Cmd->MBlockReason == Command::BlockReason::HostAccessor; } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 7d6d2ad2abd3f..bb2f716688610 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -68,6 +68,16 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, } } +static void deallocateStreams( + std::vector> &StreamsToDeallocate) { + // Deallocate buffers for stream objects of the finished commands. Iterate in + // reverse order because it is the order of commands execution. + for (auto StreamImplPtr = StreamsToDeallocate.rbegin(); + StreamImplPtr != StreamsToDeallocate.rend(); ++StreamImplPtr) + detail::Scheduler::getInstance().deallocateStreamBuffers( + StreamImplPtr->get()); +} + EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, QueueImplPtr Queue) { EventImplPtr NewEvent = nullptr; @@ -116,16 +126,6 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, EnqueueResultT Res; bool Enqueued; - auto CleanUp = [&]() { - if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { - if (Type == CG::RunOnHostIntel) - static_cast(NewCmd)->releaseCG(); - - NewEvent->setCommand(nullptr); - delete NewCmd; - } - }; - for (Command *Cmd : AuxiliaryCmds) { Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) @@ -144,12 +144,22 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, // Though, dismiss ownership of native kernel command group as it's // resources may be in use by backend and synchronization point here is // at native kernel execution finish. - CleanUp(); + if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { + if (Type == CG::RunOnHostIntel) + static_cast(NewCmd)->releaseCG(); + + NewEvent->setCommand(nullptr); + delete NewCmd; + } } } catch (...) { - // If enqueuing has failed we need to clean up the command to remove it - // from the graph so it does not cause issues for other related commands. - cleanupFinishedCommands(NewEvent); + std::vector StreamsToDeallocate; + Command *NewCmd = static_cast(NewEvent->getCommand()); + if (NewCmd) { + WriteLockT Lock(MGraphLock, std::defer_lock); + MGraphBuilder.cleanupFailedCommand(NewCmd, StreamsToDeallocate); + } + deallocateStreams(StreamsToDeallocate); std::rethrow_exception(std::current_exception()); } @@ -204,16 +214,6 @@ void Scheduler::waitForEvent(EventImplPtr Event) { GraphProcessor::waitForEvent(std::move(Event), Lock, /*LockTheLock=*/false); } -static void deallocateStreams( - std::vector> &StreamsToDeallocate) { - // Deallocate buffers for stream objects of the finished commands. Iterate in - // reverse order because it is the order of commands execution. - for (auto StreamImplPtr = StreamsToDeallocate.rbegin(); - StreamImplPtr != StreamsToDeallocate.rend(); ++StreamImplPtr) - detail::Scheduler::getInstance().deallocateStreamBuffers( - StreamImplPtr->get()); -} - void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { // We are going to traverse a graph of finished commands. Gather stream // objects from these commands if any and deallocate buffers for these stream diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 66e17d7862301..518b87d9bcd18 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -511,6 +511,11 @@ class Scheduler { Command *FinishedCmd, std::vector> &); + /// Removes failed command from the subgraph + void cleanupFailedCommand( + Command *FailedCmd, + std::vector> &); + /// Reschedules the command passed using Queue provided. /// /// This can lead to rescheduling of all dependent commands. This can be diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp index 19d243388d198..ee62fc89c8277 100644 --- a/sycl/unittests/scheduler/LeavesCollection.cpp +++ b/sycl/unittests/scheduler/LeavesCollection.cpp @@ -43,6 +43,7 @@ createEmptyCommand(const std::shared_ptr &Q, EmptyCommand *Cmd = new EmptyCommand(Q); Cmd->addRequirement(/* DepCmd = */ nullptr, /* AllocaCmd = */ nullptr, &Req); Cmd->MBlockReason = Command::BlockReason::HostAccessor; + Cmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; return std::shared_ptr{Cmd}; } From eafd2e63775913f4355e0d310019757fab88741b Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Tue, 14 Dec 2021 21:14:13 +0300 Subject: [PATCH 05/13] Fix formatting Signed-off-by: Steffen Larsen --- sycl/source/detail/scheduler/graph_builder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 60cbc46a4c175..60d477f286d1e 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1218,7 +1218,7 @@ void Scheduler::GraphBuilder::cleanupFailedCommand( addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); // Replace failed command as a user. - if(Dep.MDepCommand->MUsers.erase(Cmd)) + if (Dep.MDepCommand->MUsers.erase(Cmd)) Dep.MDepCommand->MUsers.insert(EmptyCmd); } From 5811dc90709e285f5d62ece0b245b55fa833f30b Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 15 Dec 2021 12:01:12 +0300 Subject: [PATCH 06/13] Add fast-path for dependencyless and userless commands Signed-off-by: Steffen Larsen --- sycl/source/detail/scheduler/graph_builder.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 60d477f286d1e..b35f91ec1e878 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1157,9 +1157,19 @@ void Scheduler::GraphBuilder::cleanupFailedCommand( Command *FailedCmd, std::vector> &StreamsToDeallocate) { + MVisitedCmds.clear(); + + // If the failed command has no users and no dependencies, there is no reason + // to replace it with an empty command. + if (FailedCmd->MDeps.size() == 0 && FailedCmd->MUsers.size() == 0) { + markNodeAsVisited(FailedCmd, MVisitedCmds); + FailedCmd->MMarks.MToBeDeleted = true; + handleVisitedNodes(MVisitedCmds); + return; + } + assert(MCmdsToVisit.empty()); MCmdsToVisit.push(FailedCmd); - MVisitedCmds.clear(); // Traverse the graph using BFS while (!MCmdsToVisit.empty()) { From 2908a135cb2e0cc8873f9452ed74046c92b827ca Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 20 Dec 2021 14:03:44 +0300 Subject: [PATCH 07/13] Merge failed command and users into a single empty command Signed-off-by: Steffen Larsen --- .../source/detail/scheduler/graph_builder.cpp | 43 ++++++------------- 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index b35f91ec1e878..a03750a6389d0 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1171,6 +1171,15 @@ void Scheduler::GraphBuilder::cleanupFailedCommand( assert(MCmdsToVisit.empty()); MCmdsToVisit.push(FailedCmd); + // Create empty command that is "ready" for enqueuing. + EmptyCommand *EmptyCmd = new EmptyCommand(FailedCmd->getQueue()); + if (!EmptyCmd) + throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; + + // Mark new empty command as visited to avoid replacing it later. + markNodeAsVisited(EmptyCmd, MVisitedCmds); + // Traverse the graph using BFS while (!MCmdsToVisit.empty()) { Command *Cmd = MCmdsToVisit.front(); @@ -1179,14 +1188,6 @@ void Scheduler::GraphBuilder::cleanupFailedCommand( if (!markNodeAsVisited(Cmd, MVisitedCmds)) continue; - // Skip replacing empty commands similar to the one we will create - if (Cmd->getType() == Command::EMPTY_TASK && - Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueReady) { - for (Command *UserCmd : Cmd->MUsers) - MCmdsToVisit.push(UserCmd); - continue; - } - // Collect stream objects for a visited command. if (Cmd->getType() == Command::CommandType::RUN_CG) { auto ExecCmd = static_cast(Cmd); @@ -1196,29 +1197,11 @@ void Scheduler::GraphBuilder::cleanupFailedCommand( Streams.end()); } - // Create empty command that is "ready" for enqueuing. - EmptyCommand *EmptyCmd = new EmptyCommand(Cmd->getQueue()); - if (!EmptyCmd) - throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); - EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - - // Mark new empty command as visited to avoid replacing it later. - markNodeAsVisited(EmptyCmd, MVisitedCmds); - - for (Command *UserCmd : Cmd->MUsers) { - // User dependencies cannot be satisfied as dependency failed. These are - // also considered as failed. + // Users cannot be satisfied as dependency failed. These are also considered + // as failed. We merge these into the new empty root command. + for (Command *UserCmd : Cmd->MUsers) MCmdsToVisit.push(UserCmd); - // Replace failed command in users with new empty command. - for (DepDesc &Dep : UserCmd->MDeps) { - if (Dep.MDepCommand == Cmd) { - Dep.MDepCommand = EmptyCmd; - EmptyCmd->MUsers.insert(UserCmd); - } - } - } - for (DepDesc &Dep : Cmd->MDeps) { // Replace failed command in dependency records. const Requirement *Req = Dep.MDepRequirement; @@ -1228,7 +1211,7 @@ void Scheduler::GraphBuilder::cleanupFailedCommand( addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); // Replace failed command as a user. - if (Dep.MDepCommand->MUsers.erase(Cmd)) + if (Dep.MDepCommand->MUsers.erase(Cmd) && Dep.MDepCommand != EmptyCmd) Dep.MDepCommand->MUsers.insert(EmptyCmd); } From 35ba2037bb8fcbd1c8cc97a87399294d887a7232 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 3 Feb 2022 17:21:10 +0300 Subject: [PATCH 08/13] Move cleanup Signed-off-by: Steffen Larsen --- sycl/source/detail/scheduler/scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 0ac99ec3d1fd2..7be417bb48c8c 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -162,9 +162,9 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, WriteLockT Lock(MGraphLock, std::defer_lock); MGraphBuilder.cleanupFailedCommand(NewCmd, StreamsToDeallocate, ToCleanUp); - cleanupCommands(ToCleanUp); } deallocateStreams(StreamsToDeallocate); + cleanupCommands(ToCleanUp); std::rethrow_exception(std::current_exception()); } cleanupCommands(ToCleanUp); From 88a14042e0b645575636fa4f495a2a714aefc34e Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 9 Feb 2022 22:49:48 +0300 Subject: [PATCH 09/13] Ensure adoption of dependencies Signed-off-by: Steffen Larsen --- sycl/source/detail/scheduler/graph_builder.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index b5771addb1ef2..226f557dd3d18 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1312,8 +1312,10 @@ void Scheduler::GraphBuilder::cleanupFailedCommand( addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); // Replace failed command as a user. - if (Dep.MDepCommand->MUsers.erase(Cmd) && Dep.MDepCommand != EmptyCmd) + if (Dep.MDepCommand->MUsers.erase(Cmd) && Dep.MDepCommand != EmptyCmd) { Dep.MDepCommand->MUsers.insert(EmptyCmd); + EmptyCmd->MDeps.push_back(Dep); + } } Cmd->MMarks.MToBeDeleted = true; From 2ecb99a0baef3c06e4b42cad8f69af00e932162f Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Fri, 11 Feb 2022 20:51:57 +0300 Subject: [PATCH 10/13] Simplify approach and extend replacement operations Signed-off-by: Steffen Larsen --- .../source/detail/scheduler/graph_builder.cpp | 96 +++++++++---------- sycl/source/detail/scheduler/scheduler.hpp | 5 +- 2 files changed, 47 insertions(+), 54 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 226f557dd3d18..d0bc43c3e6078 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -234,13 +234,7 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord( void Scheduler::GraphBuilder::updateLeaves(const std::set &Cmds, MemObjRecord *Record, - access::mode AccessMode, std::vector &ToCleanUp) { - - const bool ReadOnlyReq = AccessMode == access::mode::read; - if (ReadOnlyReq) - return; - for (Command *Cmd : Cmds) { bool WasLeaf = Cmd->MLeafCounter > 0; Cmd->MLeafCounter -= Record->MReadLeaves.remove(Cmd); @@ -252,6 +246,18 @@ void Scheduler::GraphBuilder::updateLeaves(const std::set &Cmds, } } +void Scheduler::GraphBuilder::updateLeaves(const std::set &Cmds, + MemObjRecord *Record, + access::mode AccessMode, + std::vector &ToCleanUp) { + + const bool ReadOnlyReq = AccessMode == access::mode::read; + if (ReadOnlyReq) + return; + + updateLeaves(Cmds, Record, ToCleanUp); +} + void Scheduler::GraphBuilder::addNodeToLeaves( MemObjRecord *Record, Command *Cmd, access::mode AccessMode, std::vector &ToEnqueue) { @@ -1258,19 +1264,11 @@ void Scheduler::GraphBuilder::cleanupFailedCommand( std::vector> &StreamsToDeallocate, std::vector &ToCleanUp) { - MVisitedCmds.clear(); // If the failed command has no users and no dependencies, there is no reason // to replace it with an empty command. - if (FailedCmd->MDeps.size() == 0 && FailedCmd->MUsers.size() == 0) { - markNodeAsVisited(FailedCmd, MVisitedCmds); - FailedCmd->MMarks.MToBeDeleted = true; - handleVisitedNodes(MVisitedCmds); + if (FailedCmd->MDeps.size() == 0 && FailedCmd->MUsers.size() == 0) return; - } - - assert(MCmdsToVisit.empty()); - MCmdsToVisit.push(FailedCmd); // Create empty command that is "ready" for enqueuing. EmptyCommand *EmptyCmd = new EmptyCommand(FailedCmd->getQueue()); @@ -1278,49 +1276,41 @@ void Scheduler::GraphBuilder::cleanupFailedCommand( throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - // Mark new empty command as visited to avoid replacing it later. - markNodeAsVisited(EmptyCmd, MVisitedCmds); - - // Traverse the graph using BFS - while (!MCmdsToVisit.empty()) { - Command *Cmd = MCmdsToVisit.front(); - MCmdsToVisit.pop(); + // Collect stream objects for the failed command. + if (FailedCmd->getType() == Command::CommandType::RUN_CG) { + auto ExecCmd = static_cast(FailedCmd); + std::vector> Streams = ExecCmd->getStreams(); + ExecCmd->clearStreams(); + StreamsToDeallocate.insert(StreamsToDeallocate.end(), Streams.begin(), + Streams.end()); + } - if (!markNodeAsVisited(Cmd, MVisitedCmds)) - continue; + for (DepDesc &Dep : FailedCmd->MDeps) { + // Replace failed command in dependency records. + const Requirement *Req = Dep.MDepRequirement; + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); + updateLeaves({FailedCmd}, Record, ToCleanUp); + std::vector ToEnqueue; + addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); + assert(ToEnqueue.empty()); - // Collect stream objects for a visited command. - if (Cmd->getType() == Command::CommandType::RUN_CG) { - auto ExecCmd = static_cast(Cmd); - std::vector> Streams = ExecCmd->getStreams(); - ExecCmd->clearStreams(); - StreamsToDeallocate.insert(StreamsToDeallocate.end(), Streams.begin(), - Streams.end()); + // Replace failed command as a user. + if (Dep.MDepCommand->MUsers.erase(FailedCmd)) { + Dep.MDepCommand->MUsers.insert(EmptyCmd); + EmptyCmd->MDeps.push_back(Dep); } + } + FailedCmd->MDeps.clear(); - // Users cannot be satisfied as dependency failed. These are also considered - // as failed. We merge these into the new empty root command. - for (Command *UserCmd : Cmd->MUsers) - MCmdsToVisit.push(UserCmd); - - for (DepDesc &Dep : Cmd->MDeps) { - // Replace failed command in dependency records. - const Requirement *Req = Dep.MDepRequirement; - MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); - updateLeaves({Cmd}, Record, Req->MAccessMode, ToCleanUp); - std::vector ToEnqueue; - addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); - - // Replace failed command as a user. - if (Dep.MDepCommand->MUsers.erase(Cmd) && Dep.MDepCommand != EmptyCmd) { - Dep.MDepCommand->MUsers.insert(EmptyCmd); - EmptyCmd->MDeps.push_back(Dep); - } - } + for (Command *UserCmd : FailedCmd->MUsers) + for (DepDesc &Dep : UserCmd->MDeps) + if (Dep.MDepCommand == FailedCmd) + Dep.MDepCommand = EmptyCmd; + std::swap(FailedCmd->MUsers, EmptyCmd->MUsers); - Cmd->MMarks.MToBeDeleted = true; - } - handleVisitedNodes(MVisitedCmds); + FailedCmd->getEvent()->setCommand(EmptyCmd); + assert(FailedCmd->MLeafCounter == 0); + delete FailedCmd; } void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 8ba143a8faafa..9ae0386cd5f48 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -516,7 +516,8 @@ class Scheduler { Command *FinishedCmd, std::vector> &); - /// Removes failed command from the subgraph + /// Replaces a failed command in the subgraph with an empty command and + /// deletes the failed command. void cleanupFailedCommand( Command *FailedCmd, std::vector> &, @@ -557,6 +558,8 @@ class Scheduler { std::vector &ToEnqueue); /// Removes commands from leaves. + void updateLeaves(const std::set &Cmds, MemObjRecord *Record, + std::vector &ToCleanUp); void updateLeaves(const std::set &Cmds, MemObjRecord *Record, access::mode AccessMode, std::vector &ToCleanUp); From 5daf07a5827d0f7d8054482dbcb73b033f55c655 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 14 Feb 2022 17:30:24 +0300 Subject: [PATCH 11/13] Remove unused function Signed-off-by: Steffen Larsen --- sycl/source/detail/scheduler/commands.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 0f081ab956f9c..82c42711b2da1 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -146,10 +146,6 @@ class Command { return MEnqueueStatus == EnqueueResultT::SyclEnqueueBlocked; } - bool isEnqueueFailed() const { - return MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed; - } - const QueueImplPtr &getQueue() const { return MQueue; } const QueueImplPtr &getSubmittedQueue() const { return MSubmittedQueue; } From 922c2062803806d490555a70501a0b2d9735dc1e Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 8 Jul 2022 12:04:24 -0700 Subject: [PATCH 12/13] Fix build Signed-off-by: Larsen, Steffen --- sycl/source/detail/scheduler/graph_builder.cpp | 2 +- sycl/source/detail/scheduler/scheduler.cpp | 3 --- sycl/unittests/scheduler/FailedCommands.cpp | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index c665b704d3d2f..d123294424994 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1295,7 +1295,7 @@ void Scheduler::GraphBuilder::cleanupFailedCommand( // Create empty command that is "ready" for enqueuing. EmptyCommand *EmptyCmd = new EmptyCommand(FailedCmd->getQueue()); if (!EmptyCmd) - throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; // Collect stream objects for the failed command. diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 1e2d76419ac95..0dfb965363e9e 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -152,9 +152,6 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, // resources may be in use by backend and synchronization point here is // at native kernel execution finish. if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { - if (Type == CG::RunOnHostIntel) - static_cast(NewCmd)->releaseCG(); - NewEvent->setCommand(nullptr); delete NewCmd; } diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 5fd9523994d80..3d989e0d12062 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -93,7 +93,7 @@ pi_result redefinedFailingEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, const pi_event *, pi_event *) { throw sycl::runtime_error( "Exception from redefinedFailingEnqueueKernelLaunch.", - PI_INVALID_OPERATION); + PI_ERROR_INVALID_OPERATION); } size_t MemBufRefCount = 0u; From e128d14c022db1db3c463624090af21a8062ecd9 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 26 Jul 2022 05:32:43 -0700 Subject: [PATCH 13/13] Add getKernelSize to test Signed-off-by: Larsen, Steffen --- sycl/unittests/scheduler/FailedCommands.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 3d989e0d12062..e050f5ecbafa3 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -30,6 +30,7 @@ template <> struct KernelInfo { static constexpr bool isESIMD() { return false; } static constexpr bool callsThisItem() { return false; } static constexpr bool callsAnyThisFreeFunction() { return false; } + static constexpr int64_t getKernelSize() { return 1; } }; } // namespace detail