diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 5ef520a6d2ed7..080c5d52add15 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -166,8 +166,6 @@ static std::string commandToNodeType(Command::CommandType Type) { return "memory_transfer_node"; case Command::CommandType::UPDATE_REQUIREMENT: return "host_acc_create_buffer_lock_node"; - case Command::CommandType::EMPTY_TASK: - return "host_acc_destroy_buffer_release_node"; case Command::CommandType::FUSION: return "kernel_fusion_placeholder_node"; default: @@ -196,8 +194,6 @@ static std::string commandToName(Command::CommandType Type) { return "Memory Transfer (Unmap)"; case Command::CommandType::UPDATE_REQUIREMENT: return "Host Accessor Creation/Buffer Lock"; - case Command::CommandType::EMPTY_TASK: - return "Host Accessor Destruction/Buffer Lock Release"; case Command::CommandType::FUSION: return "Kernel Fusion Placeholder"; default: @@ -236,6 +232,21 @@ bool Command::isHostTask() const { CG::CGTYPE::CodeplayHostTask); } +bool Command::blockManually(const BlockReason &Reason) { + if (MIsManuallyBlocked) + return false; + MBlockReason = Reason; + MIsManuallyBlocked = true; + return true; +} + +bool Command::unblock() { + if (!MIsManuallyBlocked) + return false; + MIsManuallyBlocked = false; + return true; +} + static void flushCrossQueueDeps(const std::vector &EventImpls, const QueueImplPtr &Queue) { for (auto &EventImpl : EventImpls) { @@ -625,7 +636,7 @@ bool Command::supportsPostEnqueueCleanup() const { return true; } bool Command::readyForCleanup() const { return MLeafCounter == 0 && - MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess; + MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess && !isBlocking(); } Command *Command::addDep(DepDesc NewDep, std::vector &ToCleanUp) { @@ -636,7 +647,7 @@ Command *Command::addDep(DepDesc NewDep, std::vector &ToCleanUp) { processDepEvent(NewDep.MDepCommand->getEvent(), NewDep, ToCleanUp); } // ConnectionCmd insertion builds the following dependency structure: - // this -> emptyCmd (for ConnectionCmd) -> ConnectionCmd -> NewDep + // this -> ConnectionCmd -> NewDep // that means that this and NewDep are already dependent if (!ConnectionCmd) { MDeps.push_back(NewDep); @@ -693,35 +704,12 @@ void Command::emitInstrumentation(uint16_t Type, const char *Txt) { bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, std::vector &ToCleanUp) { + assert(MEnqueueStatus != EnqueueResultT::SyclEnqueueBlocked && + "Final command enqueue should always be not blocked."); // Exit if already enqueued if (MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess) return true; - // If the command is blocked from enqueueing - if (MIsBlockable && MEnqueueStatus == EnqueueResultT::SyclEnqueueBlocked) { - // Exit if enqueue type is not blocking - if (!Blocking) { - EnqueueResult = EnqueueResultT(EnqueueResultT::SyclEnqueueBlocked, this); - return false; - } - -#ifdef XPTI_ENABLE_INSTRUMENTATION - // Scoped trace event notifier that emits a barrier begin and barrier end - // event, which models the barrier while enqueuing along with the blocked - // reason, as determined by the scheduler - std::string Info = "enqueue.barrier["; - Info += std::string(getBlockReason()) + "]"; - emitInstrumentation(xpti::trace_barrier_begin, Info.c_str()); -#endif - - // Wait if blocking - while (MEnqueueStatus == EnqueueResultT::SyclEnqueueBlocked) - ; -#ifdef XPTI_ENABLE_INSTRUMENTATION - emitInstrumentation(xpti::trace_barrier_end, Info.c_str()); -#endif - } - std::lock_guard Lock(MEnqueueMtx); // Exit if the command is already enqueued @@ -1535,83 +1523,6 @@ pi_int32 MemCpyCommandHost::enqueueImp() { return PI_SUCCESS; } -EmptyCommand::EmptyCommand(QueueImplPtr Queue) - : Command(CommandType::EMPTY_TASK, std::move(Queue)) { - emitInstrumentationDataProxy(); -} - -pi_int32 EmptyCommand::enqueueImp() { - waitForPreparedHostEvents(); - waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef()); - - return PI_SUCCESS; -} - -void EmptyCommand::addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, - const Requirement *Req) { - const Requirement &ReqRef = *Req; - MRequirements.emplace_back(ReqRef); - const Requirement *const StoredReq = &MRequirements.back(); - - // EmptyCommand is always host one, so we believe that result of addDep is - // nil - std::vector ToCleanUp; - Command *Cmd = addDep(DepDesc{DepCmd, StoredReq, AllocaCmd}, ToCleanUp); - assert(Cmd == nullptr && "Conection command should be null for EmptyCommand"); - assert(ToCleanUp.empty() && "addDep should add a command for cleanup only if " - "there's a connection command"); - (void)Cmd; -} - -void EmptyCommand::emitInstrumentationData() { -#ifdef XPTI_ENABLE_INSTRUMENTATION - if (!xptiTraceEnabled()) - return; - // Create a payload with the command name and an event using this payload to - // emit a node_create - if (MRequirements.empty()) - return; - - Requirement &Req = *MRequirements.begin(); - - MAddress = Req.MSYCLMemObj; - makeTraceEventProlog(MAddress); - - if (MFirstInstance) { - xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); - xpti::addMetadata(CmdTraceEvent, "memory_object", - reinterpret_cast(MAddress)); - makeTraceEventEpilog(); - } -#endif -} - -void EmptyCommand::printDot(std::ostream &Stream) const { - Stream << "\"" << this << "\" [style=filled, fillcolor=\"#8d8f29\", label=\""; - - Stream << "ID = " << this << "\\n"; - Stream << "EMPTY NODE" - << "\\n"; - - Stream << "\"];" << std::endl; - - for (const auto &Dep : MDeps) { - Stream << " \"" << this << "\" -> \"" << Dep.MDepCommand << "\"" - << " [ label = \"Access mode: " - << accessModeToString(Dep.MDepRequirement->MAccessMode) << "\\n" - << "MemObj: " << Dep.MDepRequirement->MSYCLMemObj << " \" ]" - << std::endl; - } -} - -bool EmptyCommand::producesPiEvent() const { return false; } - void MemCpyCommandHost::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; @@ -1662,6 +1573,11 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { #endif } +bool UpdateHostRequirementCommand::supportsPostEnqueueCleanup() const { + // TODO: consider moving to base class + return !isBlocking(); +} + static std::string cgTypeToString(detail::CG::CGTYPE Type) { switch (Type) { case detail::CG::Kernel: @@ -2585,12 +2501,6 @@ bool ExecCGCommand::supportsPostEnqueueCleanup() const { (MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask); } -bool ExecCGCommand::readyForCleanup() const { - if (MCommandGroup->getType() == CG::CGTYPE::CodeplayHostTask) - return MLeafCounter == 0 && MEvent->isCompleted(); - return Command::readyForCleanup(); -} - KernelFusionCommand::KernelFusionCommand(QueueImplPtr Queue) : Command(Command::CommandType::FUSION, Queue), MStatus(FusionStatus::ACTIVE) { diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 8a1bbe7370793..6273accdad92e 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -42,7 +42,6 @@ class AllocaCommand; class AllocaCommandBase; class ReleaseCommand; class ExecCGCommand; -class EmptyCommand; enum BlockingT { NON_BLOCKING = 0, BLOCKING }; @@ -103,7 +102,6 @@ class Command { MAP_MEM_OBJ, UNMAP_MEM_OBJ, UPDATE_REQUIREMENT, - EMPTY_TASK, HOST_TASK, FUSION }; @@ -143,14 +141,14 @@ class Command { return MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess; } - // Shows that command could not be enqueued, now it may be true for empty task - // only - bool isEnqueueBlocked() const { - return MIsBlockable && MEnqueueStatus == EnqueueResultT::SyclEnqueueBlocked; + // Shows that command could be enqueued, but blocks enqueue of all + // commands depending on it. Regular usage - host task & host accessors. + bool isBlocking() const { + return MIsManuallyBlocked || (isHostTask() && !MEvent->isCompleted()); } - // Shows that command could be enqueued, but is blocking enqueue of all - // commands depending on it. Regular usage - host task. - bool isBlocking() const { return isHostTask() && !MEvent->isCompleted(); } + enum class BlockReason : int { HostAccessor = 0, HostTask }; + bool blockManually(const BlockReason &Reason); + bool unblock(); void addBlockedUserUnique(const EventImplPtr &NewUser) { if (std::find(MBlockedUsers.begin(), MBlockedUsers.end(), NewUser) != @@ -281,8 +279,8 @@ class Command { std::vector MDeps; /// Contains list of commands that depend on the command. std::unordered_set MUsers; - /// Indicates whether the command can be blocked from enqueueing. - bool MIsBlockable = false; + /// Indicates whether the command is set as blocking for its users. + std::atomic_bool MIsManuallyBlocked = false; /// Counts the number of memory objects this command is a leaf for. unsigned MLeafCounter = 0; @@ -295,10 +293,8 @@ class Command { /// Used for marking the node during graph traversal. Marks MMarks; - enum class BlockReason : int { HostAccessor = 0, HostTask }; - - // Only have reasonable value while MIsBlockable is true - BlockReason MBlockReason; + // Only have reasonable value while isBlocking returns true + BlockReason MBlockReason = BlockReason::HostTask; /// Describes the status of the command. std::atomic MEnqueueStatus; @@ -352,30 +348,6 @@ class Command { std::mutex MBlockedUsersMutex; }; -/// The empty command does nothing during enqueue. The task can be used to -/// implement lock in the graph, or to merge several nodes into one. -class EmptyCommand : public Command { -public: - EmptyCommand(QueueImplPtr Queue); - - void printDot(std::ostream &Stream) const final; - const Requirement *getRequirement() const final { return &MRequirements[0]; } - void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, - const Requirement *Req); - - void emitInstrumentationData() override; - - bool producesPiEvent() const final; - -private: - pi_int32 enqueueImp() final; - - // Employing deque here as it allows to push_back/emplace_back without - // invalidation of pointer or reference to stored data item regardless of - // iterator invalidation. - std::deque MRequirements; -}; - /// The release command enqueues release of a memory object instance allocated /// on Host or underlying framework. class ReleaseCommand : public Command { @@ -589,12 +561,6 @@ class ExecCGCommand : public Command { detail::CG &getCG() const { return *MCommandGroup; } - // MEmptyCmd is only employed if this command refers to host-task. - // The mechanism of lookup for single EmptyCommand amongst users of - // host-task-representing command is unreliable. This unreliability roots in - // the cleanup process. - EmptyCommand *MEmptyCmd = nullptr; - // MFusionCommand is employed to mark a CG command as part of a kernel fusion // and allows to refer back to the corresponding KernelFusionCommand if // necessary. @@ -604,8 +570,6 @@ class ExecCGCommand : public Command { bool supportsPostEnqueueCleanup() const final; - bool readyForCleanup() const final; - private: pi_int32 enqueueImp() final; @@ -624,6 +588,7 @@ class UpdateHostRequirementCommand : public Command { void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData() final; + bool supportsPostEnqueueCleanup() const final; private: pi_int32 enqueueImp() final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index d3ee02d6e1f17..0a8bebb8f9ed3 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -535,13 +535,9 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, Command *UpdateHostAccCmd = insertUpdateHostReqCmd(Record, Req, HostQueue, ToEnqueue); + assert(UpdateHostAccCmd->blockManually(Command::BlockReason::HostAccessor)); - // Need empty command to be blocked until host accessor is destructed - EmptyCommand *EmptyCmd = - addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue, - Command::BlockReason::HostAccessor, ToEnqueue); - - Req->MBlockedCmd = EmptyCmd; + Req->MBlockedCmd = UpdateHostAccCmd; if (MPrintOptionsArray[AfterAddHostAcc]) printGraphAsDot("after_addHostAccessor"); @@ -851,54 +847,7 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, } } -template -typename detail::enable_if_t< - std::is_same, Requirement>::value, - EmptyCommand *> -Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector &Reqs, - const QueueImplPtr &Queue, - Command::BlockReason Reason, - std::vector &ToEnqueue, - const bool AddDepsToLeaves) { - EmptyCommand *EmptyCmd = - new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); - - if (!EmptyCmd) - throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); - - EmptyCmd->MIsBlockable = true; - EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; - EmptyCmd->MBlockReason = Reason; - - for (T *Req : Reqs) { - MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req, ToEnqueue); - AllocaCommandBase *AllocaCmd = - getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); - EmptyCmd->addRequirement(Cmd, AllocaCmd, Req); - } - // addRequirement above call addDep that already will add EmptyCmd as user for - // Cmd no Reqs size check here so assume it is possible to have no Reqs passed - if (!Reqs.size()) - Cmd->addUser(EmptyCmd); - - if (AddDepsToLeaves) { - const std::vector &Deps = Cmd->MDeps; - std::vector ToCleanUp; - for (const DepDesc &Dep : Deps) { - const Requirement *Req = Dep.MDepRequirement; - MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); - - updateLeaves({Cmd}, Record, Req->MAccessMode, ToCleanUp); - addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); - } - for (Command *Cmd : ToCleanUp) - cleanupCommand(Cmd); - } - - return EmptyCmd; -} - -static bool isInteropHostTask(ExecCGCommand *Cmd) { +static bool isInteropHostTask(const std::unique_ptr &Cmd) { if (Cmd->getCG().getType() != CG::CGTYPE::CodeplayHostTask) return false; @@ -951,7 +900,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, // commands (e.g., allocations) resulting from that process by adding them // to the list of auxiliary commands of the fusion command. createGraphForCommand(NewCmd.get(), NewCmd->getCG(), - isInteropHostTask(NewCmd.get()), Reqs, Events, Queue, + isInteropHostTask(NewCmd), Reqs, Events, Queue, FusionCmd->auxiliaryCommands()); // We need to check the commands that this kernel depends on for any other // commands that have been submitted to another queue which is also in @@ -995,7 +944,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, return {NewCmd.release(), FusionCmd->getEvent(), false}; } createGraphForCommand(NewCmd.get(), NewCmd->getCG(), - isInteropHostTask(NewCmd.get()), Reqs, Events, Queue, + isInteropHostTask(NewCmd), Reqs, Events, Queue, ToEnqueue); auto Event = NewCmd->getEvent(); return {NewCmd.release(), Event, true}; diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index cecdf9fb1063a..ef40ca955a191 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -51,11 +51,19 @@ bool Scheduler::GraphProcessor::handleBlockingCmd(Command *Cmd, EnqueueResultT &EnqueueResult, Command *RootCommand, BlockingT Blocking) { - if (Cmd == RootCommand || Blocking) + // No error to be returned for root command. + if (Cmd == RootCommand) return true; - { + if (Blocking) { + // If Blocking & isBlocked -> we will successfully wait for event in + // enqueueImp. MIsManuallyBlocked should block enqueueCommand with BLOCKING + // and could not be moved to event.wait because should not affect simple + // wait case when no users of blocking command. + while (Cmd->MIsManuallyBlocked) + ; + } else { std::lock_guard Guard(Cmd->MBlockedUsersMutex); - if (Cmd->isBlocking()) { + if (Cmd->isBlocking() || Cmd->MIsManuallyBlocked) { const EventImplPtr &RootCmdEvent = RootCommand->getEvent(); Cmd->addBlockedUserUnique(RootCmdEvent); EnqueueResult = EnqueueResultT(EnqueueResultT::SyclEnqueueBlocked, Cmd); @@ -96,12 +104,6 @@ bool Scheduler::GraphProcessor::enqueueCommand( } } - // Exit early if the command is blocked and the enqueue type is non-blocking - if (Cmd->isEnqueueBlocked() && !Blocking) { - EnqueueResult = EnqueueResultT(EnqueueResultT::SyclEnqueueBlocked, Cmd); - return false; - } - // Recursively enqueue all the implicit + explicit backend level dependencies // first and exit immediately if any of the commands cannot be enqueued. for (const EventImplPtr &Event : Cmd->getPreparedDepsEvents()) { diff --git a/sycl/source/detail/scheduler/leaves_collection.cpp b/sycl/source/detail/scheduler/leaves_collection.cpp index 586baf973ff98..a03ccf40b32c6 100644 --- a/sycl/source/detail/scheduler/leaves_collection.cpp +++ b/sycl/source/detail/scheduler/leaves_collection.cpp @@ -31,8 +31,7 @@ static inline bool doOverlap(const Requirement *LHS, const Requirement *RHS) { } static inline bool isHostAccessorCmd(Command *Cmd) { - return Cmd->getType() == Command::EMPTY_TASK && - Cmd->MBlockReason == Command::BlockReason::HostAccessor; + return Cmd->getType() == Command::UPDATE_REQUIREMENT; } size_t LeavesCollection::remove(value_type Cmd) { @@ -46,15 +45,14 @@ size_t LeavesCollection::remove(value_type Cmd) { } // host accessor commands part - return eraseHostAccessorCommand(static_cast(Cmd)); + return eraseHostAccessorCommand(Cmd); } bool LeavesCollection::push_back(value_type Cmd, EnqueueListT &ToEnqueue) { bool Result = false; if (isHostAccessorCmd(Cmd)) - Result = - addHostAccessorCommand(static_cast(Cmd), ToEnqueue); + Result = addHostAccessorCommand(Cmd, ToEnqueue); else Result = addGenericCommand(Cmd, ToEnqueue); @@ -67,13 +65,13 @@ std::vector LeavesCollection::toVector() const { Result.insert(Result.end(), MGenericCommands.begin(), MGenericCommands.end()); - for (EmptyCommand *Cmd : MHostAccessorCommands) + for (Command *Cmd : MHostAccessorCommands) Result.push_back(Cmd); return Result; } -bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd, +bool LeavesCollection::addHostAccessorCommand(Command *Cmd, EnqueueListT &ToEnqueue) { // 1. find the oldest command with doOverlap() = true amongst the List // => OldCmd @@ -86,7 +84,7 @@ bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd, else OldCmdIt = std::find_if( MHostAccessorCommands.begin(), MHostAccessorCommands.end(), - [&](const EmptyCommand *Test) -> bool { + [&](const Command *Test) -> bool { return doOverlap(Test->getRequirement(), Cmd->getRequirement()); }); @@ -102,7 +100,7 @@ bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd, MAllocateDependency(Cmd, *OldCmdIt, MRecord, ToEnqueue); // erase the old cmd as it's tracked via dependency now - eraseHostAccessorCommand(static_cast(*OldCmdIt)); + eraseHostAccessorCommand(*OldCmdIt); } // 2.2 If OldCmd == null: @@ -128,12 +126,12 @@ bool LeavesCollection::addGenericCommand(Command *Cmd, return true; } -void LeavesCollection::insertHostAccessorCommand(EmptyCommand *Cmd) { +void LeavesCollection::insertHostAccessorCommand(Command *Cmd) { MHostAccessorCommandsXRef[Cmd] = MHostAccessorCommands.insert(MHostAccessorCommands.end(), Cmd); } -size_t LeavesCollection::eraseHostAccessorCommand(EmptyCommand *Cmd) { +size_t LeavesCollection::eraseHostAccessorCommand(Command *Cmd) { auto XRefIt = MHostAccessorCommandsXRef.find(Cmd); if (XRefIt == MHostAccessorCommandsXRef.end()) diff --git a/sycl/source/detail/scheduler/leaves_collection.hpp b/sycl/source/detail/scheduler/leaves_collection.hpp index c8f73645fcb06..b770415650501 100644 --- a/sycl/source/detail/scheduler/leaves_collection.hpp +++ b/sycl/source/detail/scheduler/leaves_collection.hpp @@ -38,7 +38,7 @@ struct MemObjRecord; class LeavesCollection { public: using GenericCommandsT = CircularBuffer; - using HostAccessorCommandsT = std::list; + using HostAccessorCommandsT = std::list; using EnqueueListT = std::vector; // Make first command depend on the second @@ -117,7 +117,7 @@ class LeavesCollection { using HostAccessorCommandSingleXRefT = typename HostAccessorCommandsT::iterator; using HostAccessorCommandsXRefT = - std::unordered_map; + std::unordered_map; MemObjRecord *MRecord; GenericCommandsT MGenericCommands; @@ -127,12 +127,12 @@ class LeavesCollection { AllocateDependencyF MAllocateDependency; bool addGenericCommand(value_type Cmd, EnqueueListT &ToEnqueue); - bool addHostAccessorCommand(EmptyCommand *Cmd, EnqueueListT &ToEnqueue); + bool addHostAccessorCommand(Command *Cmd, EnqueueListT &ToEnqueue); // inserts a command to the end of list for its mem object - void insertHostAccessorCommand(EmptyCommand *Cmd); + void insertHostAccessorCommand(Command *Cmd); // returns number of removed elements - size_t eraseHostAccessorCommand(EmptyCommand *Cmd); + size_t eraseHostAccessorCommand(Command *Cmd); typename Iterator::type beginHostAccessor() { return MHostAccessorCommands.begin(); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index e2e7f5df48cee..11c50dbe19e4c 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -336,8 +336,11 @@ void Scheduler::releaseHostAccessor(Requirement *Req) { assert(BlockedCmd && "Can't find appropriate command to unblock"); - BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - + { + std::lock_guard Guard(BlockedCmd->MBlockedUsersMutex); + BlockedCmd->unblock(); + } + // TODO: consider replacement with enqueueUnblockedCommands enqueueLeavesOfReqUnlocked(Req, Lock, ToCleanUp); } cleanupCommands(ToCleanUp); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 5026babe712f8..abe04c3d6f7da 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -653,15 +653,6 @@ class Scheduler { const Requirement *Req, const ContextImplPtr &Context); - template - typename detail::enable_if_t< - std::is_same, Requirement>::value, - EmptyCommand *> - addEmptyCmd(Command *Cmd, const std::vector &Req, - const QueueImplPtr &Queue, Command::BlockReason Reason, - std::vector &ToEnqueue, - const bool AddDepsToLeaves = true); - void createGraphForCommand(Command *NewCmd, CG &CG, bool isInteropTask, std::vector &Reqs, const std::vector &Events, diff --git a/sycl/unittests/queue/Wait.cpp b/sycl/unittests/queue/Wait.cpp index 8765a06a0b2fd..574c956905522 100644 --- a/sycl/unittests/queue/Wait.cpp +++ b/sycl/unittests/queue/Wait.cpp @@ -130,6 +130,7 @@ TEST(QueueWait, QueueWaitTest) { auto acc = buf.template get_access(Cgh); Cgh.host_task([=, &m]() { (void)acc; + std::unique_lock InsideHostTaskLock(m); }); }); diff --git a/sycl/unittests/scheduler/BlockedCommands.cpp b/sycl/unittests/scheduler/BlockedCommands.cpp index e5ab988687493..32ad7508598f9 100644 --- a/sycl/unittests/scheduler/BlockedCommands.cpp +++ b/sycl/unittests/scheduler/BlockedCommands.cpp @@ -17,11 +17,14 @@ using namespace testing; TEST_F(SchedulerTest, BlockedCommands) { sycl::unittest::PiMock Mock; sycl::queue Q{Mock.getPlatform().get_devices()[0], MAsyncHandler}; + MockCommand MockCmdBlocking(detail::getSyclObjImpl(Q)); MockCommand MockCmd(detail::getSyclObjImpl(Q)); - MockCmd.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; - MockCmd.MIsBlockable = true; - MockCmd.MRetVal = CL_DEVICE_PARTITION_EQUALLY; + EXPECT_TRUE(MockCmdBlocking.blockManually( + detail::Command::BlockReason::HostAccessor)); + EXPECT_TRUE(MockCmdBlocking.isBlocking()); + std::vector ToCleanUp; + std::ignore = MockCmd.addDep(MockCmdBlocking.getEvent(), ToCleanUp); MockScheduler MS; auto Lock = MS.acquireGraphReadLock(); @@ -32,22 +35,9 @@ TEST_F(SchedulerTest, BlockedCommands) { ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED\n"; - MockCmd.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; - Res.MResult = detail::EnqueueResultT::SyclEnqueueSuccess; - MockCmd.MRetVal = CL_DEVICE_PARTITION_EQUALLY; - - Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, detail::BLOCKING); - ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; - ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueFailed, Res.MResult) - << "The command is expected to fail to enqueue.\n"; - ASSERT_EQ(CL_DEVICE_PARTITION_EQUALLY, MockCmd.MRetVal) - << "Expected different error code.\n"; - ASSERT_EQ(&MockCmd, Res.MCmd) << "Expected different failed command.\n"; - Res = detail::EnqueueResultT{}; - MockCmd.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; - MockCmd.MRetVal = CL_SUCCESS; - Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, detail::BLOCKING); + MockCmdBlocking.unblock(); + Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, detail::NON_BLOCKING); ASSERT_TRUE(Enqueued && Res.MResult == detail::EnqueueResultT::SyclEnqueueSuccess) << "The command is expected to be successfully enqueued.\n"; @@ -59,21 +49,18 @@ TEST_F(SchedulerTest, DontEnqueueDepsIfOneOfThemIsBlocked) { MockCommand A(detail::getSyclObjImpl(Q)); A.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; - A.MIsBlockable = true; A.MRetVal = CL_SUCCESS; MockCommand B(detail::getSyclObjImpl(Q)); B.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; - B.MIsBlockable = true; B.MRetVal = CL_SUCCESS; MockCommand C(detail::getSyclObjImpl(Q)); - C.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; - C.MIsBlockable = true; + C.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; + C.blockManually(detail::Command::BlockReason::HostAccessor); MockCommand D(detail::getSyclObjImpl(Q)); D.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; - D.MIsBlockable = true; D.MRetVal = CL_SUCCESS; addEdge(&A, &B, nullptr); @@ -90,7 +77,7 @@ TEST_F(SchedulerTest, DontEnqueueDepsIfOneOfThemIsBlocked) { EXPECT_CALL(A, enqueue).Times(0); EXPECT_CALL(B, enqueue).Times(1); - EXPECT_CALL(C, enqueue).Times(0); + EXPECT_CALL(C, enqueue).Times(1); EXPECT_CALL(D, enqueue).Times(0); MockScheduler MS; @@ -103,17 +90,16 @@ TEST_F(SchedulerTest, DontEnqueueDepsIfOneOfThemIsBlocked) { ASSERT_EQ(&C, Res.MCmd) << "Expected different failed command.\n"; } -TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { +TEST_F(SchedulerTest, EnqueueBlockedCommandNoEarlyExit) { sycl::unittest::PiMock Mock; sycl::queue Q{Mock.getPlatform().get_devices()[0], MAsyncHandler}; MockCommand A(detail::getSyclObjImpl(Q)); - A.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; - A.MIsBlockable = true; + A.blockManually(detail::Command::BlockReason::HostAccessor); MockCommand B(detail::getSyclObjImpl(Q)); B.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; - B.MRetVal = CL_OUT_OF_RESOURCES; + B.MRetVal = CL_SUCCESS; addEdge(&A, &B, nullptr); @@ -121,30 +107,17 @@ TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { // // A -> B // - // If A is blocked, we should not try to enqueue B. + // If A is blocked, we still should try to enqueue B. - EXPECT_CALL(A, enqueue).Times(0); - EXPECT_CALL(B, enqueue).Times(0); + EXPECT_CALL(A, enqueue).Times(1); + EXPECT_CALL(B, enqueue).Times(1); MockScheduler MS; auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); - ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; - ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) - << "Result of enqueueing blocked command should be BLOCKED.\n"; - ASSERT_EQ(&A, Res.MCmd) << "Expected different failed command.\n"; - - // But if the enqueue type is blocking we should not exit early. - - EXPECT_CALL(A, enqueue).Times(0); - EXPECT_CALL(B, enqueue).Times(1); - - Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::BLOCKING); - ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; - ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueFailed, Res.MResult) - << "Result of enqueueing blocked command should be BLOCKED.\n"; - ASSERT_EQ(&B, Res.MCmd) << "Expected different failed command.\n"; + ASSERT_TRUE(Enqueued) << "Blocking command prevent user from being enqueued " + "but could be enqueued itself\n"; } // This unit test is for workaround described in GraphProcessor::enqueueCommand @@ -155,12 +128,10 @@ TEST_F(SchedulerTest, EnqueueHostDependency) { MockCommand A(detail::getSyclObjImpl(Q)); A.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; - A.MIsBlockable = true; A.MRetVal = CL_SUCCESS; MockCommand B(detail::getSyclObjImpl(Q)); B.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; - B.MIsBlockable = true; B.MRetVal = CL_SUCCESS; sycl::detail::EventImplPtr DepEvent{ diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 97aefc8db7b36..04c67053e2c1d 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -13,6 +13,8 @@ #include #include +#include + #include using namespace sycl; @@ -128,6 +130,9 @@ class DependsOnTests : public ::testing::Test { detail::BlockingT::NON_BLOCKING)); EXPECT_EQ(Result.MResult, detail::EnqueueResultT::SyclEnqueueBlocked); EXPECT_EQ(Result.MCmd, static_cast(BlockingCommand)); + EXPECT_EQ(Result.MCmd->MBlockReason, + detail::Command::BlockReason::HostTask); + EXPECT_FALSE(BlockedCmd->isSuccessfullyEnqueued()); } EXPECT_TRUE(BlockingCommand->isSuccessfullyEnqueued()); @@ -230,8 +235,7 @@ TEST_F(DependsOnTests, EnqueueNoMemObjDoubleKernelDepHostBlocked) { detail::Command *Cmd1 = AddTaskCG(TestCGType::HOST_TASK, Events, &CustomHostLambda); EventImplPtr Cmd1Event = Cmd1->getEvent(); - Cmd1->MIsBlockable = true; - Cmd1->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; + Cmd1->blockManually(detail::Command::BlockReason::HostAccessor); // Depends on host task Events.push_back(Cmd1Event); @@ -253,38 +257,98 @@ TEST_F(DependsOnTests, EnqueueNoMemObjDoubleKernelDepHostBlocked) { MS.enqueueCommand(Cmd3, Result, detail::BlockingT::NON_BLOCKING)); EXPECT_EQ(Result.MResult, detail::EnqueueResultT::SyclEnqueueBlocked); EXPECT_EQ(Result.MCmd, static_cast(Cmd1)); + EXPECT_EQ(Result.MCmd->MBlockReason, + detail::Command::BlockReason::HostAccessor); // Preconditions for post enqueue checks - EXPECT_FALSE(Cmd1->isSuccessfullyEnqueued()); + EXPECT_TRUE(Cmd1->isSuccessfullyEnqueued()); EXPECT_FALSE(Cmd2->isSuccessfullyEnqueued()); EXPECT_FALSE(Cmd3->isSuccessfullyEnqueued()); - Cmd1->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; - - std::vector BlockedCommands{Cmd2, Cmd3}; - VerifyBlockedCommandsEnqueue(Cmd1, BlockedCommands); + Cmd1->unblock(); + EXPECT_TRUE(MS.enqueueCommand(Cmd2, Result, detail::BlockingT::BLOCKING)); + EXPECT_TRUE(MS.enqueueCommand(Cmd3, Result, detail::BlockingT::NON_BLOCKING)); } -TEST_F(DependsOnTests, EnqueueNoMemObjDoubleKernelDepHost) { - // Checks blocking command tranfer for dependent kernels and enqueue of - // kernels on host task completion - std::vector Events; +TEST_F(DependsOnTests, TwoHostAccessorsReadRead) { + buffer Buf{range<1>(1)}; + std::shared_ptr BufImpl = detail::getSyclObjImpl(Buf); + detail::Requirement MockReq = getMockRequirement(Buf); + MockReq.MDims = 1; + MockReq.MSYCLMemObj = BufImpl.get(); + MockReq.MAccessMode = access::mode::read; + auto EventImplRead = MS.addHostAccessor(&MockReq); + auto EventImplReadSecond = MS.addHostAccessor(&MockReq); + detail::Command *CmdRead = + static_cast(EventImplRead->getCommand()); + detail::Command *CmdReadSecond = + static_cast(EventImplReadSecond->getCommand()); + EXPECT_EQ(CmdRead->MUsers.size(), 0u); + bool DepExists = any_of( + CmdReadSecond->MDeps.begin(), CmdReadSecond->MDeps.end(), + [&CmdRead](detail::DepDesc &Dep) { return CmdRead == Dep.MDepCommand; }); + EXPECT_FALSE(DepExists); + MS.releaseHostAccessor(&MockReq); +} - detail::Command *Cmd1 = - AddTaskCG(TestCGType::HOST_TASK, Events, &CustomHostLambda); - EventImplPtr Cmd1Event = Cmd1->getEvent(); +TEST_F(DependsOnTests, TwoHostAccessorsReadWrite) { + buffer Buf{range<1>(1)}; + std::shared_ptr BufImpl = detail::getSyclObjImpl(Buf); + detail::Requirement MockReq = getMockRequirement(Buf); + MockReq.MDims = 1; + MockReq.MSYCLMemObj = BufImpl.get(); + MockReq.MAccessMode = access::mode::read; + auto EventImplRead = MS.addHostAccessor(&MockReq); + detail::Requirement MockReq2 = getMockRequirement(Buf); + MockReq2.MDims = 1; + MockReq2.MSYCLMemObj = BufImpl.get(); + MockReq2.MAccessMode = access::mode::discard_write; + auto EventImplWrite = MS.addHostAccessor(&MockReq2); + detail::Command *CmdRead = + static_cast(EventImplRead->getCommand()); + detail::Command *CmdWrite = + static_cast(EventImplWrite->getCommand()); + EXPECT_EQ(CmdRead->MUsers.size(), 1u); + bool DepExists = any_of( + CmdWrite->MDeps.begin(), CmdWrite->MDeps.end(), + [&CmdRead](detail::DepDesc &Dep) { return CmdRead == Dep.MDepCommand; }); + EXPECT_TRUE(DepExists); + MS.releaseHostAccessor(&MockReq); + MS.releaseHostAccessor(&MockReq2); +} - // Depends on host task - Events.push_back(Cmd1Event); - detail::Command *Cmd2 = AddTaskCG(TestCGType::KERNEL_TASK, Events); - EventImplPtr Cmd2Event = Cmd2->getEvent(); +TEST_F(DependsOnTests, EnqueueKernelDepHostAcc) { + buffer Buf{range<1>(1)}; + std::shared_ptr BufImpl = detail::getSyclObjImpl(Buf); + detail::Requirement MockReq = getMockRequirement(Buf); + MockReq.MDims = 1; + MockReq.MSYCLMemObj = BufImpl.get(); + MockReq.MAccessMode = access::mode::write; + auto EventImplRead = MS.addHostAccessor(&MockReq); + + event KEvent = QueueDevImpl->submit( + [&](handler &Cgh) { + auto acc = Buf.template get_access(Cgh); + constexpr size_t KS = sizeof(decltype(acc)); + Cgh.single_task>([=]() { (void)acc; }); + }, + QueueDevImpl, nullptr, {}); + std::shared_ptr KernelEventImpl = + detail::getSyclObjImpl(KEvent); + detail::Command *KernelCmd = + static_cast(KernelEventImpl->getCommand()); + ASSERT_NE(KernelCmd, nullptr); + EXPECT_FALSE(KernelCmd->isSuccessfullyEnqueued()); - // Depends on kernel depending on host task - Events.clear(); - Events.push_back(Cmd2Event); - detail::Command *Cmd3 = AddTaskCG(TestCGType::KERNEL_TASK, Events); - EventImplPtr Cmd3Event = Cmd3->getEvent(); + detail::EnqueueResultT Result; + EXPECT_FALSE( + MS.enqueueCommand(KernelCmd, Result, detail::BlockingT::NON_BLOCKING)); + EXPECT_EQ(Result.MResult, detail::EnqueueResultT::SyclEnqueueBlocked); + EXPECT_EQ(Result.MCmd, + static_cast(EventImplRead->getCommand())); + EXPECT_EQ(Result.MCmd->MBlockReason, + detail::Command::BlockReason::HostAccessor); - std::vector BlockedCommands{Cmd2, Cmd3}; - VerifyBlockedCommandsEnqueue(Cmd1, BlockedCommands); + MS.releaseHostAccessor(&MockReq); + EXPECT_TRUE(KernelCmd->isSuccessfullyEnqueued()); } diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp index 8bace3b30f5b2..e8fa4f894dfba 100644 --- a/sycl/unittests/scheduler/GraphCleanup.cpp +++ b/sycl/unittests/scheduler/GraphCleanup.cpp @@ -242,12 +242,6 @@ TEST_F(SchedulerTest, PostEnqueueCleanup) { Record->MMemModified = true; MS.addCopyBack(&MockReq, ToEnqueue); }); - checkCleanupOnLeafUpdate( - MS, QueueImpl, Buf, MockReq, [&](detail::MemObjRecord *Record) { - detail::Command *Leaf = *Record->MWriteLeaves.begin(); - MS.addEmptyCmd(Leaf, {&MockReq}, QueueImpl, - detail::Command::BlockReason::HostTask, ToEnqueue); - }); device HostDevice = detail::createSyclObjFromImpl( detail::device_impl::getHostDeviceImpl()); detail::QueueImplPtr DefaultHostQueue{ diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp index ea883041add66..29caead28105f 100644 --- a/sycl/unittests/scheduler/LeavesCollection.cpp +++ b/sycl/unittests/scheduler/LeavesCollection.cpp @@ -37,11 +37,11 @@ createGenericCommand(const std::shared_ptr &Q) { } std::shared_ptr -createEmptyCommand(const std::shared_ptr &Q, - const Requirement &Req) { - EmptyCommand *Cmd = new EmptyCommand(Q); - Cmd->addRequirement(/* DepCmd = */ nullptr, /* AllocaCmd = */ nullptr, &Req); - Cmd->MBlockReason = Command::BlockReason::HostAccessor; +createHostAccCommand(const std::shared_ptr &Q, + const Requirement &Req) { + Command *Cmd = new UpdateHostRequirementCommand( + Q, Req, /*SrcAllocaCmd*/ nullptr, /*DstPtr*/ nullptr); + Cmd->blockManually(Command::BlockReason::HostAccessor); return std::shared_ptr{Cmd}; } @@ -83,7 +83,7 @@ TEST_F(LeavesCollectionTest, PushBack) { << "Host accessor commands container isn't empty, but it should be."; } - // add mix of generic and empty commands + // add mix of generic and host acc commands { sycl::buffer Buf(sycl::range<1>(1)); @@ -97,7 +97,7 @@ TEST_F(LeavesCollectionTest, PushBack) { for (size_t Idx = 0; Idx < GenericCmdsCapacity * 4; ++Idx) { auto Cmd = Idx % 2 ? createGenericCommand(getSyclObjImpl(Q)) - : createEmptyCommand(getSyclObjImpl(Q), MockReq); + : createHostAccCommand(getSyclObjImpl(Q), MockReq); Cmds.push_back(Cmd); LE.push_back(Cmds.back().get(), ToEnqueue); @@ -137,7 +137,7 @@ TEST_F(LeavesCollectionTest, Remove) { for (size_t Idx = 0; Idx < GenericCmdsCapacity * 4; ++Idx) { auto Cmd = Idx % 2 ? createGenericCommand(getSyclObjImpl(Q)) - : createEmptyCommand(getSyclObjImpl(Q), MockReq); + : createHostAccCommand(getSyclObjImpl(Q), MockReq); Cmds.push_back(Cmd); if (LE.push_back(Cmds.back().get(), ToEnqueue)) diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index cfa2a4d98cbd3..aa289a31251f5 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -186,15 +186,6 @@ class MockScheduler : public sycl::detail::Scheduler { return MGraphBuilder.insertUpdateHostReqCmd(Record, Req, Queue, ToEnqueue); } - sycl::detail::EmptyCommand * - addEmptyCmd(sycl::detail::Command *Cmd, - const std::vector &Reqs, - const sycl::detail::QueueImplPtr &Queue, - sycl::detail::Command::BlockReason Reason, - std::vector &ToEnqueue) { - return MGraphBuilder.addEmptyCmd(Cmd, Reqs, Queue, Reason, ToEnqueue); - } - sycl::detail::Command * addCG(std::unique_ptr CommandGroup, sycl::detail::QueueImplPtr Queue,