diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp
index 4ea9394f2034f..aaf492649be17 100644
--- a/sycl/source/detail/scheduler/commands.cpp
+++ b/sycl/source/detail/scheduler/commands.cpp
@@ -235,7 +235,7 @@ class DispatchHostTask {
     // Thus we employ read-lock of graph.
     {
       Scheduler &Sched = Scheduler::getInstance();
-      std::shared_lock<std::shared_timed_mutex> Lock(Sched.MGraphLock);
+      Scheduler::ReadLockT Lock(Sched.MGraphLock);

       std::vector<DepDesc> Deps = MThisCmd->MDeps;

@@ -481,7 +481,7 @@ void Command::makeTraceEventEpilog() {
 #endif
 }

-void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) {
+Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) {
   const QueueImplPtr &WorkerQueue = getWorkerQueue();
   const ContextImplPtr &WorkerContext = WorkerQueue->getContextImplPtr();

@@ -493,21 +493,25 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) {
     // call to waitInternal() is in waitForPreparedHostEvents() as it's called
     // from enqueue process functions
     MPreparedHostDepsEvents.push_back(DepEvent);
-    return;
+    return nullptr;
   }

+  Command *ConnectionCmd = nullptr;
+
   // Do not add redundant event dependencies for in-order queues.
   if (Dep.MDepCommand && Dep.MDepCommand->getWorkerQueue() == WorkerQueue &&
       WorkerQueue->has_property<property::queue::in_order>())
-    return;
+    return nullptr;

   ContextImplPtr DepEventContext = DepEvent->getContextImpl();
   // If contexts don't match we'll connect them using host task
   if (DepEventContext != WorkerContext && !WorkerContext->is_host()) {
     Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder;
-    GB.connectDepEvent(this, DepEvent, Dep);
+    ConnectionCmd = GB.connectDepEvent(this, DepEvent, Dep);
   } else
     MPreparedDepsEvents.push_back(std::move(DepEvent));
+
+  return ConnectionCmd;
 }

 const ContextImplPtr &Command::getWorkerContext() const {
@@ -516,9 +520,11 @@ const ContextImplPtr &Command::getWorkerContext() const {

 const QueueImplPtr &Command::getWorkerQueue() const { return MQueue; }

-void Command::addDep(DepDesc NewDep) {
+Command *Command::addDep(DepDesc NewDep) {
+  Command *ConnectionCmd = nullptr;
+
   if (NewDep.MDepCommand) {
-    processDepEvent(NewDep.MDepCommand->getEvent(), NewDep);
+    ConnectionCmd = processDepEvent(NewDep.MDepCommand->getEvent(), NewDep);
   }
   MDeps.push_back(NewDep);
 #ifdef XPTI_ENABLE_INSTRUMENTATION
@@ -526,9 +532,11 @@ void Command::addDep(DepDesc NewDep) {
       NewDep.MDepCommand, (void *)NewDep.MDepRequirement->MSYCLMemObj,
       accessModeToString(NewDep.MDepRequirement->MAccessMode), true);
 #endif
+
+  return ConnectionCmd;
 }

-void Command::addDep(EventImplPtr Event) {
+Command *Command::addDep(EventImplPtr Event) {
 #ifdef XPTI_ENABLE_INSTRUMENTATION
   // We need this for just the instrumentation, so guarding it will prevent
   // unused variable warnings when instrumentation is turned off
@@ -538,7 +546,7 @@ void Command::addDep(EventImplPtr Event) {
   emitEdgeEventForEventDependence(Cmd, PiEventAddr);
 #endif

-  processDepEvent(std::move(Event), DepDesc{nullptr, nullptr, nullptr});
+  return processDepEvent(std::move(Event), DepDesc{nullptr, nullptr, nullptr});
 }

 void Command::emitEnqueuedEventSignal(RT::PiEvent &PiEventAddr) {
@@ -732,7 +740,10 @@ AllocaCommand::AllocaCommand(QueueImplPtr Queue, Requirement Req,
   // Node event must be created before the dependent edge is added to this
   // node, so this call must be before the addDep() call.
   emitInstrumentationDataProxy();
-  addDep(DepDesc(nullptr, getRequirement(), this));
+  // "Nothing to depend on"
+  Command *ConnectionCmd = addDep(DepDesc(nullptr, getRequirement(), this));
+  assert(ConnectionCmd == nullptr);
+  (void)ConnectionCmd;
 }

 void AllocaCommand::emitInstrumentationData() {
@@ -795,7 +806,8 @@ void AllocaCommand::printDot(std::ostream &Stream) const {
 }

 AllocaSubBufCommand::AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req,
-                                         AllocaCommandBase *ParentAlloca)
+                                         AllocaCommandBase *ParentAlloca,
+                                         std::vector<Command *> &ToEnqueue)
     : AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, std::move(Queue),
                         std::move(Req),
                         /*LinkedAllocaCmd*/ nullptr),
@@ -804,7 +816,10 @@ AllocaSubBufCommand::AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req,
   // is added to this node, so this call must be before
   // the addDep() call.
   emitInstrumentationDataProxy();
-  addDep(DepDesc(MParentAlloca, getRequirement(), MParentAlloca));
+  Command *ConnectionCmd =
+      addDep(DepDesc(MParentAlloca, getRequirement(), MParentAlloca));
+  if (ConnectionCmd)
+    ToEnqueue.push_back(ConnectionCmd);
 }

 void AllocaSubBufCommand::emitInstrumentationData() {
@@ -1329,7 +1344,10 @@ void EmptyCommand::addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd,
   MRequirements.emplace_back(ReqRef);
   const Requirement *const StoredReq = &MRequirements.back();

-  addDep(DepDesc{DepCmd, StoredReq, AllocaCmd});
+  // EmptyCommand is always a host command, so the result of addDep is
+  // expected to be nullptr
+  Command *Cmd = addDep(DepDesc{DepCmd, StoredReq, AllocaCmd});
+  assert(Cmd == nullptr && "Connection command should be null for EmptyCommand");
+  (void)Cmd;
 }

 void EmptyCommand::emitInstrumentationData() {
diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp
index 677cdc0cfcceb..f6262a907e8d6 100644
--- a/sycl/source/detail/scheduler/commands.hpp
+++ b/sycl/source/detail/scheduler/commands.hpp
@@ -107,9 +107,11 @@ class Command {

   Command(CommandType Type, QueueImplPtr Queue);

-  void addDep(DepDesc NewDep);
+  /// \return an optional connection cmd to enqueue
+  [[nodiscard]] Command *addDep(DepDesc NewDep);

-  void addDep(EventImplPtr Event);
+  /// \return an optional connection cmd to enqueue
+  [[nodiscard]] Command *addDep(EventImplPtr Event);

   void addUser(Command *NewUser) { MUsers.insert(NewUser); }

@@ -204,13 +206,15 @@ class Command {
   /// Perform glueing of events from different contexts
   /// \param DepEvent event this command should depend on
   /// \param Dep optional DepDesc to perform connection of events properly
+  /// \return returns an optional connection command to enqueue
   ///
   /// Glueing (i.e. connecting) will be performed if and only if DepEvent is
   /// not from host context and its context doesn't match to context of this
   /// command. Context of this command is fetched via getWorkerContext().
   ///
   /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr.
-  void processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep);
+  [[nodiscard]] Command *processDepEvent(EventImplPtr DepEvent,
+                                         const DepDesc &Dep);

   /// Private interface. Derived classes should implement this method.
   virtual cl_int enqueueImp() = 0;
@@ -387,7 +391,8 @@ class AllocaCommand : public AllocaCommandBase {
 class AllocaSubBufCommand : public AllocaCommandBase {
 public:
   AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req,
-                      AllocaCommandBase *ParentAlloca);
+                      AllocaCommandBase *ParentAlloca,
+                      std::vector<Command *> &ToEnqueue);

   void *getMemAllocation() const final;
   void printDot(std::ostream &Stream) const final;
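The pattern introduced above recurs through the rest of the patch: addDep() and
processDepEvent() no longer enqueue the auxiliary "connection" command
themselves but hand it back, and every caller either asserts that none was
produced or pushes it into a ToEnqueue list. A minimal self-contained sketch of
the caller-side idiom, with stand-in types rather than the real SYCL classes:

    #include <vector>

    struct Command {};

    // Stand-in for Command::addDep(): returns an auxiliary "connection"
    // command when two contexts have to be glued together, nullptr otherwise.
    Command *addDep(Command * /*Dep*/) { return nullptr; }

    // Caller-side idiom: collect connection commands instead of enqueueing
    // them while the graph lock is still held.
    void collectDeps(const std::vector<Command *> &Deps,
                     std::vector<Command *> &ToEnqueue) {
      for (Command *Dep : Deps)
        if (Command *ConnectionCmd = addDep(Dep))
          ToEnqueue.push_back(ConnectionCmd);
    }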
diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp
index 440a7e6973e27..383bcee2f9d7e 100644
--- a/sycl/source/detail/scheduler/graph_builder.cpp
+++ b/sycl/source/detail/scheduler/graph_builder.cpp
@@ -172,9 +172,9 @@ MemObjRecord *Scheduler::GraphBuilder::getMemObjRecord(SYCLMemObjI *MemObject) {
   return MemObject->MRecord.get();
 }

-MemObjRecord *
-Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue,
-                                                 const Requirement *Req) {
+MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord(
+    const QueueImplPtr &Queue, const Requirement *Req,
+    std::vector<Command *> &ToEnqueue) {
   SYCLMemObjI *MemObject = Req->MSYCLMemObj;
   MemObjRecord *Record = getMemObjRecord(MemObject);

@@ -183,12 +183,14 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue,
     const size_t LeafLimit = 8;
     LeavesCollection::AllocateDependencyF AllocateDependency =
-        [this](Command *Dependant, Command *Dependency, MemObjRecord *Record) {
+        [this](Command *Dependant, Command *Dependency, MemObjRecord *Record,
+               LeavesCollection::EnqueueListT &ToEnqueue) {
           // Add the old leaf as a dependency for the new one by duplicating one
           // of the requirements for the current record
           DepDesc Dep = findDepForRecord(Dependant, Record);
           Dep.MDepCommand = Dependency;
-          Dependant->addDep(Dep);
+          if (Command *ConnectionCmd = Dependant->addDep(Dep))
+            ToEnqueue.push_back(ConnectionCmd);
           Dependency->addUser(Dependant);
           --(Dependency->MLeafCounter);
         };
@@ -212,7 +214,8 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue,
     MemObject->MRecord.reset(
         new MemObjRecord{InteropCtxPtr, LeafLimit, AllocateDependency});
-    getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr);
+    getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr,
+                            ToEnqueue);
   } else
     MemObject->MRecord.reset(new MemObjRecord{Queue->getContextImplPtr(),
                                               LeafLimit, AllocateDependency});
@@ -235,18 +238,19 @@ void Scheduler::GraphBuilder::updateLeaves(const std::set<Command *> &Cmds,
   }
 }

-void Scheduler::GraphBuilder::addNodeToLeaves(MemObjRecord *Record,
-                                              Command *Cmd,
-                                              access::mode AccessMode) {
+void Scheduler::GraphBuilder::addNodeToLeaves(
+    MemObjRecord *Record, Command *Cmd, access::mode AccessMode,
+    std::vector<Command *> &ToEnqueue) {
   LeavesCollection &Leaves{AccessMode == access::mode::read
                                ? Record->MReadLeaves
                                : Record->MWriteLeaves};
-  if (Leaves.push_back(Cmd))
+  if (Leaves.push_back(Cmd, ToEnqueue))
     ++Cmd->MLeafCounter;
 }

 UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
-    MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue) {
+    MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue,
+    std::vector<Command *> &ToEnqueue) {
   AllocaCommandBase *AllocaCmd =
       findAllocaForReq(Record, Req, Queue->getContextImplPtr());
   assert(AllocaCmd && "There must be alloca for requirement!");
@@ -259,11 +263,14 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
   std::set<Command *> Deps =
       findDepsForReq(Record, Req, Queue->getContextImplPtr());
   for (Command *Dep : Deps) {
-    UpdateCommand->addDep(DepDesc{Dep, StoredReq, AllocaCmd});
+    Command *ConnCmd =
+        UpdateCommand->addDep(DepDesc{Dep, StoredReq, AllocaCmd});
+    if (ConnCmd)
+      ToEnqueue.push_back(ConnCmd);
     Dep->addUser(UpdateCommand);
   }
   updateLeaves(Deps, Record, Req->MAccessMode);
-  addNodeToLeaves(Record, UpdateCommand, Req->MAccessMode);
+  addNodeToLeaves(Record, UpdateCommand, Req->MAccessMode, ToEnqueue);
   return UpdateCommand;
 }

@@ -296,11 +303,12 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc,
   return MapCmd;
 }

-Command *Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record,
-                                                   Requirement *Req,
-                                                   const QueueImplPtr &Queue) {
-
-  AllocaCommandBase *AllocaCmdDst = getOrCreateAllocaForReq(Record, Req, Queue);
+Command *Scheduler::GraphBuilder::insertMemoryMove(
+    MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue,
+    std::vector<Command *> &ToEnqueue) {
+  AllocaCommandBase *AllocaCmdDst =
+      getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue);
   if (!AllocaCmdDst)
     throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY);

@@ -369,17 +377,21 @@ Command *Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record,
   }

   for (Command *Dep : Deps) {
-    NewCmd->addDep(DepDesc{Dep, NewCmd->getRequirement(), AllocaCmdDst});
+    Command *ConnCmd =
+        NewCmd->addDep(DepDesc{Dep, NewCmd->getRequirement(), AllocaCmdDst});
+    if (ConnCmd)
+      ToEnqueue.push_back(ConnCmd);
     Dep->addUser(NewCmd);
   }
   updateLeaves(Deps, Record, access::mode::read_write);
-  addNodeToLeaves(Record, NewCmd, access::mode::read_write);
+  addNodeToLeaves(Record, NewCmd, access::mode::read_write, ToEnqueue);
   Record->MCurContext = Queue->getContextImplPtr();
   return NewCmd;
 }

 Command *Scheduler::GraphBuilder::remapMemoryObject(
-    MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd) {
+    MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd,
+    std::vector<Command *> &ToEnqueue) {
   assert(HostAllocaCmd->getQueue()->is_host() &&
          "Host alloca command expected");
   assert(HostAllocaCmd->MIsActive && "Active alloca command expected");
@@ -402,23 +414,30 @@ Command *Scheduler::GraphBuilder::remapMemoryObject(
       &HostAllocaCmd->MMemAllocation, LinkedAllocaCmd->getQueue(), MapMode);

   for (Command *Dep : Deps) {
-    UnMapCmd->addDep(DepDesc{Dep, UnMapCmd->getRequirement(), LinkedAllocaCmd});
+    Command *ConnCmd = UnMapCmd->addDep(
+        DepDesc{Dep, UnMapCmd->getRequirement(), LinkedAllocaCmd});
+    if (ConnCmd)
+      ToEnqueue.push_back(ConnCmd);
     Dep->addUser(UnMapCmd);
   }

-  MapCmd->addDep(DepDesc{UnMapCmd, MapCmd->getRequirement(), HostAllocaCmd});
+  Command *ConnCmd = MapCmd->addDep(
+      DepDesc{UnMapCmd, MapCmd->getRequirement(), HostAllocaCmd});
+  if (ConnCmd)
+    ToEnqueue.push_back(ConnCmd);
   UnMapCmd->addUser(MapCmd);

   updateLeaves(Deps, Record, access::mode::read_write);
-  addNodeToLeaves(Record, MapCmd, access::mode::read_write);
+  addNodeToLeaves(Record, MapCmd, access::mode::read_write, ToEnqueue);
   Record->MHostAccess = MapMode;
   return MapCmd;
 }

 // The function adds copy operation of the up to date'st memory to the memory
 // pointed by Req.
-Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) {
-
+Command *
+Scheduler::GraphBuilder::addCopyBack(Requirement *Req,
+                                     std::vector<Command *> &ToEnqueue) {
   QueueImplPtr HostQueue = Scheduler::getInstance().getDefaultHostQueue();
   SYCLMemObjI *MemObj = Req->MSYCLMemObj;
   MemObjRecord *Record = getMemObjRecord(MemObj);
@@ -443,12 +462,15 @@ Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) {
   MemCpyCommandHost *MemCpyCmd = MemCpyCmdUniquePtr.release();

   for (Command *Dep : Deps) {
-    MemCpyCmd->addDep(DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd});
+    Command *ConnCmd = MemCpyCmd->addDep(
+        DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd});
+    if (ConnCmd)
+      ToEnqueue.push_back(ConnCmd);
     Dep->addUser(MemCpyCmd);
   }

   updateLeaves(Deps, Record, Req->MAccessMode);
-  addNodeToLeaves(Record, MemCpyCmd, Req->MAccessMode);
+  addNodeToLeaves(Record, MemCpyCmd, Req->MAccessMode, ToEnqueue);
   if (MPrintOptionsArray[AfterAddCopyBack])
     printGraphAsDot("after_addCopyBack");
   return MemCpyCmd;
@@ -456,30 +478,34 @@ Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) {

 // The function implements SYCL host accessor logic: host accessor
 // should provide access to the buffer in user space.
-Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req) {
+Command *
+Scheduler::GraphBuilder::addHostAccessor(Requirement *Req,
+                                         std::vector<Command *> &ToEnqueue) {
   const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue();

-  MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req);
+  MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue);
   if (MPrintOptionsArray[BeforeAddHostAcc])
     printGraphAsDot("before_addHostAccessor");
   markModifiedIfWrite(Record, Req);

   AllocaCommandBase *HostAllocaCmd =
-      getOrCreateAllocaForReq(Record, Req, HostQueue);
+      getOrCreateAllocaForReq(Record, Req, HostQueue, ToEnqueue);

   if (sameCtx(HostAllocaCmd->getQueue()->getContextImplPtr(),
               Record->MCurContext)) {
     if (!isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess))
-      remapMemoryObject(Record, Req, HostAllocaCmd);
+      remapMemoryObject(Record, Req, HostAllocaCmd, ToEnqueue);
   } else
-    insertMemoryMove(Record, Req, HostQueue);
+    insertMemoryMove(Record, Req, HostQueue, ToEnqueue);

-  Command *UpdateHostAccCmd = insertUpdateHostReqCmd(Record, Req, HostQueue);
+  Command *UpdateHostAccCmd =
+      insertUpdateHostReqCmd(Record, Req, HostQueue, ToEnqueue);

   // Need empty command to be blocked until host accessor is destructed
-  EmptyCommand *EmptyCmd = addEmptyCmd(
-      UpdateHostAccCmd, {Req}, HostQueue, Command::BlockReason::HostAccessor);
+  EmptyCommand *EmptyCmd =
+      addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue,
+                  Command::BlockReason::HostAccessor, ToEnqueue);

   Req->MBlockedCmd = EmptyCmd;

@@ -490,13 +516,14 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req) {
 }

 Command *Scheduler::GraphBuilder::addCGUpdateHost(
-    std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr HostQueue) {
+    std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr HostQueue,
+    std::vector<Command *> &ToEnqueue) {
   auto UpdateHost = static_cast<CGUpdateHost *>(CommandGroup.get());
   Requirement *Req = UpdateHost->getReqToUpdate();

-  MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req);
-  return insertMemoryMove(Record, Req, HostQueue);
+  MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue);
+  return insertMemoryMove(Record, Req, HostQueue, ToEnqueue);
 }

 /// Start the search for the record from list of "leaf" commands and check if
@@ -616,7 +643,8 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) {
 // Note, creation of new allocation command can lead to the current context
 // (Record->MCurContext) change.
 AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
-    MemObjRecord *Record, const Requirement *Req, QueueImplPtr Queue) {
+    MemObjRecord *Record, const Requirement *Req, QueueImplPtr Queue,
+    std::vector<Command *> &ToEnqueue) {

   AllocaCommandBase *AllocaCmd =
       findAllocaForReq(Record, Req, Queue->getContextImplPtr());
@@ -632,8 +660,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
                                       /*Working with bytes*/ sizeof(char));

       auto *ParentAlloca =
-          getOrCreateAllocaForReq(Record, &ParentRequirement, Queue);
-      AllocaCmd = new AllocaSubBufCommand(Queue, *Req, ParentAlloca);
+          getOrCreateAllocaForReq(Record, &ParentRequirement, Queue, ToEnqueue);
+      AllocaCmd = new AllocaSubBufCommand(Queue, *Req, ParentAlloca, ToEnqueue);
     } else {

       const Requirement FullReq(/*Offset*/ {0, 0, 0}, Req->MMemoryRange,
@@ -672,7 +700,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
             DefaultHostQueue, FullReq, true /* InitFromUserData */,
             nullptr /* LinkedAllocaCmd */);
         Record->MAllocaCommands.push_back(HostAllocaCmd);
-        Record->MWriteLeaves.push_back(HostAllocaCmd);
+        Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue);
         ++(HostAllocaCmd->MLeafCounter);
         Record->MCurContext = DefaultHostQueue->getContextImplPtr();
       }
@@ -719,15 +747,19 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(

       // Update linked command
       if (LinkedAllocaCmd) {
-        AllocaCmd->addDep(DepDesc{LinkedAllocaCmd, AllocaCmd->getRequirement(),
-                                  LinkedAllocaCmd});
+        Command *ConnCmd = AllocaCmd->addDep(DepDesc{
+            LinkedAllocaCmd, AllocaCmd->getRequirement(), LinkedAllocaCmd});
+        if (ConnCmd)
+          ToEnqueue.push_back(ConnCmd);
         LinkedAllocaCmd->addUser(AllocaCmd);
         LinkedAllocaCmd->MLinkedAllocaCmd = AllocaCmd;

         // To ensure that the leader allocation is removed first
-        AllocaCmd->getReleaseCmd()->addDep(
+        ConnCmd = AllocaCmd->getReleaseCmd()->addDep(
             DepDesc(LinkedAllocaCmd->getReleaseCmd(),
                     AllocaCmd->getRequirement(), LinkedAllocaCmd));
+        if (ConnCmd)
+          ToEnqueue.push_back(ConnCmd);

         // Device allocation takes ownership of the host ptr during
         // construction, host allocation doesn't. So, device allocation should
@@ -742,17 +774,20 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
         std::set<Command *> Deps =
             findDepsForReq(Record, Req, Queue->getContextImplPtr());
         for (Command *Dep : Deps) {
-          AllocaCmd->addDep(DepDesc{Dep, Req, LinkedAllocaCmd});
+          Command *ConnCmd =
+              AllocaCmd->addDep(DepDesc{Dep, Req, LinkedAllocaCmd});
+          if (ConnCmd)
+            ToEnqueue.push_back(ConnCmd);
           Dep->addUser(AllocaCmd);
         }
         updateLeaves(Deps, Record, Req->MAccessMode);
-        addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode);
+        addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode, ToEnqueue);
       }
     }

     Record->MAllocaCommands.push_back(AllocaCmd);
-    Record->MWriteLeaves.push_back(AllocaCmd);
+    Record->MWriteLeaves.push_back(AllocaCmd, ToEnqueue);
     ++(AllocaCmd->MLeafCounter);
   }
   return AllocaCmd;
@@ -780,7 +815,8 @@ typename detail::enable_if_t<
     EmptyCommand *>
 Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector<T *> &Reqs,
                                      const QueueImplPtr &Queue,
-                                     Command::BlockReason Reason) {
+                                     Command::BlockReason Reason,
+                                     std::vector<Command *> &ToEnqueue) {
   EmptyCommand *EmptyCmd =
       new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue());

@@ -792,8 +828,9 @@ Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector<T *> &Reqs,
   EmptyCmd->MBlockReason = Reason;

   for (T *Req : Reqs) {
-    MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req);
-    AllocaCommandBase *AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue);
+    MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req, ToEnqueue);
+    AllocaCommandBase *AllocaCmd =
+        getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue);
     EmptyCmd->addRequirement(Cmd, AllocaCmd, Req);
   }

@@ -805,7 +842,7 @@ Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector<T *> &Reqs,
     MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj);
     updateLeaves({Cmd}, Record, Req->MAccessMode);
-    addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode);
+    addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue);
   }

   return EmptyCmd;
@@ -843,7 +880,8 @@ static void combineAccessModesOfReqs(std::vector<Requirement *> &Reqs) {
 Command *
 Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
-                               QueueImplPtr Queue) {
+                               QueueImplPtr Queue,
+                               std::vector<Command *> &ToEnqueue) {
   std::vector<Requirement *> &Reqs = CommandGroup->MRequirements;
   const std::vector<detail::EventImplPtr> &Events = CommandGroup->MEvents;
   const CG::CGTYPE CGType = CommandGroup->getType();
@@ -872,10 +910,11 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
               ? static_cast<detail::CGHostTask &>(NewCmd->getCG()).MQueue
               : Queue;

-      Record = getOrInsertMemObjRecord(QueueForAlloca, Req);
+      Record = getOrInsertMemObjRecord(QueueForAlloca, Req, ToEnqueue);
       markModifiedIfWrite(Record, Req);

-      AllocaCmd = getOrCreateAllocaForReq(Record, Req, QueueForAlloca);
+      AllocaCmd =
+          getOrCreateAllocaForReq(Record, Req, QueueForAlloca, ToEnqueue);

       isSameCtx =
           sameCtx(QueueForAlloca->getContextImplPtr(), Record->MCurContext);
@@ -888,7 +927,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
       // required access mode is valid, remap if not.
       if (Record->MCurContext->is_host() &&
           !isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess))
-        remapMemoryObject(Record, Req, AllocaCmd);
+        remapMemoryObject(Record, Req, AllocaCmd, ToEnqueue);
     } else {
       // Cannot directly copy memory from OpenCL device to OpenCL device -
       // create two copies: device->host and host->device.
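The hunk that follows completes the comment above: a copy between two device
contexts is staged through the host. A sketch of that decision under
simplified, hypothetical types (the real logic also consults the command group
type and map/unmap state):

    #include <vector>

    struct Queue {};
    struct Command {};

    // Stand-in for GraphBuilder::insertMemoryMove().
    Command *insertMemoryMove(Queue *, std::vector<Command *> &) {
      return nullptr;
    }

    // A device-to-device transfer is split into device->host and
    // host->device moves when neither context is the host one.
    void moveToTarget(bool CurIsHost, bool TargetIsHost, Queue *HostQueue,
                      Queue *TargetQueue, std::vector<Command *> &ToEnqueue) {
      const bool NeedMemMoveToHost = !CurIsHost && !TargetIsHost;
      if (NeedMemMoveToHost)
        insertMemoryMove(HostQueue, ToEnqueue);   // device -> host
      insertMemoryMove(TargetQueue, ToEnqueue);   // host -> device
    }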
@@ -908,14 +947,16 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,

       if (NeedMemMoveToHost)
         insertMemoryMove(Record, Req,
-                         Scheduler::getInstance().getDefaultHostQueue());
-      insertMemoryMove(Record, Req, MemMoveTargetQueue);
+                         Scheduler::getInstance().getDefaultHostQueue(),
+                         ToEnqueue);
+      insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue);
     }
     std::set<Command *> Deps =
         findDepsForReq(Record, Req, Queue->getContextImplPtr());

     for (Command *Dep : Deps)
-      NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd});
+      if (Command *ConnCmd = NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}))
+        ToEnqueue.push_back(ConnCmd);
   }

   // Set new command as user for dependencies and update leaves.
@@ -928,17 +969,19 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
     const Requirement *Req = Dep.MDepRequirement;
     MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj);
     updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode);
-    addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode);
+    addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode, ToEnqueue);
   }

   // Register all the events as dependencies
   for (detail::EventImplPtr e : Events) {
-    NewCmd->addDep(e);
+    if (Command *ConnCmd = NewCmd->addDep(e))
+      ToEnqueue.push_back(ConnCmd);
   }

   if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK)
-    NewCmd->MEmptyCmd = addEmptyCmd(NewCmd.get(), NewCmd->getCG().MRequirements,
-                                    Queue, Command::BlockReason::HostTask);
+    NewCmd->MEmptyCmd =
+        addEmptyCmd(NewCmd.get(), NewCmd->getCG().MRequirements, Queue,
+                    Command::BlockReason::HostTask, ToEnqueue);

   if (MPrintOptionsArray[AfterAddCG])
     printGraphAsDot("after_addCG");
@@ -1132,9 +1175,9 @@ void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) {
 // requirement in Dep we make ConnectCmd depend on DepEvent's command with this
 // requirement.
 // Optionality of Dep is set by Dep.MDepCommand equal to nullptr.
-void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd,
-                                              EventImplPtr DepEvent,
-                                              const DepDesc &Dep) {
+Command *Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd,
+                                                  EventImplPtr DepEvent,
+                                                  const DepDesc &Dep) {
   assert(Cmd->getWorkerContext() != DepEvent->getContextImpl());

   // construct Host Task type command manually and make it depend on DepEvent
@@ -1162,7 +1205,9 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd,

   if (Dep.MDepRequirement) {
     // make ConnectCmd depend on requirement
-    ConnectCmd->addDep(Dep);
+    // Dismiss the result here as it is not a connection command,
+    // because ConnectCmd is a host command
+    (void)ConnectCmd->addDep(Dep);
     assert(reinterpret_cast<Command *>(DepEvent->getCommand()) ==
            Dep.MDepCommand);
     // add user to Dep.MDepCommand is already performed beyond this if branch
@@ -1170,12 +1215,16 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd,
     MemObjRecord *Record = getMemObjRecord(Dep.MDepRequirement->MSYCLMemObj);

     updateLeaves({Dep.MDepCommand}, Record, Dep.MDepRequirement->MAccessMode);
-    addNodeToLeaves(Record, ConnectCmd, Dep.MDepRequirement->MAccessMode);
+    std::vector<Command *> ToEnqueue;
+    addNodeToLeaves(Record, ConnectCmd, Dep.MDepRequirement->MAccessMode,
+                    ToEnqueue);
+    assert(ToEnqueue.size() == 0);

     const std::vector<Requirement *> Reqs(1, Dep.MDepRequirement);
     EmptyCmd = addEmptyCmd(ConnectCmd, Reqs,
                            Scheduler::getInstance().getDefaultHostQueue(),
-                           Command::BlockReason::HostTask);
+                           Command::BlockReason::HostTask, ToEnqueue);
+    assert(ToEnqueue.size() == 0);
     // Dependencies for EmptyCmd are set in addEmptyCmd for provided Reqs.

     // Depend Cmd on empty command
@@ -1183,36 +1232,35 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd,
     {
       DepDesc CmdDep = Dep;
       CmdDep.MDepCommand = EmptyCmd;

-      Cmd->addDep(CmdDep);
+      // Dismiss the result here as it is not a connection command,
+      // because EmptyCmd is a host command
+      (void)Cmd->addDep(CmdDep);
     }
   } else {
+    std::vector<Command *> ToEnqueue;
     EmptyCmd = addEmptyCmd(
         ConnectCmd, {}, Scheduler::getInstance().getDefaultHostQueue(),
-        Command::BlockReason::HostTask);
+        Command::BlockReason::HostTask, ToEnqueue);
+    assert(ToEnqueue.size() == 0);

     // There is no requirement thus, empty command will only depend on
     // ConnectCmd via its event.
-    EmptyCmd->addDep(ConnectCmd->getEvent());
-    ConnectCmd->addDep(DepEvent);
+    // Dismiss the result here as it is not a connection command,
+    // because ConnectCmd is a host command
+    (void)EmptyCmd->addDep(ConnectCmd->getEvent());
+    (void)ConnectCmd->addDep(DepEvent);

     // Depend Cmd on empty command
-    Cmd->addDep(EmptyCmd->getEvent());
+    // Dismiss the result here as it is not a connection command,
+    // because EmptyCmd is a host command
+    (void)Cmd->addDep(EmptyCmd->getEvent());
   }

   EmptyCmd->addUser(Cmd);

   ConnectCmd->MEmptyCmd = EmptyCmd;

-  // FIXME graph builder shouldn't really enqueue commands. We're in the middle
-  // of enqueue process for some command Cmd. We're going to add a dependency
-  // for it. Need some nice and cute solution to enqueue ConnectCmd via
-  // standard scheduler/graph processor mechanisms.
-  // Though, we need this call to enqueue to launch ConnectCmd.
-  EnqueueResultT Res;
-  bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(ConnectCmd, Res);
-  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
-    throw runtime_error("Failed to enqueue a sync event between two contexts",
-                        PI_INVALID_OPERATION);
+  return ConnectCmd;
 }

 } // namespace detail
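connectDepEvent() now returns the connection command instead of enqueueing it
itself (the removed FIXME block). The command travels up the call chain until
a Scheduler entry point can enqueue it outside the write lock. A condensed
sketch of that flow, with hypothetical free functions standing in for the
member functions:

    #include <vector>

    struct Command {};

    // Hypothetical stand-ins for the member functions in this patch.
    Command *connectDepEvent() { return new Command{}; } // builds ConnectCmd
    Command *processDepEvent() { return connectDepEvent(); }
    Command *addDep() { return processDepEvent(); }

    // Scheduler entry point: the only place that enqueues the result, and it
    // does so outside the graph write lock.
    void schedulerAddCG(std::vector<Command *> &AuxiliaryCmds) {
      if (Command *ConnCmd = addDep())
        AuxiliaryCmds.push_back(ConnCmd); // enqueued later under a read lock
    }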
diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp
index 0e7a3fbbbaa0f..cc2e0cb15067c 100644
--- a/sycl/source/detail/scheduler/graph_processor.cpp
+++ b/sycl/source/detail/scheduler/graph_processor.cpp
@@ -36,7 +36,9 @@ Scheduler::GraphProcessor::getWaitList(EventImplPtr Event) {
   return Result;
 }

-void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event) {
+void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event,
+                                             ReadLockT &GraphReadLock,
+                                             bool LockTheLock) {
   Command *Cmd = getCommand(Event);
   // Command can be nullptr if user creates cl::sycl::event explicitly or the
   // event has been waited on by another thread
@@ -49,7 +51,13 @@ void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event) {
     // TODO: Reschedule commands.
     throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);

-  Cmd->getEvent()->waitInternal();
+  assert(Cmd->getEvent() == Event);
+
+  GraphReadLock.unlock();
+  Event->waitInternal();
+
+  if (LockTheLock)
+    GraphReadLock.lock();
 }

 bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd,
@@ -87,6 +95,19 @@ bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd,
       return false;
   }

+  // Only the graph read lock is to be held here.
+  // Enqueueing a command may take quite some time. Keeping the graph locked
+  // can starve other threads (e.g. when another thread attempts to acquire
+  // the write lock to add a command to the graph). Releasing the read lock
+  // without other safety measures isn't an option either, as another thread
+  // could enter the graph cleanup process (due to some event completing) and
+  // remove dependencies from the users of this command.
+  // An example: command A depends on commands B and C. This thread wants to
+  // enqueue A, and hence needs to enqueue B and C first. It walks A's
+  // dependency list and starts enqueueing B right away. Meanwhile, another
+  // thread waits on completion of C and starts the cleanup process while this
+  // thread is still in the middle of enqueueing B. The other thread then
+  // modifies the dependency list of A by removing C from it, and the
+  // iterators become invalid.
   return Cmd->enqueue(EnqueueResult, Blocking);
 }
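The new waitForEvent() contract above (release the read lock around the
blocking wait, optionally reacquire it) can be illustrated in isolation. A
minimal sketch, assuming only the standard library:

    #include <shared_mutex>

    // Release the graph read lock around a potentially long blocking wait and
    // reacquire it only if the caller still needs the graph afterwards.
    void waitOutsideLock(
        std::shared_lock<std::shared_timed_mutex> &GraphReadLock,
        bool LockTheLock = true) {
      GraphReadLock.unlock();
      // Event->waitInternal() would run here, without the lock held.
      if (LockTheLock)
        GraphReadLock.lock();
    }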
diff --git a/sycl/source/detail/scheduler/leaves_collection.cpp b/sycl/source/detail/scheduler/leaves_collection.cpp
index 051a7d61fe333..0ae0bcfbb9c0e 100644
--- a/sycl/source/detail/scheduler/leaves_collection.cpp
+++ b/sycl/source/detail/scheduler/leaves_collection.cpp
@@ -49,13 +49,14 @@ size_t LeavesCollection::remove(value_type Cmd) {
   return eraseHostAccessorCommand(static_cast<EmptyCommand *>(Cmd));
 }

-bool LeavesCollection::push_back(value_type Cmd) {
+bool LeavesCollection::push_back(value_type Cmd, EnqueueListT &ToEnqueue) {
   bool Result = false;

   if (isHostAccessorCmd(Cmd))
-    Result = addHostAccessorCommand(static_cast<EmptyCommand *>(Cmd));
+    Result =
+        addHostAccessorCommand(static_cast<EmptyCommand *>(Cmd), ToEnqueue);
   else
-    Result = addGenericCommand(Cmd);
+    Result = addGenericCommand(Cmd, ToEnqueue);

   return Result;
 }
@@ -72,7 +73,8 @@ std::vector<Command *> LeavesCollection::toVector() const {
   return Result;
 }

-bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd) {
+bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd,
+                                              EnqueueListT &ToEnqueue) {
   // 1. find the oldest command with doOverlap() = true amongst the List
   //    => OldCmd
   HostAccessorCommandSingleXRefT OldCmdIt;
@@ -97,7 +99,7 @@ bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd) {
   // when circular buffer is full.
   if (OldCmdIt != MHostAccessorCommands.end()) {
     // allocate dependency
-    MAllocateDependency(Cmd, *OldCmdIt, MRecord);
+    MAllocateDependency(Cmd, *OldCmdIt, MRecord, ToEnqueue);

     // erase the old cmd as it's tracked via dependency now
     eraseHostAccessorCommand(static_cast<EmptyCommand *>(*OldCmdIt));
@@ -109,7 +111,8 @@ bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd) {
   return true;
 }

-bool LeavesCollection::addGenericCommand(Command *Cmd) {
+bool LeavesCollection::addGenericCommand(Command *Cmd,
+                                         EnqueueListT &ToEnqueue) {
   if (MGenericCommands.full()) {
     Command *OldLeaf = MGenericCommands.front();

@@ -117,7 +120,7 @@ bool LeavesCollection::addGenericCommand(Command *Cmd) {
     if (OldLeaf == Cmd)
       return false;

-    MAllocateDependency(Cmd, OldLeaf, MRecord);
+    MAllocateDependency(Cmd, OldLeaf, MRecord, ToEnqueue);
   }

   MGenericCommands.push_back(Cmd);
diff --git a/sycl/source/detail/scheduler/leaves_collection.hpp b/sycl/source/detail/scheduler/leaves_collection.hpp
index 5ebadddbf9698..54b162693355a 100644
--- a/sycl/source/detail/scheduler/leaves_collection.hpp
+++ b/sycl/source/detail/scheduler/leaves_collection.hpp
@@ -39,10 +39,11 @@ class LeavesCollection {
 public:
   using GenericCommandsT = CircularBuffer<Command *>;
   using HostAccessorCommandsT = std::list<EmptyCommand *>;
+  using EnqueueListT = std::vector<Command *>;

   // Make first command depend on the second
   using AllocateDependencyF =
-      std::function<void(Command *, Command *, MemObjRecord *)>;
+      std::function<void(Command *, Command *, MemObjRecord *, EnqueueListT &)>;

   template <bool IsConst> class IteratorT;

@@ -81,7 +82,7 @@ class LeavesCollection {
   }

   /// Returns true if insertion took place. Returns false otherwise.
-  bool push_back(value_type Cmd);
+  bool push_back(value_type Cmd, EnqueueListT &ToEnqueue);

   /// Replacement for std::remove with subsequent call to erase(newEnd, end()).
   /// This function is introduced here due to complexity of iterator.
@@ -125,8 +126,8 @@ class LeavesCollection {

   AllocateDependencyF MAllocateDependency;

-  bool addGenericCommand(value_type Cmd);
-  bool addHostAccessorCommand(EmptyCommand *Cmd);
+  bool addGenericCommand(value_type Cmd, EnqueueListT &ToEnqueue);
+  bool addHostAccessorCommand(EmptyCommand *Cmd, EnqueueListT &ToEnqueue);

   // inserts a command to the end of list for its mem object
   void insertHostAccessorCommand(EmptyCommand *Cmd);
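The widened callback signature is the glue between LeavesCollection and the
graph builder: when an old leaf is evicted, the dependency-allocation callback
may now surface a connection command into the caller's enqueue list. A
self-contained sketch of wiring such a callback (simplified stand-in types):

    #include <functional>
    #include <vector>

    struct Command { int MLeafCounter = 0; };
    struct MemObjRecord {};
    using EnqueueListT = std::vector<Command *>;

    // The widened callback type from leaves_collection.hpp.
    using AllocateDependencyF =
        std::function<void(Command *, Command *, MemObjRecord *, EnqueueListT &)>;

    // Example callback: a mirror of the one built in
    // getOrInsertMemObjRecord(), minus the graph-specific bookkeeping.
    AllocateDependencyF MakeDependency =
        [](Command * /*Dependant*/, Command *Dependency,
           MemObjRecord * /*Record*/, EnqueueListT & /*ToEnqueue*/) {
          --Dependency->MLeafCounter; // evicted leaf is now tracked via an edge
        };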
diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp
index 995b6e2a13ac8..1f646ff6acf5b 100644
--- a/sycl/source/detail/scheduler/scheduler.cpp
+++ b/sycl/source/detail/scheduler/scheduler.cpp
@@ -26,7 +26,8 @@ __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 namespace detail {

-void Scheduler::waitForRecordToFinish(MemObjRecord *Record) {
+void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
+                                      ReadLockT &GraphReadLock) {
 #ifdef XPTI_ENABLE_INSTRUMENTATION
   // Will contain the list of dependencies for the Release Command
   std::set<Command *> DepCommands;
@@ -40,7 +41,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record) {
       // Capture the dependencies
       DepCommands.insert(Cmd);
 #endif
-      GraphProcessor::waitForEvent(Cmd->getEvent());
+      GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock);
     }
   for (Command *Cmd : Record->MWriteLeaves) {
     EnqueueResultT Res;
@@ -50,7 +51,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record) {
 #ifdef XPTI_ENABLE_INSTRUMENTATION
     DepCommands.insert(Cmd);
 #endif
-    GraphProcessor::waitForEvent(Cmd->getEvent());
+    GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock);
   }
   for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) {
     Command *ReleaseCmd = AllocaCmd->getReleaseCmd();
@@ -63,7 +64,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record) {
     // reported as edges
     ReleaseCmd->resolveReleaseDependencies(DepCommands);
 #endif
-    GraphProcessor::waitForEvent(ReleaseCmd->getEvent());
+    GraphProcessor::waitForEvent(ReleaseCmd->getEvent(), GraphReadLock);
   }
 }

@@ -71,6 +72,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
                               QueueImplPtr Queue) {
   EventImplPtr NewEvent = nullptr;
   const bool IsKernel = CommandGroup->getType() == CG::KERNEL;
+  std::vector<Command *> AuxiliaryCmds;
   const bool IsHostKernel = CommandGroup->getType() == CG::RUN_ON_HOST_INTEL;
   vector_class<StreamImplPtr> Streams;

@@ -87,31 +89,36 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
   }

   {
-    std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
-    lockSharedTimedMutex(Lock);
+    WriteLockT Lock(MGraphLock, std::defer_lock);
+    acquireWriteLock(Lock);

     Command *NewCmd = nullptr;
     switch (CommandGroup->getType()) {
     case CG::UPDATE_HOST:
       NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup),
-                                             DefaultHostQueue);
+                                             DefaultHostQueue, AuxiliaryCmds);
       break;
     case CG::CODEPLAY_HOST_TASK:
-      NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), DefaultHostQueue);
+      NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), DefaultHostQueue,
+                                   AuxiliaryCmds);
       break;
     default:
-      NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue));
+      NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue),
+                                   AuxiliaryCmds);
     }
     NewEvent = NewCmd->getEvent();
   }

   {
-    std::shared_lock<std::shared_timed_mutex> Lock(MGraphLock);
+    ReadLockT Lock(MGraphLock);

     Command *NewCmd = static_cast<Command *>(NewEvent->getCommand());

+    EnqueueResultT Res;
+    bool Enqueued;
+
     auto CleanUp = [&]() {
-      if (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0) {
+      if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) {
         if (IsHostKernel)
           static_cast<ExecCGCommand *>(NewCmd)->releaseCG();

@@ -120,6 +127,20 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
       }
     };

+    for (Command *Cmd : AuxiliaryCmds) {
+      Enqueued = GraphProcessor::enqueueCommand(Cmd, Res);
+      try {
+        if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
+          throw runtime_error("Auxiliary enqueue process failed.",
+                              PI_INVALID_OPERATION);
+      } catch (...) {
+        // enqueueCommand() and the if statement above may throw an exception,
+        // so release the owned resources to avoid a memory leak
+        CleanUp();
+        std::rethrow_exception(std::current_exception());
+      }
+    }
+
     if (NewCmd) {
       // TODO: Check if lazy mode.
       EnqueueResultT Res;
@@ -150,17 +171,30 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
 }

 EventImplPtr Scheduler::addCopyBack(Requirement *Req) {
-  std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
-  lockSharedTimedMutex(Lock);
-  Command *NewCmd = MGraphBuilder.addCopyBack(Req);
-  // Command was not creted because there were no operations with
-  // buffer.
-  if (!NewCmd)
-    return nullptr;
+  std::vector<Command *> AuxiliaryCmds;
+  Command *NewCmd = nullptr;
+  {
+    WriteLockT Lock(MGraphLock, std::defer_lock);
+    acquireWriteLock(Lock);
+    NewCmd = MGraphBuilder.addCopyBack(Req, AuxiliaryCmds);
+    // Command was not created because there were no operations with
+    // the buffer.
+    if (!NewCmd)
+      return nullptr;
+  }

   try {
+    ReadLockT Lock(MGraphLock);
     EnqueueResultT Res;
-    bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res);
+    bool Enqueued;
+
+    for (Command *Cmd : AuxiliaryCmds) {
+      Enqueued = GraphProcessor::enqueueCommand(Cmd, Res);
+      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
+        throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);
+    }
+
+    Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res);
     if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
       throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);
   } catch (...) {
@@ -174,13 +208,15 @@ Scheduler &Scheduler::getInstance() {
 }

 std::vector<EventImplPtr> Scheduler::getWaitList(EventImplPtr Event) {
-  std::shared_lock<std::shared_timed_mutex> Lock(MGraphLock);
+  ReadLockT Lock(MGraphLock);
   return GraphProcessor::getWaitList(std::move(Event));
 }

 void Scheduler::waitForEvent(EventImplPtr Event) {
-  std::shared_lock<std::shared_timed_mutex> Lock(MGraphLock);
-  GraphProcessor::waitForEvent(std::move(Event));
+  ReadLockT Lock(MGraphLock);
+  // It's fine to leave the lock unlocked upon return from waitForEvent as
+  // there are no more actions to perform on the graph here
+  GraphProcessor::waitForEvent(std::move(Event), Lock, /*LockTheLock=*/false);
 }

 static void deallocateStreams(
@@ -203,8 +239,7 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) {
   // Avoiding deadlock situation, where one thread is in the process of
   // enqueueing (with a locked mutex) a currently blocked task that waits for
   // another thread which is stuck at attempting cleanup.
-  std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock,
-                                                 std::try_to_lock);
+  WriteLockT Lock(MGraphLock, std::try_to_lock);
   if (Lock.owns_lock()) {
     auto FinishedCmd = static_cast<Command *>(FinishedEvent->getCommand());
     // The command might have been cleaned up (and set to nullptr) by another
@@ -222,12 +257,13 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
   // objects, this is needed to guarantee that streamed data is printed and
   // resources are released.
   std::vector<std::shared_ptr<stream_impl>> StreamsToDeallocate;
+
   {
     MemObjRecord *Record = nullptr;
-    std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
+    WriteLockT Lock(MGraphLock, std::defer_lock);

     {
-      lockSharedTimedMutex(Lock);
+      acquireWriteLock(Lock);

       Record = MGraphBuilder.getMemObjRecord(MemObj);
       if (!Record)
@@ -240,12 +276,12 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
     {
       // This only needs a shared mutex as it only involves enqueueing and
       // awaiting for events
-      std::shared_lock<std::shared_timed_mutex> Lock(MGraphLock);
-      waitForRecordToFinish(Record);
+      ReadLockT Lock(MGraphLock);
+      waitForRecordToFinish(Record, Lock);
     }

     {
-      lockSharedTimedMutex(Lock);
+      acquireWriteLock(Lock);
       MGraphBuilder.decrementLeafCountersForRecord(Record);
       MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate);
       MGraphBuilder.removeRecordForMemObj(MemObj);
@@ -255,24 +291,42 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
 }

 EventImplPtr Scheduler::addHostAccessor(Requirement *Req) {
-  std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
-  lockSharedTimedMutex(Lock);
+  std::vector<Command *> AuxiliaryCmds;
+  Command *NewCmd = nullptr;
+
+  {
+    WriteLockT Lock(MGraphLock, std::defer_lock);
+    acquireWriteLock(Lock);

-  Command *NewCmd = MGraphBuilder.addHostAccessor(Req);
+    NewCmd = MGraphBuilder.addHostAccessor(Req, AuxiliaryCmds);
+  }

   if (!NewCmd)
     return nullptr;
-  EnqueueResultT Res;
-  bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res);
-  if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
-    throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);
+
+  {
+    ReadLockT ReadLock(MGraphLock);
+    EnqueueResultT Res;
+    bool Enqueued;
+
+    for (Command *Cmd : AuxiliaryCmds) {
+      Enqueued = GraphProcessor::enqueueCommand(Cmd, Res);
+      if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
+        throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);
+    }
+
+    Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res);
+    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
+      throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);
+  }
+
   return NewCmd->getEvent();
 }

 void Scheduler::releaseHostAccessor(Requirement *Req) {
   Command *const BlockedCmd = Req->MBlockedCmd;

-  std::shared_lock<std::shared_timed_mutex> Lock(MGraphLock);
+  ReadLockT Lock(MGraphLock);

   assert(BlockedCmd && "Can't find appropriate command to unblock");

@@ -292,6 +346,7 @@ void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req) {
         throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);
     }
   };
+
   EnqueueLeaves(Record->MReadLeaves);
   EnqueueLeaves(Record->MWriteLeaves);
 }
@@ -336,8 +391,7 @@ Scheduler::~Scheduler() {
   }
 }

-void Scheduler::lockSharedTimedMutex(
-    std::unique_lock<std::shared_timed_mutex> &Lock) {
+void Scheduler::acquireWriteLock(WriteLockT &Lock) {
 #ifdef _WIN32
   // Avoiding deadlock situation for MSVC. std::shared_timed_mutex specification
   // does not specify a priority for shared and exclusive accesses. It will be a
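scheduler.cpp now follows a consistent two-phase shape in addCG(),
addCopyBack() and addHostAccessor(): mutate the graph under the write lock,
collect auxiliary commands, then enqueue them plus the new command under a
read lock only. A minimal sketch of that shape, assuming only the standard
library:

    #include <mutex>
    #include <shared_mutex>
    #include <vector>

    struct Command {};
    static std::shared_timed_mutex MGraphLock;

    // Two-phase shape: graph mutation under the write lock, enqueueing of
    // the collected auxiliary commands under a read lock only.
    void submitSketch() {
      std::vector<Command *> AuxiliaryCmds;
      {
        std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock);
        // MGraphBuilder.addCG(..., AuxiliaryCmds) would populate the list.
      }
      {
        std::shared_lock<std::shared_timed_mutex> Lock(MGraphLock);
        for (Command *Cmd : AuxiliaryCmds)
          (void)Cmd; // GraphProcessor::enqueueCommand(Cmd, Res) runs here.
      }
    }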
diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp
index 917fc0e1a3ee3..5193d61858849 100644
--- a/sycl/source/detail/scheduler/scheduler.hpp
+++ b/sycl/source/detail/scheduler/scheduler.hpp
@@ -450,12 +450,17 @@ class Scheduler {
   ~Scheduler();

 protected:
+  // TODO: after switching to C++17, change std::shared_timed_mutex to
+  // std::shared_mutex
+  using RWLockT = std::shared_timed_mutex;
+  using ReadLockT = std::shared_lock<RWLockT>;
+  using WriteLockT = std::unique_lock<RWLockT>;
+
   /// Provides exclusive access to std::shared_timed_mutex object with deadlock
   /// avoidance
   ///
-  /// \param Lock is an instance of std::unique_lock<std::shared_timed_mutex>
-  /// class
-  void lockSharedTimedMutex(std::unique_lock<std::shared_timed_mutex> &Lock);
+  /// \param Lock is an instance of WriteLockT, created with \c std::defer_lock
+  void acquireWriteLock(WriteLockT &Lock);

   static void enqueueLeavesOfReqUnlocked(const Requirement *const Req);

@@ -474,25 +479,27 @@ class Scheduler {
     /// \sa queue::submit, Scheduler::addCG
     ///
     /// \return a command that represents command group execution.
-    Command *addCG(std::unique_ptr<detail::CG> CommandGroup,
-                   QueueImplPtr Queue);
+    Command *addCG(std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr Queue,
+                   std::vector<Command *> &ToEnqueue);

     /// Registers a \ref CG "command group" that updates host memory to the
     /// latest state.
     ///
     /// \return a command that represents command group execution.
     Command *addCGUpdateHost(std::unique_ptr<detail::CG> CommandGroup,
-                             QueueImplPtr HostQueue);
+                             QueueImplPtr HostQueue,
+                             std::vector<Command *> &ToEnqueue);

     /// Enqueues a command to update memory to the latest state.
     ///
     /// \param Req is a requirement, that describes memory object.
-    Command *addCopyBack(Requirement *Req);
+    Command *addCopyBack(Requirement *Req, std::vector<Command *> &ToEnqueue);

     /// Enqueues a command to create a host accessor.
     ///
     /// \param Req points to memory being accessed.
-    Command *addHostAccessor(Requirement *Req);
+    Command *addHostAccessor(Requirement *Req,
+                             std::vector<Command *> &ToEnqueue);

     /// [Provisional] Optimizes the whole graph.
     void optimize();
@@ -522,7 +529,8 @@ class Scheduler {
     /// \return a pointer to MemObjRecord for pointer to memory object. If the
     /// record is not found, nullptr is returned.
     MemObjRecord *getOrInsertMemObjRecord(const QueueImplPtr &Queue,
-                                          const Requirement *Req);
+                                          const Requirement *Req,
+                                          std::vector<Command *> &ToEnqueue);

     /// Decrements leaf counters for all leaves of the record.
     void decrementLeafCountersForRecord(MemObjRecord *Record);
@@ -537,7 +545,8 @@ class Scheduler {

     /// Adds new command to leaves if needed.
     void addNodeToLeaves(MemObjRecord *Record, Command *Cmd,
-                         access::mode AccessMode);
+                         access::mode AccessMode,
+                         std::vector<Command *> &ToEnqueue);

     /// Removes commands from leaves.
     void updateLeaves(const std::set<Command *> &Cmds, MemObjRecord *Record,
@@ -547,10 +556,11 @@ class Scheduler {
     /// \param Cmd dependant command
     /// \param DepEvent event to depend on
     /// \param Dep optional DepDesc to perform connection properly
+    /// \returns the connecting command which is to be enqueued
     ///
     /// Optionality of Dep is set by Dep.MDepCommand equal to nullptr.
-    void connectDepEvent(Command *const Cmd, EventImplPtr DepEvent,
-                         const DepDesc &Dep);
+    Command *connectDepEvent(Command *const Cmd, EventImplPtr DepEvent,
+                             const DepDesc &Dep);

     std::vector<SYCLMemObjI *> MMemObjs;

@@ -565,16 +575,19 @@ class Scheduler {
     /// \param Req is a Requirement describing destination.
     /// \param Queue is a queue that is bound to target context.
     Command *insertMemoryMove(MemObjRecord *Record, Requirement *Req,
-                              const QueueImplPtr &Queue);
+                              const QueueImplPtr &Queue,
+                              std::vector<Command *> &ToEnqueue);

     // Inserts commands required to remap the memory object to its current host
     // context so that the required access mode becomes valid.
     Command *remapMemoryObject(MemObjRecord *Record, Requirement *Req,
-                               AllocaCommandBase *HostAllocaCmd);
+                               AllocaCommandBase *HostAllocaCmd,
+                               std::vector<Command *> &ToEnqueue);

     UpdateHostRequirementCommand *
     insertUpdateHostReqCmd(MemObjRecord *Record, Requirement *Req,
-                           const QueueImplPtr &Queue);
+                           const QueueImplPtr &Queue,
+                           std::vector<Command *> &ToEnqueue);

     /// Finds dependencies for the requirement.
     std::set<Command *> findDepsForReq(MemObjRecord *Record,
@@ -586,7 +599,8 @@ class Scheduler {
         std::is_same<typename std::remove_cv_t<T>, Requirement>::value,
         EmptyCommand *>
     addEmptyCmd(Command *Cmd, const std::vector<T *> &Req,
-                const QueueImplPtr &Queue, Command::BlockReason Reason);
+                const QueueImplPtr &Queue, Command::BlockReason Reason,
+                std::vector<Command *> &ToEnqueue);

   protected:
     /// Finds a command dependency corresponding to the record.
@@ -605,9 +619,10 @@ class Scheduler {
     /// Searches for suitable alloca in memory record.
     ///
     /// If none found, creates new one.
-    AllocaCommandBase *getOrCreateAllocaForReq(MemObjRecord *Record,
-                                               const Requirement *Req,
-                                               QueueImplPtr Queue);
+    AllocaCommandBase *
+    getOrCreateAllocaForReq(MemObjRecord *Record, const Requirement *Req,
+                            QueueImplPtr Queue,
+                            std::vector<Command *> &ToEnqueue);

     void markModifiedIfWrite(MemObjRecord *Record, Requirement *Req);

@@ -711,22 +726,38 @@ class Scheduler {
     static std::vector<EventImplPtr> getWaitList(EventImplPtr Event);

     /// Waits for the command, associated with Event passed, is completed.
-    static void waitForEvent(EventImplPtr Event);
+    /// \param GraphReadLock read-lock which is already acquired for reading
+    /// \param LockTheLock selects if graph lock should be locked upon return
+    ///
+    /// The function may unlock and lock GraphReadLock as needed. Upon return
+    /// the lock is left in locked state if and only if LockTheLock is true.
+    static void waitForEvent(EventImplPtr Event, ReadLockT &GraphReadLock,
+                             bool LockTheLock = true);

     /// Enqueues the command and all its dependencies.
     ///
     /// \param EnqueueResult is set to specific status if enqueue failed.
+    /// \param GraphReadLock read-lock which is already acquired for reading
     /// \return true if the command is successfully enqueued.
+    ///
+    /// The function may unlock and lock GraphReadLock as needed. Upon return
+    /// the lock is left in locked state.
     static bool enqueueCommand(Command *Cmd, EnqueueResultT &EnqueueResult,
                                BlockingT Blocking = NON_BLOCKING);
   };

-  void waitForRecordToFinish(MemObjRecord *Record);
+  /// This function waits on all of the graph leaves which somehow use the
+  /// memory object which is represented by \c Record. The function is called
+  /// upon destruction of the memory buffer.
+  /// \param Record the memory record whose graph leaves are awaited
+  /// \param GraphReadLock locked graph read lock
+  ///
+  /// GraphReadLock will be unlocked/locked as needed. Upon return from the
+  /// function, GraphReadLock will be left in locked state.
+  void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock);

   GraphBuilder MGraphBuilder;

-  // TODO: after switching to C++17, change std::shared_timed_mutex to
-  // std::shared_mutex
-  std::shared_timed_mutex MGraphLock;
+  RWLockT MGraphLock;

   QueueImplPtr DefaultHostQueue;
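For reference, the lock aliases the header introduces boil down to the
following; per the TODO, RWLockT could become std::shared_mutex once C++17 is
available:

    #include <mutex>
    #include <shared_mutex>

    using RWLockT = std::shared_timed_mutex;
    using ReadLockT = std::shared_lock<RWLockT>;   // concurrent graph readers
    using WriteLockT = std::unique_lock<RWLockT>;  // exclusive graph mutation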
EXPECT_CALL(D, enqueue(_, _)).Times(0); + MockScheduler MS; + auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; @@ -112,6 +116,8 @@ TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { EXPECT_CALL(A, enqueue(_, _)).Times(0); EXPECT_CALL(B, enqueue(_, _)).Times(0); + MockScheduler MS; + auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; @@ -148,7 +154,7 @@ TEST_F(SchedulerTest, EnqueueHostDependency) { new cl::sycl::detail::event_impl(detail::getSyclObjImpl(MQueue))}; DepEvent->setCommand(&B); - A.addDep(DepEvent); + (void)A.addDep(DepEvent); // We have such a "graph": // @@ -163,6 +169,8 @@ TEST_F(SchedulerTest, EnqueueHostDependency) { EXPECT_CALL(A, enqueue(_, _)).Times(1); EXPECT_CALL(B, enqueue(_, _)).Times(1); + MockScheduler MS; + auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); ASSERT_TRUE(Enqueued) << "The command should be enqueued\n"; diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 02c9f9ba1d865..37a7a71a4afdc 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -16,10 +16,12 @@ TEST_F(SchedulerTest, FailedDependency) { MockCommand MDep(detail::getSyclObjImpl(MQueue)); MockCommand MUser(detail::getSyclObjImpl(MQueue)); MDep.addUser(&MUser); - MUser.addDep(detail::DepDesc{&MDep, &MockReq, nullptr}); + (void)MUser.addDep(detail::DepDesc{&MDep, &MockReq, nullptr}); MUser.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; MDep.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueFailed; + MockScheduler MS; + auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = MockScheduler::enqueueCommand(&MUser, Res, detail::NON_BLOCKING); diff --git a/sycl/unittests/scheduler/FinishedCmdCleanup.cpp b/sycl/unittests/scheduler/FinishedCmdCleanup.cpp index abafde6cd2c5a..34401108187e1 100644 --- a/sycl/unittests/scheduler/FinishedCmdCleanup.cpp +++ b/sycl/unittests/scheduler/FinishedCmdCleanup.cpp @@ -21,8 +21,9 @@ TEST_F(SchedulerTest, FinishedCmdCleanup) { detail::Requirement MockReqA = getMockRequirement(BufA); detail::Requirement MockReqB = getMockRequirement(BufB); detail::Requirement MockReqC = getMockRequirement(BufC); - detail::MemObjRecord *RecC = - MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(MQueue), &MockReqC); + std::vector AuxCmds; + detail::MemObjRecord *RecC = MS.getOrInsertMemObjRecord( + detail::getSyclObjImpl(MQueue), &MockReqC, AuxCmds); // Create a graph and check that all inner nodes have been deleted and // their users have had the corresponding dependency replaced with a @@ -59,13 +60,15 @@ TEST_F(SchedulerTest, FinishedCmdCleanup) { detail::getSyclObjImpl(MQueue), MockReqA, Callback); addEdge(InnerC, &AllocaA, &AllocaA); + std::vector ToEnqueue; + MockCommand LeafB{detail::getSyclObjImpl(MQueue), MockReqB}; addEdge(&LeafB, &AllocaB, &AllocaB); - MS.addNodeToLeaves(RecC, &LeafB); + MS.addNodeToLeaves(RecC, &LeafB, access::mode::read, ToEnqueue); MockCommand LeafA{detail::getSyclObjImpl(MQueue), MockReqA}; addEdge(&LeafA, InnerC, &AllocaA); - MS.addNodeToLeaves(RecC, &LeafA); + 
MS.addNodeToLeaves(RecC, &LeafA, access::mode::read, ToEnqueue); MockCommand *InnerB = new MockCommandWithCallback( detail::getSyclObjImpl(MQueue), MockReqB, Callback); diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index cec7c1772852c..14c8a802d1772 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -112,18 +112,21 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { buffer Buf(&val, range<1>(1)); detail::Requirement Req = getMockRequirement(Buf); + std::vector AuxCmds; detail::MemObjRecord *Record = - MS.getOrInsertMemObjRecord(InOrderQueueImpl, &Req); - MS.getOrCreateAllocaForReq(Record, &Req, InOrderQueueImpl); - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue); + MS.getOrInsertMemObjRecord(InOrderQueueImpl, &Req, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, InOrderQueueImpl, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); // Check that sequential memory movements submitted to the same in-order // queue do not depend on each other. - detail::Command *Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue); + detail::Command *Cmd = + MS.insertMemoryMove(Record, &Req, DefaultHostQueue, AuxCmds); detail::EnqueueResultT Res; + auto ReadLock = MS.acquireGraphReadLock(); MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); - Cmd = MS.insertMemoryMove(Record, &Req, InOrderQueueImpl); + Cmd = MS.insertMemoryMove(Record, &Req, InOrderQueueImpl, AuxCmds); MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); - Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue); + Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue, AuxCmds); MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); } diff --git a/sycl/unittests/scheduler/LeafLimit.cpp b/sycl/unittests/scheduler/LeafLimit.cpp index d840099a0048d..ffed74ba0e1ec 100644 --- a/sycl/unittests/scheduler/LeafLimit.cpp +++ b/sycl/unittests/scheduler/LeafLimit.cpp @@ -29,8 +29,9 @@ TEST_F(SchedulerTest, LeafLimit) { MockDepCmd = std::make_unique(detail::getSyclObjImpl(MQueue), MockReq); - detail::MemObjRecord *Rec = - MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(MQueue), &MockReq); + std::vector AuxCmds; + detail::MemObjRecord *Rec = MS.getOrInsertMemObjRecord( + detail::getSyclObjImpl(MQueue), &MockReq, AuxCmds); // Create commands that will be added as leaves exceeding the limit by 1 for (std::size_t i = 0; i < Rec->MWriteLeaves.genericCommandsCapacity() + 1; @@ -41,12 +42,13 @@ TEST_F(SchedulerTest, LeafLimit) { // Create edges: all soon-to-be leaves are direct users of MockDep for (auto &Leaf : LeavesToAdd) { MockDepCmd->addUser(Leaf.get()); - Leaf->addDep( + (void)Leaf->addDep( detail::DepDesc{MockDepCmd.get(), Leaf->getRequirement(), nullptr}); } + std::vector ToEnqueue; // Add edges as leaves and exceed the leaf limit for (auto &LeafPtr : LeavesToAdd) { - MS.addNodeToLeaves(Rec, LeafPtr.get()); + MS.addNodeToLeaves(Rec, LeafPtr.get(), access::mode::write, ToEnqueue); } // Check that the oldest leaf has been removed from the leaf list // and added as a dependency of the newest one instead diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp index ee5a3952cb0c6..a731f960c0c4a 100644 --- a/sycl/unittests/scheduler/LeavesCollection.cpp +++ b/sycl/unittests/scheduler/LeavesCollection.cpp @@ -51,8 +51,13 @@ TEST_F(LeavesCollectionTest, PushBack) { size_t TimesGenericWasFull; + std::vector 
diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp
index ee5a3952cb0c6..a731f960c0c4a 100644
--- a/sycl/unittests/scheduler/LeavesCollection.cpp
+++ b/sycl/unittests/scheduler/LeavesCollection.cpp
@@ -51,8 +51,13 @@ TEST_F(LeavesCollectionTest, PushBack) {
   size_t TimesGenericWasFull;
 
+  std::vector<Command *> ToEnqueue;
+
   LeavesCollection::AllocateDependencyF AllocateDependency =
-      [&](Command *, Command *, MemObjRecord *) { ++TimesGenericWasFull; };
+      [&](Command *, Command *, MemObjRecord *,
+          std::vector<Command *> &) {
+        ++TimesGenericWasFull;
+      };
 
   // add only generic commands
   {
@@ -65,7 +70,7 @@ TEST_F(LeavesCollectionTest, PushBack) {
     for (size_t Idx = 0; Idx < GenericCmdsCapacity * 2; ++Idx) {
       Cmds.push_back(createGenericCommand(getSyclObjImpl(MQueue)));
 
-      LE.push_back(Cmds.back().get());
+      LE.push_back(Cmds.back().get(), ToEnqueue);
     }
 
     ASSERT_EQ(TimesGenericWasFull, GenericCmdsCapacity)
@@ -95,7 +100,7 @@ TEST_F(LeavesCollectionTest, PushBack) {
                      : createEmptyCommand(getSyclObjImpl(MQueue), MockReq);
       Cmds.push_back(Cmd);
 
-      LE.push_back(Cmds.back().get());
+      LE.push_back(Cmds.back().get(), ToEnqueue);
     }
 
     ASSERT_EQ(TimesGenericWasFull, GenericCmdsCapacity)
@@ -112,8 +117,11 @@ TEST_F(LeavesCollectionTest, Remove) {
   static constexpr size_t GenericCmdsCapacity = 8;
 
+  std::vector<Command *> ToEnqueue;
+
   LeavesCollection::AllocateDependencyF AllocateDependency =
-      [](Command *, Command *Old, MemObjRecord *) { --Old->MLeafCounter; };
+      [](Command *, Command *Old, MemObjRecord *,
+         std::vector<Command *> &) { --Old->MLeafCounter; };
 
   {
     cl::sycl::buffer<int, 1> Buf(cl::sycl::range<1>(1));
@@ -129,7 +137,7 @@ TEST_F(LeavesCollectionTest, Remove) {
                      : createEmptyCommand(getSyclObjImpl(MQueue), MockReq);
       Cmds.push_back(Cmd);
 
-      if (LE.push_back(Cmds.back().get()))
+      if (LE.push_back(Cmds.back().get(), ToEnqueue))
        ++Cmd->MLeafCounter;
     }
diff --git a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp
index 6056c94b445b2..aa10bb446c32c 100644
--- a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp
+++ b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp
@@ -68,7 +68,8 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) {
                                      /*PropList=*/{}));
 
   auto AllocaDep = [](cl::sycl::detail::Command *, cl::sycl::detail::Command *,
-                      cl::sycl::detail::MemObjRecord *) {};
+                      cl::sycl::detail::MemObjRecord *,
+                      std::vector<cl::sycl::detail::Command *> &) {};
 
   std::shared_ptr<cl::sycl::detail::MemObjRecord> Record{
       new cl::sycl::detail::MemObjRecord(DefaultHostQueue->getContextImplPtr(),
@@ -84,11 +85,12 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) {
   MockCommand DepDepCmd(DefaultHostQueue, Req);
   DepCmd.MDeps.push_back({&DepDepCmd, DepDepCmd.getRequirement(), &AllocaCmd1});
   DepDepCmd.MUsers.insert(&DepCmd);
-  Record->MWriteLeaves.push_back(&DepCmd);
+  std::vector<cl::sycl::detail::Command *> ToEnqueue;
+  Record->MWriteLeaves.push_back(&DepCmd, ToEnqueue);
 
   MockScheduler MS;
   cl::sycl::detail::Command *AllocaCmd2 =
-      MS.getOrCreateAllocaForReq(Record.get(), &Req, Q1);
+      MS.getOrCreateAllocaForReq(Record.get(), &Req, Q1, ToEnqueue);
 
   ASSERT_TRUE(!!AllocaCmd1.MLinkedAllocaCmd)
       << "No link appeared in existing command";
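Both callbacks above gain a trailing std::vector of command pointers to match the widened AllocateDependencyF signature. The sketch below mirrors that shape with a plain std::function; Command, MemObjRecord, and the alias names are forward-declared stand-ins, not the real headers:

```cpp
#include <functional>
#include <vector>

struct Command;      // declaration only; pointer parameters need no definition
struct MemObjRecord;
using CommandPtrVec = std::vector<Command *>;

// Hypothetical mirror of the post-patch callback type: the fourth parameter
// is the ToEnqueue vector the leaf collection threads through.
using AllocateDependencyF =
    std::function<void(Command *, Command *, MemObjRecord *, CommandPtrVec &)>;

int main() {
  // A test that does not care about auxiliary commands simply ignores the
  // new parameter, as the updated lambdas in these files do.
  AllocateDependencyF Ignore =
      [](Command *, Command *, MemObjRecord *, CommandPtrVec &) {};

  // A caller that wants the extra commands can collect them instead.
  AllocateDependencyF Collect =
      [](Command *, Command *Old, MemObjRecord *, CommandPtrVec &ToEnqueue) {
        ToEnqueue.push_back(Old); // illustrative only
      };
  (void)Ignore;
  (void)Collect;
}
```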
diff --git a/sycl/unittests/scheduler/MemObjCommandCleanup.cpp b/sycl/unittests/scheduler/MemObjCommandCleanup.cpp
index d35ece2454203..17429831f4257 100644
--- a/sycl/unittests/scheduler/MemObjCommandCleanup.cpp
+++ b/sycl/unittests/scheduler/MemObjCommandCleanup.cpp
@@ -17,8 +17,9 @@ TEST_F(SchedulerTest, MemObjCommandCleanup) {
   buffer<int, 1> BufB(range<1>(1));
   detail::Requirement MockReqA = getMockRequirement(BufA);
   detail::Requirement MockReqB = getMockRequirement(BufB);
-  detail::MemObjRecord *RecA =
-      MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(MQueue), &MockReqA);
+  std::vector<detail::Command *> AuxCmds;
+  detail::MemObjRecord *RecA = MS.getOrInsertMemObjRecord(
+      detail::getSyclObjImpl(MQueue), &MockReqA, AuxCmds);
 
   // Create 2 fake allocas, one of which will be cleaned up
   detail::AllocaCommand *MockAllocaA =
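The recurring AuxCmds idiom in these tests — every graph-builder call appends whatever auxiliary commands it had to create to a caller-owned vector, which the caller drains afterwards — reduces to a simple out-parameter accumulation pattern. A self-contained sketch under that assumption; all names here are hypothetical:

```cpp
#include <vector>

struct Cmd {};

// Pretend graph-builder call: returns its primary result and appends any
// auxiliary command it created as a side effect to the caller's vector.
Cmd *getOrInsertRecord(std::vector<Cmd *> &Aux) {
  static Cmd Alloc; // e.g. an allocation created while inserting the record
  Aux.push_back(&Alloc);
  static Cmd Rec;
  return &Rec;
}

void enqueue(Cmd *) { /* submit to the device */ }

int main() {
  std::vector<Cmd *> AuxCmds;
  Cmd *Record = getOrInsertRecord(AuxCmds);
  (void)Record;
  for (Cmd *C : AuxCmds) // drain everything the builder created on the side
    enqueue(C);
}
```

Passing one vector through several calls, as the tests do, lets the caller batch all deferred enqueues into a single pass.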
diff --git a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp
index 9dc561295eb86..ed66cc0c9f60f 100644
--- a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp
+++ b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp
@@ -89,9 +89,11 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
     buffer<int, 1> Buf(&val, range<1>(1));
     detail::Requirement Req = getMockRequirement(Buf);
 
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
+    std::vector<detail::Command *> AuxCmds;
+    detail::MemObjRecord *Record =
+        MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds);
     detail::AllocaCommandBase *NonHostAllocaCmd =
-        MS.getOrCreateAllocaForReq(Record, &Req, QImpl);
+        MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
 
     // Both non-host and host allocations should be created in this case in
     // order to perform a memory move.
@@ -102,7 +104,8 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
     EXPECT_TRUE(!NonHostAllocaCmd->MLinkedAllocaCmd);
     EXPECT_TRUE(Record->MCurContext->is_host());
 
-    detail::Command *MemoryMove = MS.insertMemoryMove(Record, &Req, QImpl);
+    detail::Command *MemoryMove =
+        MS.insertMemoryMove(Record, &Req, QImpl, AuxCmds);
     EXPECT_EQ(MemoryMove->getType(), detail::Command::COPY_MEMORY);
   }
   // Check non-host alloca with discard access modes
@@ -116,8 +119,10 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
 
     // No need to create a host allocation in this case since the data can be
     // discarded.
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
-    MS.getOrCreateAllocaForReq(Record, &DiscardReq, QImpl);
+    std::vector<detail::Command *> AuxCmds;
+    detail::MemObjRecord *Record =
+        MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds);
+    MS.getOrCreateAllocaForReq(Record, &DiscardReq, QImpl, AuxCmds);
     EXPECT_EQ(Record->MAllocaCommands.size(), 1U);
   }
   // Check non-host alloca without user pointer
@@ -127,8 +132,10 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
 
     // No need to create a host allocation in this case since there's no data to
     // initialize the buffer with.
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
-    MS.getOrCreateAllocaForReq(Record, &Req, QImpl);
+    std::vector<detail::Command *> AuxCmds;
+    detail::MemObjRecord *Record =
+        MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds);
+    MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
     EXPECT_EQ(Record->MAllocaCommands.size(), 1U);
   }
   // Check host -> non-host alloca
@@ -139,18 +146,20 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
 
     // No special handling required: alloca commands are created one after
     // another and the transfer is done via a write operation.
+    std::vector<detail::Command *> AuxCmds;
     detail::MemObjRecord *Record =
-        MS.getOrInsertMemObjRecord(DefaultHostQueue, &Req);
+        MS.getOrInsertMemObjRecord(DefaultHostQueue, &Req, AuxCmds);
     detail::AllocaCommandBase *HostAllocaCmd =
-        MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue);
+        MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds);
 
     EXPECT_EQ(Record->MAllocaCommands.size(), 1U);
     detail::AllocaCommandBase *NonHostAllocaCmd =
-        MS.getOrCreateAllocaForReq(Record, &Req, QImpl);
+        MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
 
     EXPECT_EQ(Record->MAllocaCommands.size(), 2U);
     EXPECT_TRUE(!HostAllocaCmd->MLinkedAllocaCmd);
     EXPECT_TRUE(!NonHostAllocaCmd->MLinkedAllocaCmd);
-    detail::Command *MemoryMove = MS.insertMemoryMove(Record, &Req, QImpl);
+    detail::Command *MemoryMove =
+        MS.insertMemoryMove(Record, &Req, QImpl, AuxCmds);
     EXPECT_EQ(MemoryMove->getType(), detail::Command::COPY_MEMORY);
   }
   // Check that memory movement operations work correctly with/after discard
@@ -163,13 +172,15 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
     detail::Requirement DiscardReq = getMockRequirement(Buf);
     DiscardReq.MAccessMode = access::mode::discard_read_write;
 
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
-    MS.getOrCreateAllocaForReq(Record, &Req, QImpl);
-    MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue);
+    std::vector<detail::Command *> AuxCmds;
+    detail::MemObjRecord *Record =
+        MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds);
+    MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
+    MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds);
 
     // Memory movement operations should be omitted for discard access modes.
     detail::Command *MemoryMove =
-        MS.insertMemoryMove(Record, &DiscardReq, DefaultHostQueue);
+        MS.insertMemoryMove(Record, &DiscardReq, DefaultHostQueue, AuxCmds);
    EXPECT_TRUE(MemoryMove == nullptr);
     // The current context for the record should still be modified.
     EXPECT_EQ(Record->MCurContext, DefaultHostQueue->getContextImplPtr());
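The last hunk above checks that a discard access mode suppresses the copy command (insertMemoryMove returns nullptr) while the record's current context is still retargeted. A toy decision function capturing just that behavior; the enum, Record, and insertMemoryMove here are simplified mock-ups of the scheduler's real types:

```cpp
#include <cassert>

enum class mode { read_write, discard_read_write };
struct Context { int Id; };
struct Record { Context *MCurContext; };
struct Cmd {};

// With a discard mode the old contents may be thrown away, so no copy
// command is produced, yet the record still points at the new context.
Cmd *insertMemoryMove(Record &Rec, mode AccessMode, Context &Dst) {
  Rec.MCurContext = &Dst; // context is retargeted either way
  if (AccessMode == mode::discard_read_write)
    return nullptr;       // nothing worth copying
  static Cmd Copy;
  return &Copy;
}

int main() {
  Context Host{0}, Device{1};
  Record Rec{&Device};
  Cmd *Move = insertMemoryMove(Rec, mode::discard_read_write, Host);
  assert(Move == nullptr && Rec.MCurContext == &Host);
}
```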
diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp
index 304f79319e0b2..ba244fa7c0bb8 100644
--- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp
+++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp
@@ -22,6 +22,14 @@ cl::sycl::detail::Requirement getMockRequirement();
 
+namespace cl {
+namespace sycl {
+namespace detail {
+class Command;
+} // namespace detail
+} // namespace sycl
+} // namespace cl
+
 class MockCommand : public cl::sycl::detail::Command {
 public:
   MockCommand(cl::sycl::detail::QueueImplPtr Queue,
@@ -91,8 +99,9 @@ class MockScheduler : public cl::sycl::detail::Scheduler {
 public:
   cl::sycl::detail::MemObjRecord *
   getOrInsertMemObjRecord(const cl::sycl::detail::QueueImplPtr &Queue,
-                          cl::sycl::detail::Requirement *Req) {
-    return MGraphBuilder.getOrInsertMemObjRecord(Queue, Req);
+                          cl::sycl::detail::Requirement *Req,
+                          std::vector<cl::sycl::detail::Command *> &ToEnqueue) {
+    return MGraphBuilder.getOrInsertMemObjRecord(Queue, Req, ToEnqueue);
   }
 
   void removeRecordForMemObj(cl::sycl::detail::SYCLMemObjI *MemObj) {
@@ -105,10 +114,11 @@ class MockScheduler : public cl::sycl::detail::Scheduler {
     MGraphBuilder.cleanupCommandsForRecord(Rec, StreamsToDeallocate);
   }
 
-  void addNodeToLeaves(
-      cl::sycl::detail::MemObjRecord *Rec, cl::sycl::detail::Command *Cmd,
-      cl::sycl::access::mode Mode = cl::sycl::access::mode::read_write) {
-    return MGraphBuilder.addNodeToLeaves(Rec, Cmd, Mode);
+  void addNodeToLeaves(cl::sycl::detail::MemObjRecord *Rec,
+                       cl::sycl::detail::Command *Cmd,
+                       cl::sycl::access::mode Mode,
+                       std::vector<cl::sycl::detail::Command *> &ToEnqueue) {
+    return MGraphBuilder.addNodeToLeaves(Rec, Cmd, Mode, ToEnqueue);
   }
 
   static bool enqueueCommand(cl::sycl::detail::Command *Cmd,
@@ -120,21 +130,26 @@ class MockScheduler : public cl::sycl::detail::Scheduler {
   cl::sycl::detail::AllocaCommandBase *
   getOrCreateAllocaForReq(cl::sycl::detail::MemObjRecord *Record,
                           const cl::sycl::detail::Requirement *Req,
-                          cl::sycl::detail::QueueImplPtr Queue) {
-    return MGraphBuilder.getOrCreateAllocaForReq(Record, Req, Queue);
+                          cl::sycl::detail::QueueImplPtr Queue,
+                          std::vector<cl::sycl::detail::Command *> &ToEnqueue) {
+    return MGraphBuilder.getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue);
   }
 
+  ReadLockT acquireGraphReadLock() { return ReadLockT{MGraphLock}; }
+
   cl::sycl::detail::Command *
   insertMemoryMove(cl::sycl::detail::MemObjRecord *Record,
                    cl::sycl::detail::Requirement *Req,
-                   const cl::sycl::detail::QueueImplPtr &Queue) {
-    return MGraphBuilder.insertMemoryMove(Record, Req, Queue);
+                   const cl::sycl::detail::QueueImplPtr &Queue,
+                   std::vector<cl::sycl::detail::Command *> &ToEnqueue) {
+    return MGraphBuilder.insertMemoryMove(Record, Req, Queue, ToEnqueue);
   }
 
   cl::sycl::detail::Command *
   addCG(std::unique_ptr<cl::sycl::detail::CG> CommandGroup,
-        cl::sycl::detail::QueueImplPtr Queue) {
-    return MGraphBuilder.addCG(std::move(CommandGroup), Queue);
+        cl::sycl::detail::QueueImplPtr Queue,
+        std::vector<cl::sycl::detail::Command *> &ToEnqueue) {
+    return MGraphBuilder.addCG(std::move(CommandGroup), Queue, ToEnqueue);
   }
 };
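The nested forward declaration added at the top of SchedulerTestUtils.hpp is all the new std::vector<cl::sycl::detail::Command *> & parameters need, since a pointer type in a signature never requires the complete class. A minimal standalone illustration of that design choice (only the namespace shape is taken from the patch; the function is hypothetical):

```cpp
#include <vector>

// Declaration only: commands.hpp need not be included at this point.
namespace cl {
namespace sycl {
namespace detail {
class Command;
} // namespace detail
} // namespace sycl
} // namespace cl

// std::vector<Command *> in an interface compiles with just the forward
// declaration, because no Command member is ever accessed here.
void takeToEnqueue(std::vector<cl::sycl::detail::Command *> &ToEnqueue) {
  ToEnqueue.clear();
}

int main() {
  std::vector<cl::sycl::detail::Command *> V;
  takeToEnqueue(V);
}
```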
diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp
index 083fc3eb40630..720247e38c27b 100644
--- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp
+++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp
@@ -128,7 +128,8 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) {
   initStream(Streams[0], HQueueImpl);
 
   MockScheduler MS;
-  detail::Command *NewCmd = MS.addCG(std::move(MainCG), HQueueImpl);
+  std::vector<detail::Command *> AuxCmds;
+  detail::Command *NewCmd = MS.addCG(std::move(MainCG), HQueueImpl, AuxCmds);
   ASSERT_TRUE(!!NewCmd) << "Failed to add command group into scheduler";
   ASSERT_GT(NewCmd->MDeps.size(), 0u)
       << "No deps appeared in the new exec kernel command";
diff --git a/sycl/unittests/scheduler/utils.cpp b/sycl/unittests/scheduler/utils.cpp
index 3e80c485bc458..b6bb23b4325d8 100644
--- a/sycl/unittests/scheduler/utils.cpp
+++ b/sycl/unittests/scheduler/utils.cpp
@@ -10,7 +10,8 @@
 
 void addEdge(cl::sycl::detail::Command *User, cl::sycl::detail::Command *Dep,
              cl::sycl::detail::AllocaCommandBase *Alloca) {
-  User->addDep(cl::sycl::detail::DepDesc{Dep, User->getRequirement(), Alloca});
+  (void)User->addDep(
+      cl::sycl::detail::DepDesc{Dep, User->getRequirement(), Alloca});
   Dep->addUser(User);
 }
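The (void) casts sprinkled through the tests and utils.cpp silence the unused-result diagnostics that come with marking addDep [[nodiscard]]; at these call sites the returned connection command is known, or asserted, to be absent. A compact demonstration with a stand-in Command type (the body of addDep is illustrative only):

```cpp
// Stand-in mirroring the attribute, not the real SYCL class.
struct Command {
  [[nodiscard]] Command *addDep(Command *Dep) {
    (void)Dep;
    return nullptr; // pretend no cross-context connection was needed
  }
};

int main() {
  Command User, Dep;
  // Without the cast, this call would trigger a -Wunused-result warning
  // under the [[nodiscard]] attribute.
  (void)User.addDep(&Dep);
}
```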