[SYCL] Defer buffer release when no host memory to be updated #6837
Changes from 25 commits
```diff
@@ -26,51 +26,73 @@ namespace sycl {
 __SYCL_INLINE_VER_NAMESPACE(_V1) {
 namespace detail {
 
-void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
-                                      ReadLockT &GraphReadLock) {
-#ifdef XPTI_ENABLE_INSTRUMENTATION
-  // Will contain the list of dependencies for the Release Command
-  std::set<Command *> DepCommands;
-#endif
+bool Scheduler::waitForRecordToFinish(MemObjRecord *Record,
+                                      ReadLockT &GraphReadLock,
+                                      bool ForceWait) {
+  assert(Record);
   std::vector<Command *> ToCleanUp;
   for (Command *Cmd : Record->MReadLeaves) {
+    if (Cmd->getEvent()->isCompleted())
+      continue;
+
     EnqueueResultT Res;
     bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp);
     if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
       throw runtime_error("Enqueue process failed.",
                           PI_ERROR_INVALID_OPERATION);
-#ifdef XPTI_ENABLE_INSTRUMENTATION
-    // Capture the dependencies
-    DepCommands.insert(Cmd);
-#endif
-    GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
+    if (ForceWait) {
+      GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
+    } else
+      return false;
   }
   for (Command *Cmd : Record->MWriteLeaves) {
+    if (Cmd->getEvent()->isCompleted())
+      continue;
+
     EnqueueResultT Res;
     bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp);
     if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
       throw runtime_error("Enqueue process failed.",
                           PI_ERROR_INVALID_OPERATION);
-#ifdef XPTI_ENABLE_INSTRUMENTATION
-    DepCommands.insert(Cmd);
-#endif
-    GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
+    if (ForceWait) {
+      GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
+    } else
+      return false;
   }
+  // All dependencies are completed, so we can enqueue all ReleaseCmds in
+  // advance.
   for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) {
     Command *ReleaseCmd = AllocaCmd->getReleaseCmd();
-    EnqueueResultT Res;
-    bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, Res, ToCleanUp);
-    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
-      throw runtime_error("Enqueue process failed.",
-                          PI_ERROR_INVALID_OPERATION);
+    if (ReleaseCmd->isSuccessfullyEnqueued())
+      continue;
 #ifdef XPTI_ENABLE_INSTRUMENTATION
+    // Will contain the list of dependencies for the Release Command
+    std::set<Command *> DepCommands;
+    // Capture the read and write dependencies
+    for (Command *Cmd : Record->MWriteLeaves)
+      DepCommands.insert(Cmd);
+    for (Command *Cmd : Record->MReadLeaves)
+      DepCommands.insert(Cmd);
     // Report these dependencies to the Command so these dependencies can be
     // reported as edges
     ReleaseCmd->resolveReleaseDependencies(DepCommands);
 #endif
-    GraphProcessor::waitForEvent(ReleaseCmd->getEvent(), GraphReadLock,
-                                 ToCleanUp);
+    EnqueueResultT Res;
+    bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, Res, ToCleanUp);
+    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
+      throw runtime_error("Enqueue process failed.",
+                          PI_ERROR_INVALID_OPERATION);
   }
+  // Enqueue is fully done, so we can check whether each ReleaseCmd is
+  // completed.
+  for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) {
+    Command *Cmd = AllocaCmd->getReleaseCmd();
+    if (Cmd->getEvent()->isCompleted())
+      continue;
+    if (ForceWait)
+      GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
+    else
+      return false;
+  }
+  return true;
 }
 
 EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
```
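The behavioral change above is that waiting is now optional: with `ForceWait == false` the function only probes whether everything tied to the record has finished and reports the result, while `ForceWait == true` keeps the old blocking semantics. A standalone illustration of that poll-or-wait contract, using plain C++ futures and an invented `waitForAll` name (not SYCL runtime code):

```cpp
// Poll-or-wait contract mirroring the new waitForRecordToFinish: non-blocking
// mode bails out on the first unfinished dependency and reports false,
// blocking mode waits for everything and reports true.
#include <chrono>
#include <future>
#include <iostream>
#include <thread>
#include <vector>

bool waitForAll(std::vector<std::shared_future<void>> &Deps, bool ForceWait) {
  for (auto &Dep : Deps) {
    // Already finished dependencies are skipped, mirroring the
    // Cmd->getEvent()->isCompleted() check.
    if (Dep.wait_for(std::chrono::seconds(0)) == std::future_status::ready)
      continue;
    if (ForceWait)
      Dep.wait(); // blocking path, e.g. removeMemoryObject()
    else
      return false; // deferred path: try again later
  }
  return true;
}

int main() {
  std::vector<std::shared_future<void>> Deps;
  Deps.push_back(std::async(std::launch::async, [] {
                   std::this_thread::sleep_for(std::chrono::milliseconds(50));
                 }).share());

  std::cout << std::boolalpha;
  std::cout << "non-blocking attempt: " << waitForAll(Deps, false) << "\n";
  std::cout << "blocking attempt:     " << waitForAll(Deps, true) << "\n";
}
```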
```diff
@@ -258,6 +280,18 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) {
   deallocateStreams(StreamsToDeallocate);
 }
 
+inline void Scheduler::releaseMemObjRecord(
+    detail::SYCLMemObjI *MemObj,
+    std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate,
+    std::vector<std::shared_ptr<const void>> &AuxResourcesToDeallocate) {
+  MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj);
+  assert(Record);
+  MGraphBuilder.decrementLeafCountersForRecord(Record);
+  MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate,
+                                         AuxResourcesToDeallocate);
+  MGraphBuilder.removeRecordForMemObj(MemObj);
+}
+
 void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
   // We are going to traverse a graph of finished commands. Gather stream
   // objects from these commands if any and deallocate buffers for these stream
```
```diff
@@ -283,16 +317,14 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
         // No operations were performed on the mem object
         return;
 
-      waitForRecordToFinish(Record, Lock);
+      waitForRecordToFinish(Record, Lock, true);
     }
 
     {
       WriteLockT Lock(MGraphLock, std::defer_lock);
       acquireWriteLock(Lock);
-      MGraphBuilder.decrementLeafCountersForRecord(Record);
-      MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate,
-                                             AuxResourcesToDeallocate);
-      MGraphBuilder.removeRecordForMemObj(MemObj);
+      releaseMemObjRecord(MemObj, StreamsToDeallocate,
+                          AuxResourcesToDeallocate);
     }
   }
   deallocateStreams(StreamsToDeallocate);
```
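`removeMemoryObject` keeps its two-phase shape: outstanding work is awaited under the graph read lock, and the record is removed only after switching to the write lock. A self-contained sketch of that shared-then-exclusive pattern, with a toy `Registry` type and invented names rather than the scheduler's actual classes:

```cpp
// Shared lock for the waiting/traversal phase, exclusive lock for the
// mutation phase, mirroring ReadLockT/WriteLockT usage above.
#include <mutex>
#include <shared_mutex>
#include <unordered_map>
#include <vector>

struct Record {
  std::vector<int> PendingCommands;
};

class Registry {
  std::shared_mutex GraphLock;
  std::unordered_map<int, Record> Records;

public:
  void add(int Key) {
    std::unique_lock<std::shared_mutex> Lock(GraphLock);
    Records[Key] = Record{};
  }

  void remove(int Key) {
    {
      // Phase 1: read-only traversal, other readers stay unblocked;
      // analogous to waitForRecordToFinish running under the read lock.
      std::shared_lock<std::shared_mutex> Lock(GraphLock);
      auto It = Records.find(Key);
      if (It == Records.end())
        return;
      // ... wait here for It->second.PendingCommands to drain ...
    }
    {
      // Phase 2: exclusive access; analogous to releaseMemObjRecord under
      // the write lock.
      std::unique_lock<std::shared_mutex> Lock(GraphLock);
      Records.erase(Key);
    }
  }
};

int main() {
  Registry R;
  R.add(1);
  R.remove(1);
}
```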
```diff
@@ -395,6 +427,16 @@ Scheduler::Scheduler() {
 }
 
 Scheduler::~Scheduler() {
+  // Please be aware that releaseResources should be called before deletion of
+  // the Scheduler. Otherwise objects the Scheduler keeps as fields may need
+  // the Scheduler for their own release and would reach it via
+  // GlobalHandler::getScheduler, which would create a new Scheduler object.
+  // The call is still kept here, but it should do almost nothing if
+  // releaseResources was called before.
+  releaseResources();
+}
+
+void Scheduler::releaseResources() {
   // By specification there are several possible sync points: buffer
   // destruction, wait() method of a queue or event. Stream doesn't introduce
   // any synchronization point. It is guaranteed that stream is flushed and
```
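The destructor comment describes an ordering hazard rather than ordinary cleanup: if members are released only from `~Scheduler` and any of them reaches back through `GlobalHandler::getScheduler`, a fresh Scheduler appears mid-shutdown. A toy model of why the owner should call `releaseResources()` explicitly first (a hedged sketch, not the real GlobalHandler):

```cpp
// Toy singleton accessor that lazily (re)creates its object, showing why an
// explicit, ordered releaseResources() call is preferable to relying on the
// destructor alone.
#include <iostream>
#include <memory>

class Scheduler {
public:
  void releaseResources() { Released = true; }
  ~Scheduler() {
    // Kept as a safety net; nearly a no-op if the owner already called
    // releaseResources().
    if (!Released)
      releaseResources();
  }

private:
  bool Released = false;
};

class GlobalHandler {
public:
  static Scheduler &getScheduler() {
    // Lazily creates the Scheduler; calling this during shutdown would
    // resurrect the object, which is exactly what explicit ordering avoids.
    static std::unique_ptr<Scheduler> Instance;
    if (!Instance)
      Instance = std::make_unique<Scheduler>();
    return *Instance;
  }
};

int main() {
  GlobalHandler::getScheduler().releaseResources(); // explicit, ordered release
  std::cout << "shutdown done\n";
}
```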
```diff
@@ -414,6 +456,17 @@ Scheduler::~Scheduler() {
   // haven't been freed because of the graph mutex being locked at the time,
   // clean them up now.
   cleanupCommands({});
+  DefaultHostQueue.reset();
+
+  // We need a loop here since new objects may be added to the deferred mem
+  // objects storage during cleanup. A known example: we clean up the existing
+  // deferred mem objects under the write lock; while doing so we clean up
+  // commands related to a record, a command may hold the last reference to a
+  // queue_impl, ~queue_impl is called, and the buffer for assert (created with
+  // size only, so all conditions for deferred release are satisfied) is added
+  // to the deferred mem obj storage. Without the loop we could end up with a
+  // leak.
+  while (!isNoDeferredMemObjects())
+    cleanupDeferredMemObjects(true);
 }
 
 void Scheduler::acquireWriteLock(WriteLockT &Lock) {
```
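The loop at the end of `releaseResources()` exists because one cleanup pass can create more work: releasing a deferred object may itself defer another one. A minimal standalone model of that re-entrant behaviour, with invented names and `std::function` callbacks standing in for mem objects:

```cpp
// Releasing one deferred object schedules another (the queue_impl ->
// assert-buffer chain in the real runtime), so cleanup must loop until the
// deferred storage is observed empty.
#include <functional>
#include <iostream>
#include <mutex>
#include <vector>

std::mutex DeferredMutex;
std::vector<std::function<void()>> Deferred; // "deferred mem objects"

void deferRelease(std::function<void()> OnRelease) {
  std::lock_guard<std::mutex> Lock(DeferredMutex);
  Deferred.push_back(std::move(OnRelease));
}

bool cleanupOnePass() {
  std::vector<std::function<void()>> Batch;
  {
    std::lock_guard<std::mutex> Lock(DeferredMutex);
    Batch.swap(Deferred);
  }
  for (auto &Release : Batch)
    Release(); // may call deferRelease() again
  std::lock_guard<std::mutex> Lock(DeferredMutex);
  return Deferred.empty();
}

int main() {
  // Releasing the first object defers a second one.
  deferRelease([] { deferRelease([] { std::cout << "second released\n"; }); });
  while (!cleanupOnePass()) // the drain-until-empty loop from releaseResources()
    ;
  std::cout << "no deferred objects left\n";
}
```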
```diff
@@ -442,8 +495,8 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) {
 }
 
 void Scheduler::cleanupCommands(const std::vector<Command *> &Cmds) {
-  if (Cmds.empty())
-  {
+  cleanupDeferredMemObjects(false);
+  if (Cmds.empty()) {
     std::lock_guard<std::mutex> Lock{MDeferredCleanupMutex};
     if (MDeferredCleanupCommands.empty())
       return;
```
```diff
@@ -472,6 +525,75 @@ void Scheduler::cleanupCommands(const std::vector<Command *> &Cmds) {
   }
 }
 
+void Scheduler::deferMemObjRelease(const std::shared_ptr<SYCLMemObjI> &MemObj) {
+  {
+    std::lock_guard<std::mutex> Lock{MDeferredMemReleaseMutex};
+    MDeferredMemObjRelease.push_back(MemObj);
+  }
+  cleanupDeferredMemObjects(false);
+}
+
+inline bool Scheduler::isNoDeferredMemObjects() {
+  std::lock_guard<std::mutex> Lock{MDeferredMemReleaseMutex};
+  return MDeferredMemObjRelease.empty();
+}
+
+void Scheduler::cleanupDeferredMemObjects(bool ForceWait) {
+  if (isNoDeferredMemObjects())
+    return;
+
+  // Need to aggregate the ready-to-release objects to acquire the write lock
+  // only once.
+  std::list<std::shared_ptr<SYCLMemObjI>> ObjsReadyToRelease;
```
A review comment on the `ObjsReadyToRelease` declaration proposed a suggested change:

```diff
-  std::list<std::shared_ptr<SYCLMemObjI>> ObjsReadyToRelease;
+  std::vector<std::shared_ptr<SYCLMemObjI>> ObjsReadyToRelease;
```

"Probably, it makes sense to use std::vector here as well."

Reply: fixed in 28f008d
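The buffer-side half of the change, where the runtime decides that release may be deferred at all, is not part of the hunks shown here. Purely as a hedged sketch with invented names, the PR title suggests a decision of roughly this shape on the memory object's destruction path: if no user-visible host memory has to be updated, hand the object to `Scheduler::deferMemObjRelease` instead of blocking in the destructor.

```cpp
// Hypothetical decision logic: defer the release only when nothing observable
// depends on the destructor finishing it (no user host pointer to update, no
// requested write-back).
#include <iostream>

struct MemObject {
  bool HasUserHostPtr = false; // user supplied host memory to write back to
  bool NeedsWriteBack = false; // e.g. a final-data write-back was requested
};

// Stand-ins for Scheduler::deferMemObjRelease / the blocking removal path.
void deferRelease(const MemObject &) { std::cout << "deferred release\n"; }
void blockingRelease(const MemObject &) { std::cout << "blocking release\n"; }

void onBufferDestruction(const MemObject &Obj) {
  if (!Obj.HasUserHostPtr && !Obj.NeedsWriteBack)
    deferRelease(Obj); // safe to complete lazily
  else
    blockingRelease(Obj); // host memory must be updated before returning
}

int main() {
  onBufferDestruction(MemObject{});                       // deferred
  onBufferDestruction(MemObject{/*HasUserHostPtr=*/true,  // must block
                                /*NeedsWriteBack=*/true});
}
```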