-
Notifications
You must be signed in to change notification settings - Fork 802
[SYCL] Defer buffer release when no host memory to be updated #6837
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 47 commits
0601210
aff3be6
1195b59
8d05802
b54b8e4
965a015
27ccbff
9540fe0
c00c7cb
661dace
6615db3
8174dc3
d55405e
bb2c4fb
5db9e85
53a1892
4b0a3fa
8daea20
c855f13
8dbcd1c
0f61c64
ddf215b
23bea82
aa41d76
e296d03
179c472
2076c7c
e911d0a
81c2b09
edcfcfc
b5e85de
a5980a0
ac06f1b
09b8359
1e75448
484b1cf
1061322
d4537a3
342ff91
fdab0e7
0872d7c
3a25f1e
28f008d
92b5e15
6247f8a
4133862
79b2125
868973c
1bc8e57
6e0943b
1c62d08
30dfaf2
60e3011
6964876
3d5315e
467a9ea
0b9032a
9d570ce
c6d5dc7
a0b37ef
619ee4e
3187f0a
dbe88e2
06e2608
71e9048
a89e577
ceea7f8
1f201a9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1379,8 +1379,40 @@ pi_result piKernelRelease(pi_kernel) { DIE_NO_IMPLEMENTATION; } | |
|
|
||
| pi_result piEventCreate(pi_context, pi_event *) { DIE_NO_IMPLEMENTATION; } | ||
|
|
||
| pi_result piEventGetInfo(pi_event, pi_event_info, size_t, void *, size_t *) { | ||
| DIE_NO_IMPLEMENTATION; | ||
| pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, | ||
| size_t ParamValueSize, void *ParamValue, | ||
| size_t *ParamValueSizeRet) { | ||
| if (ParamName != PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { | ||
| DIE_NO_IMPLEMENTATION; | ||
| } | ||
|
|
||
| auto CheckAndFillStatus = [&](const cm_support::CM_STATUS &State) { | ||
| pi_int32 Result = PI_EVENT_RUNNING; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this an appropriate default?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It think it would be better to set
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hello @kbobrovs and @dongkyunahn-intel I did only two states intentionally, because other statuses (!= CM_STATUS_FINISHED) seems logical to map to running since it is all "work in progress" and RT does not need more details. I also used L0 plugin as reference which also has differentiation for two states - finished and not finished = running. I think it would be better to align plugins and not introduce extra value handling on level above. What do you think?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I was not aware that L0 has such implementation. Meanwhile, I looked into CM_EMU's
'CM_STATUS_RESET' is defined, but never used in CM_EMU. You can disregard it. |
||
| if (State == cm_support::CM_STATUS_FINISHED) | ||
| Result = PI_EVENT_COMPLETE; | ||
| if (ParamValue) { | ||
| if (ParamValueSize < sizeof(Result)) | ||
| return PI_ERROR_INVALID_VALUE; | ||
| *static_cast<pi_int32 *>(ParamValue) = Result; | ||
| } | ||
| if (ParamValueSizeRet) { | ||
| *ParamValueSizeRet = sizeof(Result); | ||
| } | ||
| return PI_SUCCESS; | ||
| }; | ||
| // Dummy event is already completed ones done by CM. | ||
| if (Event->IsDummyEvent) | ||
| return CheckAndFillStatus(cm_support::CM_STATUS_FINISHED); | ||
|
|
||
| if (Event->CmEventPtr == nullptr) | ||
| return PI_ERROR_INVALID_EVENT; | ||
|
|
||
| cm_support::CM_STATUS Status; | ||
| int32_t Result = Event->CmEventPtr->GetStatus(Status); | ||
| if (Result != cm_support::CM_SUCCESS) | ||
| return PI_ERROR_COMMAND_EXECUTION_FAILURE; | ||
|
|
||
| return CheckAndFillStatus(Status); | ||
| } | ||
|
|
||
| pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -47,6 +47,14 @@ T &GlobalHandler::getOrCreate(InstWithLock<T> &IWL, Types... Args) { | |||||
| return *IWL.Inst; | ||||||
| } | ||||||
|
|
||||||
| void GlobalHandler::attachScheduler(Scheduler *Scheduler) { | ||||||
| // The method is for testing purposes. Do not protect with lock since | ||||||
|
||||||
| // The method is for testing purposes. Do not protect with lock since | |
| // The method is used in unittests only. Do not protect with lock since |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
:-) fixed in 71e9048
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,18 @@ namespace sycl { | |
| __SYCL_INLINE_VER_NAMESPACE(_V1) { | ||
| namespace detail { | ||
|
|
||
| bool Scheduler::checkLeavesCompletion(MemObjRecord *Record) { | ||
| for (Command *Cmd : Record->MReadLeaves) { | ||
| if (!Cmd->getEvent()->isCompleted()) | ||
| return false; | ||
| } | ||
| for (Command *Cmd : Record->MWriteLeaves) { | ||
| if (!Cmd->getEvent()->isCompleted()) | ||
| return false; | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| void Scheduler::waitForRecordToFinish(MemObjRecord *Record, | ||
| ReadLockT &GraphReadLock) { | ||
| #ifdef XPTI_ENABLE_INSTRUMENTATION | ||
|
|
@@ -271,21 +283,17 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { | |
| std::vector<std::shared_ptr<const void>> AuxResourcesToDeallocate; | ||
|
|
||
| { | ||
| MemObjRecord *Record = nullptr; | ||
| MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj); | ||
| if (!Record) | ||
| // No operations were performed on the mem object | ||
| return; | ||
|
|
||
| { | ||
| // This only needs a shared mutex as it only involves enqueueing and | ||
| // awaiting for events | ||
| ReadLockT Lock(MGraphLock); | ||
|
|
||
| Record = MGraphBuilder.getMemObjRecord(MemObj); | ||
| if (!Record) | ||
| // No operations were performed on the mem object | ||
| return; | ||
|
|
||
| waitForRecordToFinish(Record, Lock); | ||
| } | ||
|
|
||
| { | ||
| WriteLockT Lock(MGraphLock, std::defer_lock); | ||
| acquireWriteLock(Lock); | ||
|
|
@@ -410,10 +418,31 @@ Scheduler::~Scheduler() { | |
| "not all resources were released. Please be sure that all kernels " | ||
| "have synchronization points.\n\n"); | ||
| } | ||
| // Please be aware that releaseResources should be called before deletion of | ||
| // Scheduler. Otherwise there can be the case when objects Scheduler keeps as | ||
| // fields may need Scheduler for their release and they work with Scheduler | ||
| // via GlobalHandler::getScheduler that will create new Scheduler object. | ||
| // Still keep it here but it should do almost nothing if releaseResources | ||
| // called before. | ||
| releaseResources(); | ||
| } | ||
|
|
||
| void Scheduler::releaseResources() { | ||
| // There might be some commands scheduled for post enqueue cleanup that | ||
| // haven't been freed because of the graph mutex being locked at the time, | ||
| // clean them up now. | ||
| cleanupCommands({}); | ||
| DefaultHostQueue.reset(); | ||
|
||
|
|
||
| // We need loop since sometimes we may need new objects to be added to | ||
| // deferred mem objects storage during cleanup. Known example is: we cleanup | ||
| // existing deferred mem objects under write lock, during this process we | ||
| // cleanup commands related to this record, command may have last reference to | ||
| // queue_impl, ~queue_impl is called and buffer for assert (which is created | ||
| // with size only so all confitions for deferred release are satisfied) is | ||
| // added to deferred mem obj storage. So we may end up with leak. | ||
| while (!isDeferredMemObjectsEmpty()) | ||
| cleanupDeferredMemObjects(BlockingT::BLOCKING); | ||
| } | ||
|
|
||
| void Scheduler::acquireWriteLock(WriteLockT &Lock) { | ||
|
|
@@ -442,8 +471,8 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { | |
| } | ||
|
|
||
| void Scheduler::cleanupCommands(const std::vector<Command *> &Cmds) { | ||
| if (Cmds.empty()) | ||
| { | ||
| cleanupDeferredMemObjects(BlockingT::NON_BLOCKING); | ||
| if (Cmds.empty()) { | ||
| std::lock_guard<std::mutex> Lock{MDeferredCleanupMutex}; | ||
| if (MDeferredCleanupCommands.empty()) | ||
| return; | ||
|
|
@@ -472,6 +501,54 @@ void Scheduler::cleanupCommands(const std::vector<Command *> &Cmds) { | |
| } | ||
| } | ||
|
|
||
| void Scheduler::deferMemObjRelease(const std::shared_ptr<SYCLMemObjI> &MemObj) { | ||
| { | ||
| std::lock_guard<std::mutex> Lock{MDeferredMemReleaseMutex}; | ||
| MDeferredMemObjRelease.push_back(MemObj); | ||
| } | ||
| cleanupDeferredMemObjects(BlockingT::NON_BLOCKING); | ||
| } | ||
|
|
||
| inline bool Scheduler::isDeferredMemObjectsEmpty() { | ||
| std::lock_guard<std::mutex> Lock{MDeferredMemReleaseMutex}; | ||
| return MDeferredMemObjRelease.empty(); | ||
| } | ||
|
|
||
| void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) { | ||
| if (isDeferredMemObjectsEmpty()) | ||
| return; | ||
| if (Blocking == BlockingT::BLOCKING) { | ||
| std::vector<std::shared_ptr<SYCLMemObjI>> TempStorage; | ||
| { | ||
| std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex}; | ||
| MDeferredMemObjRelease.swap(TempStorage); | ||
| } | ||
| // if any objects in TempStorage exist - it is leaving scope and being | ||
| // deleted | ||
| } | ||
|
|
||
| std::vector<std::shared_ptr<SYCLMemObjI>> ObjsReadyToRelease; | ||
| { | ||
|
|
||
| ReadLockT Lock = ReadLockT(MGraphLock, std::try_to_lock); | ||
| if (Lock.owns_lock()) { | ||
| // Not expected that Blocking == true will be used in parallel with | ||
| // adding MemObj to storage, no such scenario. | ||
| std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex}; | ||
| auto MemObjIt = MDeferredMemObjRelease.begin(); | ||
| while (MemObjIt != MDeferredMemObjRelease.end()) { | ||
| MemObjRecord *Record = MGraphBuilder.getMemObjRecord((*MemObjIt).get()); | ||
| if (!checkLeavesCompletion(Record)) { | ||
| MemObjIt++; | ||
| continue; | ||
| } | ||
| ObjsReadyToRelease.push_back(*MemObjIt); | ||
| MemObjIt = MDeferredMemObjRelease.erase(MemObjIt); | ||
| } | ||
| } | ||
| } | ||
| // if any ObjsReadyToRelease found - it is leaving scope and being deleted | ||
| } | ||
| } // namespace detail | ||
| } // __SYCL_INLINE_VER_NAMESPACE(_V1) | ||
| } // namespace sycl | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -442,9 +442,13 @@ class Scheduler { | |||||
| QueueImplPtr getDefaultHostQueue() { return DefaultHostQueue; } | ||||||
|
|
||||||
| static MemObjRecord *getMemObjRecord(const Requirement *const Req); | ||||||
| // Virtual for testing purposes only | ||||||
|
||||||
| // Virtual for testing purposes only | |
| // Virtual for testing purposes only |
Is it still relevant?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nope, fixed in 71e9048
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please clarify why inline is needed here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed in 71e9048
you are right, compiler will decide
Uh oh!
There was an error while loading. Please reload this page.