-
Notifications
You must be signed in to change notification settings - Fork 808
[SYCL][ESIMD][EMU] Queue/Event handling change for esimd_cpu #4201
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
2d26bcf
c291db3
05e656b
29d46df
349dc85
1987a64
e309c7f
65d39cd
605bd5e
a26379a
16b3de6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -351,7 +351,8 @@ extern "C" { | |
| std::cerr << "Warning : Not Implemented : " << __FUNCTION__ \ | ||
| << " - File : " << __FILE__; \ | ||
| std::cerr << " / Line : " << __LINE__ << std::endl; \ | ||
| } | ||
| } \ | ||
| return PI_SUCCESS; | ||
|
|
||
| pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, | ||
| pi_uint32 *NumPlatforms) { | ||
|
|
@@ -684,6 +685,12 @@ pi_result piContextRelease(pi_context Context) { | |
|
|
||
| pi_result piQueueCreate(pi_context Context, pi_device Device, | ||
| pi_queue_properties Properties, pi_queue *Queue) { | ||
| if (Properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { | ||
| // TODO : Support Out-of-order Queue with piQueueFinish | ||
| *Queue = nullptr; | ||
| return PI_INVALID_QUEUE_PROPERTIES; | ||
| } | ||
|
|
||
| cm_support::CmQueue *CmQueue; | ||
|
|
||
| int Result = Context->Device->CmDevicePtr->CreateQueue(CmQueue); | ||
|
|
@@ -729,7 +736,9 @@ pi_result piQueueRelease(pi_queue Queue) { | |
| } | ||
|
|
||
| pi_result piQueueFinish(pi_queue) { | ||
| DIE_NO_IMPLEMENTATION; | ||
| // TODO/FIXME : Only in-order queue is called for this API. Support | ||
| // Out-of-order Queue with piQueueCreate | ||
| CONTINUE_NO_IMPLEMENTATION; | ||
| } | ||
|
|
||
| pi_result piextQueueGetNativeHandle(pi_queue, pi_native_handle *) { | ||
|
|
@@ -1190,6 +1199,12 @@ pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, | |
|
|
||
| _pi_buffer *buf = static_cast<_pi_buffer *>(Src); | ||
|
|
||
| std::unique_ptr<_pi_event> RetEv{nullptr}; | ||
| if (Event) { | ||
| RetEv = std::unique_ptr<_pi_event>(new _pi_event()); | ||
| RetEv->IsDummyEvent = true; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As I understand your current implementation, with all commands being blocking, all events returned from enqueue operations are effectively in "completed" state once control is returned back to the caller of the enqueue operation.
The same applies to other places where events are created.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @romanovvlad suggested this implementation from CUDA PI implementation from recent phone call. Also, he was fine with this 'DummyEvent' approach.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
OK, now I see the point - unique_ptr is useful to automatically release the memory allocated by new _pi_even in case buf->CmBufferPtr->ReadSurface throws an exception. Disregard this my comment then.
|
||
| } | ||
|
|
||
| int Status = | ||
| buf->CmBufferPtr->ReadSurface(reinterpret_cast<unsigned char *>(Dst), | ||
| nullptr, // event | ||
|
|
@@ -1200,18 +1215,7 @@ pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, | |
| } | ||
|
|
||
| if (Event) { | ||
| try { | ||
| *Event = new _pi_event(); | ||
| } catch (const std::bad_alloc &) { | ||
| return PI_OUT_OF_HOST_MEMORY; | ||
| } catch (...) { | ||
| return PI_ERROR_UNKNOWN; | ||
| } | ||
|
|
||
| // At this point, CM already completed buffer-read (ReadSurface) | ||
| // operation. Therefore, 'event' corresponding to this operation | ||
| // is marked as dummy one and ignored during events-waiting. | ||
| (*Event)->IsDummyEvent = true; | ||
| *Event = RetEv.release(); | ||
| } | ||
|
|
||
| return PI_SUCCESS; | ||
|
|
@@ -1286,6 +1290,14 @@ pi_result piEnqueueMemImageRead(pi_queue CommandQueue, pi_mem Image, | |
| assert(false && "ESIMD_CPU does not support Blocking Read"); | ||
| } | ||
| _pi_image *PiImg = static_cast<_pi_image *>(Image); | ||
|
|
||
| std::unique_ptr<_pi_event> RetEv{nullptr}; | ||
|
|
||
| if (Event) { | ||
| RetEv = std::unique_ptr<_pi_event>(new _pi_event()); | ||
| RetEv->IsDummyEvent = true; | ||
| } | ||
|
|
||
| int Status = | ||
| PiImg->CmSurfacePtr->ReadSurface(reinterpret_cast<unsigned char *>(Ptr), | ||
| nullptr, // event | ||
|
|
@@ -1295,18 +1307,7 @@ pi_result piEnqueueMemImageRead(pi_queue CommandQueue, pi_mem Image, | |
| } | ||
|
|
||
| if (Event) { | ||
| try { | ||
| *Event = new _pi_event(); | ||
| } catch (const std::bad_alloc &) { | ||
| return PI_OUT_OF_HOST_MEMORY; | ||
| } catch (...) { | ||
| return PI_ERROR_UNKNOWN; | ||
| } | ||
|
|
||
| // At this point, CM already completed image-read (ReadSurface) | ||
| // operation. Therefore, 'event' corresponding to this operation | ||
| // is marked as dummy one and ignored during events-waiting. | ||
| (*Event)->IsDummyEvent = true; | ||
| *Event = RetEv.release(); | ||
| } | ||
| return PI_SUCCESS; | ||
| } | ||
|
|
@@ -1360,25 +1361,39 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, | |
| } | ||
| } | ||
|
|
||
| std::unique_ptr<_pi_event> RetEv{nullptr}; | ||
|
|
||
| if (Event) { | ||
| RetEv = std::unique_ptr<_pi_event>(new _pi_event()); | ||
| RetEv->IsDummyEvent = true; | ||
| } | ||
|
|
||
| switch (WorkDim) { | ||
| case 1: | ||
| InvokeImpl<1>::invoke(Kernel, GlobalWorkOffset, GlobalWorkSize, | ||
| LocalWorkSize); | ||
| return PI_SUCCESS; | ||
| break; | ||
|
|
||
| case 2: | ||
| InvokeImpl<2>::invoke(Kernel, GlobalWorkOffset, GlobalWorkSize, | ||
| LocalWorkSize); | ||
| return PI_SUCCESS; | ||
| break; | ||
|
|
||
| case 3: | ||
| InvokeImpl<3>::invoke(Kernel, GlobalWorkOffset, GlobalWorkSize, | ||
| LocalWorkSize); | ||
| return PI_SUCCESS; | ||
| break; | ||
|
|
||
| default: | ||
| DIE_NO_IMPLEMENTATION; | ||
| break; | ||
| } | ||
|
|
||
| if (Event) { | ||
| *Event = RetEv.release(); | ||
| } | ||
|
|
||
| return PI_SUCCESS; | ||
| } | ||
|
|
||
| pi_result piextKernelCreateWithNativeHandle(pi_native_handle, pi_context, bool, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
piQueueFinishmust guarantee that all previously enqueued commands had completed. The fact that the queue is in-order does not guarantee that. In-order queue just means enqueued commands will execute in the enqueue order. Still, they can can execute asynchronously with the thread which enqueues the commands.As far as I understand your current implementation, it is safe for
piQueueFinishto be no-op just because all the commands like kernel execution or memory copy are blocking - enqueue of any command with ESIMD EMU plugin does not return until the command is actually finished.If so, please change the comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed the comment.