@@ -654,8 +654,11 @@ void CHIPQueueOpenCL::recordEvent(chipstar::Event *ChipEvent) {
654654 logTrace (" chipstar::Queue::recordEvent({})" , (void *)ChipEvent);
655655 auto ChipEventCL = static_cast <CHIPEventOpenCL *>(ChipEvent);
656656
657- std::shared_ptr<chipstar::Event> LastEvent = getLastEvent ();
658- ChipEventCL->recordEventCopy (LastEvent ? LastEvent : enqueueMarker ());
657+ // Need profiling command queue for querying timestamps for possible
658+ // later hipEventElapsedTime() calls.
659+ switchModeTo (Profiling);
660+
661+ ChipEventCL->recordEventCopy (enqueueMarker ());
659662 ChipEventCL->setRecording ();
660663}
661664
@@ -1084,24 +1087,25 @@ void CHIPQueueOpenCL::MemMap(const chipstar::AllocationInfo *AllocInfo,
10841087 auto [SyncQueuesEventHandles, EventLocks] =
10851088 addDependenciesQueueSync (MemMapEvent);
10861089
1090+ auto QueueHandle = get ()->get ();
10871091 cl_int Status;
10881092 if (Type == chipstar::Queue::MEM_MAP_TYPE::HOST_READ) {
10891093 logDebug (" CHIPQueueOpenCL::MemMap HOST_READ" );
1090- Status = clEnqueueSVMMap (ClQueue_-> get (), CL_TRUE, CL_MAP_READ,
1091- AllocInfo-> HostPtr , AllocInfo->Size ,
1092- SyncQueuesEventHandles.size (),
1093- SyncQueuesEventHandles.data (), MemMapEventNative);
1094+ Status =
1095+ clEnqueueSVMMap (QueueHandle, CL_TRUE, CL_MAP_READ , AllocInfo->HostPtr ,
1096+ AllocInfo-> Size , SyncQueuesEventHandles.size (),
1097+ SyncQueuesEventHandles.data (), MemMapEventNative);
10941098 } else if (Type == chipstar::Queue::MEM_MAP_TYPE::HOST_WRITE) {
10951099 logDebug (" CHIPQueueOpenCL::MemMap HOST_WRITE" );
1096- Status = clEnqueueSVMMap (ClQueue_-> get (), CL_TRUE, CL_MAP_WRITE,
1097- AllocInfo-> HostPtr , AllocInfo->Size ,
1098- SyncQueuesEventHandles.size (),
1099- SyncQueuesEventHandles.data (), MemMapEventNative);
1100+ Status =
1101+ clEnqueueSVMMap (QueueHandle, CL_TRUE, CL_MAP_WRITE , AllocInfo->HostPtr ,
1102+ AllocInfo-> Size , SyncQueuesEventHandles.size (),
1103+ SyncQueuesEventHandles.data (), MemMapEventNative);
11001104 } else if (Type == chipstar::Queue::MEM_MAP_TYPE::HOST_READ_WRITE) {
11011105 logDebug (" CHIPQueueOpenCL::MemMap HOST_READ_WRITE" );
1102- Status = clEnqueueSVMMap (ClQueue_-> get () , CL_TRUE,
1103- CL_MAP_READ | CL_MAP_WRITE , AllocInfo->HostPtr ,
1104- AllocInfo-> Size , SyncQueuesEventHandles.size (),
1106+ Status = clEnqueueSVMMap (QueueHandle , CL_TRUE, CL_MAP_READ | CL_MAP_WRITE ,
1107+ AllocInfo-> HostPtr , AllocInfo->Size ,
1108+ SyncQueuesEventHandles.size (),
11051109 SyncQueuesEventHandles.data (), MemMapEventNative);
11061110 } else {
11071111 assert (0 && " Invalid MemMap Type" );
@@ -1123,13 +1127,23 @@ void CHIPQueueOpenCL::MemUnmap(const chipstar::AllocationInfo *AllocInfo) {
11231127 addDependenciesQueueSync (MemMapEvent);
11241128
11251129 auto Status = clEnqueueSVMUnmap (
1126- ClQueue_ ->get (), AllocInfo->HostPtr , SyncQueuesEventHandles.size (),
1130+ get () ->get (), AllocInfo->HostPtr , SyncQueuesEventHandles.size (),
11271131 SyncQueuesEventHandles.data (),
11281132 std::static_pointer_cast<CHIPEventOpenCL>(MemMapEvent)->getNativePtr ());
11291133 assert (Status == CL_SUCCESS);
11301134}
11311135
1132- cl::CommandQueue *CHIPQueueOpenCL::get () { return ClQueue_; }
1136+ cl::CommandQueue *CHIPQueueOpenCL::get () {
1137+ switch (QueueMode_) {
1138+ default :
1139+ assert (!" Unknown queue mode!" );
1140+ // Fallthrough.
1141+ case Profiling:
1142+ return &ClProfilingQueue_;
1143+ case Regular:
1144+ return &ClRegularQueue_;
1145+ }
1146+ }
11331147
11341148void CHIPQueueOpenCL::addCallback (hipStreamCallback_t Callback,
11351149 void *UserData) {
@@ -1187,7 +1201,7 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback,
11871201 CHIPERR_CHECK_LOG_AND_THROW (Status, CL_SUCCESS, hipErrorTbd);
11881202
11891203 updateLastEvent (CallbackCompleted);
1190- ClQueue_ ->flush ();
1204+ get () ->flush ();
11911205
11921206 // Now the CB can start executing in the background:
11931207 clSetUserEventStatus (
@@ -1251,7 +1265,7 @@ CHIPQueueOpenCL::launchImpl(chipstar::ExecItem *ExecItem) {
12511265 auto [SyncQueuesEventHandles, EventLocks] =
12521266 addDependenciesQueueSync (LaunchEvent);
12531267 auto Status = clEnqueueNDRangeKernel (
1254- ClQueue_ ->get (), KernelHandle, NumDims, GlobalOffset, Global, Local,
1268+ get () ->get (), KernelHandle, NumDims, GlobalOffset, Global, Local,
12551269 SyncQueuesEventHandles.size (), SyncQueuesEventHandles.data (),
12561270 std::static_pointer_cast<CHIPEventOpenCL>(LaunchEvent)->getNativePtr ());
12571271 CHIPERR_CHECK_LOG_AND_THROW (Status, CL_SUCCESS, hipErrorTbd);
@@ -1289,7 +1303,7 @@ CHIPQueueOpenCL::launchImpl(chipstar::ExecItem *ExecItem) {
12891303}
12901304
12911305CHIPQueueOpenCL::CHIPQueueOpenCL (chipstar::Device *ChipDevice, int Priority,
1292- cl_command_queue Queue )
1306+ cl_command_queue QueueForInterop )
12931307 : chipstar::Queue(ChipDevice, chipstar::QueueFlags{}, Priority) {
12941308
12951309 cl_queue_priority_khr PrioritySelection;
@@ -1312,31 +1326,57 @@ CHIPQueueOpenCL::CHIPQueueOpenCL(chipstar::Device *ChipDevice, int Priority,
13121326 if (PrioritySelection != CL_QUEUE_PRIORITY_MED_KHR)
13131327 logWarn (" CHIPQueueOpenCL is ignoring Priority value" );
13141328
1315- if (Queue)
1316- ClQueue_ = new cl::CommandQueue (Queue);
1317- else {
1318- cl::Context *ClContext_ = ((CHIPContextOpenCL *)ChipContext_)->get ();
1319- cl::Device *ClDevice_ = ((CHIPDeviceOpenCL *)ChipDevice_)->get ();
1329+ if (QueueForInterop) {
1330+ auto QFromUser = cl::CommandQueue (QueueForInterop);
1331+
13201332 cl_int Status;
1321- // Adding priority breaks correctness?
1322- // cl_queue_properties QueueProperties[] = {
1323- // CL_QUEUE_PRIORITY_KHR, PrioritySelection, CL_QUEUE_PROPERTIES,
1324- // CL_QUEUE_PROFILING_ENABLE, 0};
1325- cl_queue_properties QueueProperties[] = {CL_QUEUE_PROPERTIES,
1326- CL_QUEUE_PROFILING_ENABLE, 0 };
1333+ cl_command_queue_properties QProps =
1334+ QFromUser.getInfo <CL_QUEUE_PROPERTIES>(&Status);
1335+ CHIPERR_CHECK_LOG_AND_THROW (Status, CL_SUCCESS,
1336+ hipErrorInitializationError);
13271337
1328- const cl_command_queue Q = clCreateCommandQueueWithProperties (
1329- ClContext_->get (), ClDevice_->get (), QueueProperties, &Status);
1330- ClQueue_ = new cl::CommandQueue (Q);
1338+ bool ProfilingIsEnabled = CL_QUEUE_PROFILING_ENABLE & QProps;
1339+ if (ProfilingIsEnabled) {
1340+ ClProfilingQueue_ = QFromUser;
1341+ QueueMode_ = Profiling;
1342+ } else {
1343+ logWarn (" Provided queue has profiling disable. hipEventElapsedTime() "
1344+ " calls will fail." );
1345+ ClRegularQueue_ = QFromUser;
1346+ QueueMode_ = Regular;
1347+ }
1348+ UsedInInterOp = true ;
1349+ } else {
1350+ cl::Context &ClContext =
1351+ *static_cast <CHIPContextOpenCL *>(ChipContext_)->get ();
1352+ cl::Device &ClDevice = *static_cast <CHIPDeviceOpenCL *>(ChipDevice_)->get ();
13311353
1354+ cl_queue_properties QPropsForProfiling[] = {CL_QUEUE_PROPERTIES,
1355+ CL_QUEUE_PROFILING_ENABLE, 0 };
1356+
1357+ cl_int Status;
1358+ ClRegularQueue_ = cl::CommandQueue (
1359+ clCreateCommandQueueWithProperties (ClContext.get (), ClDevice.get (),
1360+ nullptr , &Status),
1361+ false );
13321362 CHIPERR_CHECK_LOG_AND_THROW (Status, CL_SUCCESS,
13331363 hipErrorInitializationError);
1364+
1365+ if (!ChipEnvVars.getOCLDisableQueueProfiling ()) {
1366+ ClProfilingQueue_ = cl::CommandQueue (
1367+ clCreateCommandQueueWithProperties (ClContext.get (), ClDevice.get (),
1368+ QPropsForProfiling, &Status),
1369+ false );
1370+ CHIPERR_CHECK_LOG_AND_THROW (Status, CL_SUCCESS,
1371+ hipErrorInitializationError);
1372+ }
1373+
1374+ QueueMode_ = Regular;
13341375 }
13351376}
13361377
13371378CHIPQueueOpenCL::~CHIPQueueOpenCL () {
13381379 logTrace (" ~CHIPQueueOpenCL() {}" , (void *)this );
1339- delete ClQueue_;
13401380}
13411381
13421382std::shared_ptr<chipstar::Event>
@@ -1356,7 +1396,7 @@ CHIPQueueOpenCL::memCopyAsyncImpl(void *Dst, const void *Src, size_t Size) {
13561396 // a maker here, so we can return an event.
13571397 cl::Event MarkerEvent;
13581398 auto Status = clEnqueueMarker (
1359- ClQueue_ ->get (),
1399+ get () ->get (),
13601400 std::static_pointer_cast<CHIPEventOpenCL>(Event)->getNativePtr ());
13611401 CHIPERR_CHECK_LOG_AND_THROW (Status, CL_SUCCESS, hipErrorTbd);
13621402 } else {
@@ -1365,8 +1405,8 @@ CHIPQueueOpenCL::memCopyAsyncImpl(void *Dst, const void *Src, size_t Size) {
13651405#endif
13661406 auto [SyncQueuesEventHandles, EventLocks] = addDependenciesQueueSync (Event);
13671407 auto Status = ::clEnqueueSVMMemcpy (
1368- ClQueue_ ->get (), CL_FALSE, Dst, Src, Size,
1369- SyncQueuesEventHandles.size (), SyncQueuesEventHandles. data (),
1408+ get () ->get (), CL_FALSE, Dst, Src, Size, SyncQueuesEventHandles. size () ,
1409+ SyncQueuesEventHandles.data (),
13701410 std::static_pointer_cast<CHIPEventOpenCL>(Event)->getNativePtr ());
13711411 CHIPERR_CHECK_LOG_AND_THROW (Status, CL_SUCCESS, hipErrorRuntimeMemory);
13721412 }
@@ -1378,7 +1418,7 @@ void CHIPQueueOpenCL::finish() {
13781418#ifdef CHIP_DUBIOUS_LOCKS
13791419 LOCK (Backend->DubiousLockOpenCL )
13801420#endif
1381- auto Status = ClQueue_ ->finish ();
1421+ auto Status = get () ->finish ();
13821422 CHIPERR_CHECK_LOG_AND_ABORT (Status, CL_SUCCESS, hipErrorTbd);
13831423}
13841424
@@ -1391,7 +1431,7 @@ CHIPQueueOpenCL::memFillAsyncImpl(void *Dst, size_t Size, const void *Pattern,
13911431 logTrace (" clSVMmemfill {} / {} B\n " , Dst, Size);
13921432 auto [SyncQueuesEventHandles, EventLocks] = addDependenciesQueueSync (Event);
13931433 int Retval = ::clEnqueueSVMMemFill (
1394- ClQueue_ ->get (), Dst, Pattern, PatternSize, Size,
1434+ get () ->get (), Dst, Pattern, PatternSize, Size,
13951435 SyncQueuesEventHandles.size (), SyncQueuesEventHandles.data (),
13961436 std::static_pointer_cast<CHIPEventOpenCL>(Event)->getNativePtr ());
13971437 CHIPERR_CHECK_LOG_AND_THROW (Retval, CL_SUCCESS, hipErrorRuntimeMemory);
@@ -1433,8 +1473,12 @@ hipError_t CHIPQueueOpenCL::getBackendHandles(uintptr_t *NativeInfo,
14331473 return hipSuccess;
14341474 }
14351475
1476+ // Interop API expects one queue handle. Switch to the profiling queue which
1477+ // works for the interop user and for us in case we need profiling later.
1478+ switchModeTo (Profiling);
1479+
14361480 // Get queue handler
1437- NativeInfo[4 ] = (uintptr_t )ClQueue_ ->get ();
1481+ NativeInfo[4 ] = (uintptr_t )get () ->get ();
14381482
14391483 // Get context handler
14401484 cl::Context *Ctx = ((CHIPContextOpenCL *)ChipContext_)->get ();
@@ -1481,15 +1525,15 @@ std::shared_ptr<chipstar::Event> CHIPQueueOpenCL::enqueueBarrierImpl(
14811525 SyncQueuesEventHandles.push_back (Event);
14821526 }
14831527 auto Status = clEnqueueBarrierWithWaitList (
1484- ClQueue_ ->get (), SyncQueuesEventHandles.size (),
1528+ get () ->get (), SyncQueuesEventHandles.size (),
14851529 SyncQueuesEventHandles.data (),
14861530 &(std::static_pointer_cast<CHIPEventOpenCL>(Event)->getNativeRef ()));
14871531 CHIPERR_CHECK_LOG_AND_THROW (Status, CL_SUCCESS, hipErrorTbd);
14881532 } else {
14891533 // auto Status = ClQueue_->enqueueBarrierWithWaitList(nullptr, &Barrier);
14901534 auto [SyncQueuesEventHandles, EventLocks] = addDependenciesQueueSync (Event);
14911535 auto Status = clEnqueueBarrierWithWaitList (
1492- ClQueue_ ->get (), SyncQueuesEventHandles.size (),
1536+ get () ->get (), SyncQueuesEventHandles.size (),
14931537 SyncQueuesEventHandles.data (),
14941538 std::static_pointer_cast<CHIPEventOpenCL>(Event)->getNativePtr ());
14951539 CHIPERR_CHECK_LOG_AND_THROW (Status, CL_SUCCESS, hipErrorTbd);
@@ -1502,6 +1546,47 @@ std::shared_ptr<chipstar::Event> CHIPQueueOpenCL::enqueueBarrierImpl(
15021546 return Event;
15031547}
15041548
1549+ void CHIPQueueOpenCL::switchModeTo (QueueMode ToMode) {
1550+ if (QueueMode_ == ToMode)
1551+ return ;
1552+
1553+ if (ToMode == Profiling && ChipEnvVars.getOCLDisableQueueProfiling ()) {
1554+ logWarn (" Queue profiling is disabled by request. hipEventElapsedTime() "
1555+ " will not work." );
1556+ return ;
1557+ }
1558+
1559+ // Can't switch mode if the queue is used for interop. Otherwise, we
1560+ // may cause situations where commands enqueued via HIP and other
1561+ // API end up be executed out of order.
1562+ if (UsedInInterOp)
1563+ CHIPERR_LOG_AND_THROW (" Can't switch mode for a queue used of interop." ,
1564+ hipErrorTbd);
1565+
1566+ logDebug (" Queue {}: switching mode to {}" , (void *)this ,
1567+ ToMode == Profiling ? " profiling" : " regular" );
1568+
1569+ // Make sure commands are executed in-order across the current and
1570+ // switched-to queue.
1571+ auto &FromQ = *get ();
1572+ auto &ToQ = ToMode == Profiling ? ClProfilingQueue_ : ClRegularQueue_;
1573+ assert (FromQ.get ());
1574+ assert (ToQ.get ());
1575+
1576+ cl_event SwitchEv;
1577+ cl_int Status;
1578+ Status = clEnqueueMarkerWithWaitList (FromQ.get (), 0 , nullptr , &SwitchEv);
1579+ CHIPERR_CHECK_LOG_AND_THROW (Status, CL_SUCCESS, hipErrorTbd);
1580+
1581+ Status = clEnqueueBarrierWithWaitList (ToQ.get (), 1 , &SwitchEv, nullptr );
1582+ CHIPERR_CHECK_LOG_AND_THROW (Status, CL_SUCCESS, hipErrorTbd);
1583+
1584+ auto *ChipEv = new CHIPEventOpenCL (
1585+ static_cast <CHIPContextOpenCL *>(ChipContext_), SwitchEv);
1586+ updateLastEvent (std::shared_ptr<chipstar::Event>(ChipEv));
1587+ QueueMode_ = ToMode;
1588+ }
1589+
15051590// CHIPExecItemOpenCL
15061591// *************************************************************************
15071592
0 commit comments