diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 9a2997427890d..8a46b7f9002e8 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -5408,6 +5408,11 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, // reference count on the kernel, using the kernel saved in CommandData. PI_CALL(piKernelRetain(Kernel)); + auto res=Queue->Device->getSubmitTime(*Event); + if(res != PI_SUCCESS){ + return res; + } + // Add to list of kernels to be submitted if (IndirectAccessTrackingEnabled) Queue->KernelsToBeSubmitted.push_back(Kernel); @@ -5760,9 +5765,11 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, return ReturnValue(ContextEndTime); } case PI_PROFILING_INFO_COMMAND_QUEUED: - case PI_PROFILING_INFO_COMMAND_SUBMIT: - // TODO: Support these when Level Zero supported is added. - return ReturnValue(uint64_t{0}); + case PI_PROFILING_INFO_COMMAND_SUBMIT: { + // No solid way of handling possible wrap around as the event may not be + // signalled by device, thus no way of obtaining event start time + return ReturnValue(Event->submitTime); + } default: zePrint("piEventGetProfilingInfo: not supported ParamName\n"); return PI_ERROR_INVALID_VALUE; @@ -6314,6 +6321,11 @@ pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList, ZE_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent)); + auto res=Queue->Device->getSubmitTime(*Event); + if(res != PI_SUCCESS){ + return res; + } + // Execute command list asynchronously as the event will be used // to track down its completion. 
return Queue->executeCommandList(CommandList); @@ -6407,6 +6419,10 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, insertBarrierIntoCmdList(CmdList, TmpWaitList, *Event, IsInternal)) return Res; + auto res=Queue->Device->getSubmitTime(*Event); + if(res != PI_SUCCESS){ + return res; + } if (auto Res = Queue->executeCommandList(CmdList, false, OkToBatch)) return Res; @@ -6678,6 +6694,10 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst, ZE_CALL(zeCommandListAppendMemoryCopy, (ZeCommandList, Dst, Src, Size, ZeEvent, 0, nullptr)); + auto res=Queue->Device->getSubmitTime(*Event); + if(res != PI_SUCCESS){ + return res; + } if (auto Res = Queue->executeCommandList(CommandList, BlockingWrite, OkToBatch)) return Res; @@ -6780,6 +6800,10 @@ static pi_result enqueueMemCopyRectHelper( zePrint("calling zeCommandListAppendBarrier() with Event %#lx\n", pi_cast(ZeEvent)); + auto res=Queue->Device->getSubmitTime(*Event); + if(res != PI_SUCCESS){ + return res; + } if (auto Res = Queue->executeCommandList(CommandList, Blocking, OkToBatch)) return Res; @@ -6999,6 +7023,10 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr, pi_cast(ZeEvent)); printZeEventList(WaitList); + auto res=Queue->Device->getSubmitTime(*Event); + if(res != PI_SUCCESS){ + return res; + } // Execute command list asynchronously, as the event will be used // to track down its completion. if (auto Res = Queue->executeCommandList(CommandList, false, OkToBatch)) @@ -7054,6 +7082,11 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, ze_event_handle_t ZeEvent = nullptr; bool UseCopyEngine = false; + + auto res=Queue->Device->getSubmitTime(*Event); + if(res != PI_SUCCESS){ + return res; + } { // Lock automatically releases when this goes out of scope. 
std::scoped_lock lock(Queue->Mutex); @@ -7512,6 +7545,11 @@ static pi_result enqueueMemImageCommandHelper( return PI_ERROR_INVALID_OPERATION; } + auto res=Queue->Device->getSubmitTime(*Event); + if(res != PI_SUCCESS){ + return res; + } + if (auto Res = Queue->executeCommandList(CommandList, IsBlocking, OkToBatch)) return Res; @@ -8417,6 +8455,11 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, // so manually add command to signal our event. ZE_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent)); + auto res=Queue->Device->getSubmitTime(*Event); + if(res != PI_SUCCESS){ + return res; + } + if (auto Res = Queue->executeCommandList(CommandList, false)) return Res; @@ -8484,6 +8527,11 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, // so manually add command to signal our event. ZE_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent)); + auto res=Queue->Device->getSubmitTime(*Event); + if(res != PI_SUCCESS){ + return res; + } + Queue->executeCommandList(CommandList, false); return PI_SUCCESS; @@ -9004,4 +9052,22 @@ pi_result _pi_buffer::free() { return PI_SUCCESS; } +inline pi_result _pi_device::getDeviceTime(uint64_t *deviceTime) { + + uint64_t ZeTimerResolution = ZeDeviceProperties->timerResolution; + uint64_t TimestampMaxCount = ((1ULL << ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); + uint64_t deviceClockCount, dummy; + + ZE_CALL(zeDeviceGetGlobalTimestamps, + (ZeDevice, &dummy, &deviceClockCount)); + *deviceTime = (deviceClockCount & TimestampMaxCount) * ZeTimerResolution; + return PI_SUCCESS; +} + +inline pi_result _pi_device::getSubmitTime(pi_event event) { + if (!event->isProfilingEnabled()) { + return PI_SUCCESS; + } + return getDeviceTime(&(event->submitTime)); +} } // extern "C" diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 7e35310ff93b1..15e91698b964b 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ 
b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -484,7 +484,7 @@ struct _pi_device : _pi_object { pi_device ParentDevice = nullptr) : ZeDevice{Device}, Platform{Plt}, RootDevice{ParentDevice}, ImmCommandListsPreferred{false}, ZeDeviceProperties{}, - ZeDeviceComputeProperties{} { + ZeDeviceComputeProperties{}{ // NOTE: one must additionally call initialize() to complete // PI device creation. } @@ -569,6 +569,17 @@ struct _pi_device : _pi_object { bool isSubDevice() { return RootDevice != nullptr; } + /// Retrieves current wall-clock time from device + /// + /// \param out Variable where device time would be stored + inline pi_result getDeviceTime(uint64_t *out); + + /// Retrieves the current wall-clock time from device + /// and stores it in the submitTime field of the event. + /// + /// \param event + inline pi_result getSubmitTime(pi_event event); + // Cache of the immutable device properties. ZeCache> ZeDeviceProperties; ZeCache> ZeDeviceComputeProperties; @@ -1350,6 +1361,10 @@ struct _pi_event : _pi_object { (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0; } + // Keeps track of the submission time of the command list associated with this + // event, if event is user visible + uint64_t submitTime = 0; + // Keeps the command-queue and command associated with the event. // These are NULL for the user events. 
pi_queue Queue = {nullptr}; diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 2cde4ca788830..c0271eb9f198b 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -58,6 +58,7 @@ piMemRelease piMemRetain piPlatformGetInfo piPlatformsGet +piPluginGetLastError piPluginInit piProgramBuild piProgramCompile @@ -78,7 +79,6 @@ piSamplerCreate piSamplerGetInfo piSamplerRelease piSamplerRetain -piPluginGetLastError piTearDown piclProgramCreateWithSource piextContextCreateWithNativeHandle diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index f7c2736a1432b..931a88e9d769a 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -20,13 +20,13 @@ piMemBufferCreate piMemBufferPartition piMemImageCreate piPlatformsGet +piPluginGetLastError piPluginInit piProgramCreate piProgramCreateWithBinary piProgramLink piQueueCreate piSamplerCreate -piPluginGetLastError piTearDown piclProgramCreateWithSource piextContextCreateWithNativeHandle