Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 69 additions & 3 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5408,6 +5408,11 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
// reference count on the kernel, using the kernel saved in CommandData.
PI_CALL(piKernelRetain(Kernel));

auto res=Queue->Device->getSubmitTime(*Event);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this be moved to executeCommandList?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would need to add a pi_event parameter to executeCommandList and executeOpenCommandList. Is that okay?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't you just record ALL events in the command-list? Maybe we should actually do this at batch submission?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't you just record ALL events in the command-list?

Wouldn't it be redundant to record all events as their profiling information wouldn't be visible to the user?

Maybe we should actually do this at batch submission?

I might be wrong, but doesn't executeCommandList submit the command to a batch if we're not using an immediate command list?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The executeCommandList does add commands to a batch, but closes it and submits if the batch is full. This is the time when I think you should record the "command_submit" time. All events in the command-list are from these enqueue interfaces, so you are already recording submit time for all of the commands.

Copy link
Contributor Author

@raaiq1 raaiq1 Nov 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to SYCL specifications, the submission time has to be calculated before the submit method returns. If we record the submission time when the batch is submitted, can't we run into a possibility that the time is recorded after queue.submit() returns ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you are right here, still you'd do it inside executeCommandList

if(res != PI_SUCCESS){
return res;
}

// Add to list of kernels to be submitted
if (IndirectAccessTrackingEnabled)
Queue->KernelsToBeSubmitted.push_back(Kernel);
Expand Down Expand Up @@ -5760,9 +5765,11 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
return ReturnValue(ContextEndTime);
}
case PI_PROFILING_INFO_COMMAND_QUEUED:
case PI_PROFILING_INFO_COMMAND_SUBMIT:
// TODO: Support these when Level Zero supported is added.
return ReturnValue(uint64_t{0});
case PI_PROFILING_INFO_COMMAND_SUBMIT: {
// No solid way of handling a possible wrap-around, as the event may not be
// signalled by the device, thus no way of obtaining the event start time
return ReturnValue(Event->submitTime);
}
default:
zePrint("piEventGetProfilingInfo: not supported ParamName\n");
return PI_ERROR_INVALID_VALUE;
Expand Down Expand Up @@ -6314,6 +6321,11 @@ pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList,

ZE_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));

auto res=Queue->Device->getSubmitTime(*Event);
if(res != PI_SUCCESS){
return res;
}

// Execute command list asynchronously as the event will be used
// to track down its completion.
return Queue->executeCommandList(CommandList);
Expand Down Expand Up @@ -6407,6 +6419,10 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
insertBarrierIntoCmdList(CmdList, TmpWaitList, *Event, IsInternal))
return Res;

auto res=Queue->Device->getSubmitTime(*Event);
if(res != PI_SUCCESS){
return res;
}
if (auto Res = Queue->executeCommandList(CmdList, false, OkToBatch))
return Res;

Expand Down Expand Up @@ -6678,6 +6694,10 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
ZE_CALL(zeCommandListAppendMemoryCopy,
(ZeCommandList, Dst, Src, Size, ZeEvent, 0, nullptr));

auto res=Queue->Device->getSubmitTime(*Event);
if(res != PI_SUCCESS){
return res;
}
if (auto Res =
Queue->executeCommandList(CommandList, BlockingWrite, OkToBatch))
return Res;
Expand Down Expand Up @@ -6780,6 +6800,10 @@ static pi_result enqueueMemCopyRectHelper(
zePrint("calling zeCommandListAppendBarrier() with Event %#lx\n",
pi_cast<std::uintptr_t>(ZeEvent));

auto res=Queue->Device->getSubmitTime(*Event);
if(res != PI_SUCCESS){
return res;
}
if (auto Res = Queue->executeCommandList(CommandList, Blocking, OkToBatch))
return Res;

Expand Down Expand Up @@ -6999,6 +7023,10 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr,
pi_cast<pi_uint64>(ZeEvent));
printZeEventList(WaitList);

auto res=Queue->Device->getSubmitTime(*Event);
if(res != PI_SUCCESS){
return res;
}
// Execute command list asynchronously, as the event will be used
// to track down its completion.
if (auto Res = Queue->executeCommandList(CommandList, false, OkToBatch))
Expand Down Expand Up @@ -7054,6 +7082,11 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Mem, pi_bool BlockingMap,
ze_event_handle_t ZeEvent = nullptr;

bool UseCopyEngine = false;

auto res=Queue->Device->getSubmitTime(*Event);
if(res != PI_SUCCESS){
return res;
}
{
// Lock automatically releases when this goes out of scope.
std::scoped_lock<pi_shared_mutex> lock(Queue->Mutex);
Expand Down Expand Up @@ -7512,6 +7545,11 @@ static pi_result enqueueMemImageCommandHelper(
return PI_ERROR_INVALID_OPERATION;
}

auto res=Queue->Device->getSubmitTime(*Event);
if(res != PI_SUCCESS){
return res;
}

if (auto Res = Queue->executeCommandList(CommandList, IsBlocking, OkToBatch))
return Res;

Expand Down Expand Up @@ -8417,6 +8455,11 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size,
// so manually add command to signal our event.
ZE_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));

auto res=Queue->Device->getSubmitTime(*Event);
if(res != PI_SUCCESS){
return res;
}

if (auto Res = Queue->executeCommandList(CommandList, false))
return Res;

Expand Down Expand Up @@ -8484,6 +8527,11 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr,
// so manually add command to signal our event.
ZE_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));

auto res=Queue->Device->getSubmitTime(*Event);
if(res != PI_SUCCESS){
return res;
}

Queue->executeCommandList(CommandList, false);

return PI_SUCCESS;
Expand Down Expand Up @@ -9004,4 +9052,22 @@ pi_result _pi_buffer::free() {
return PI_SUCCESS;
}

/// Reads the device's current global timestamp and converts it to a time
/// value.
///
/// The raw device clock count reported by zeDeviceGetGlobalTimestamps is
/// masked to the number of valid kernel-timestamp bits and then multiplied by
/// the device timer resolution; both values come from the cached device
/// properties.
///
/// \param deviceTime Output location that receives the computed device time.
///        Must point to writable storage; it is written unconditionally once
///        the timestamp query has completed.
/// \return PI_SUCCESS once the value has been stored. NOTE(review): ZE_CALL is
///         defined elsewhere and presumably returns early with a mapped error
///         when the Level Zero call fails -- confirm against its definition.
inline pi_result _pi_device::getDeviceTime(uint64_t *deviceTime) {
  const uint64_t ZeTimerResolution = ZeDeviceProperties->timerResolution;

  // Shifting a 64-bit value by 64 or more bits is undefined behaviour, so a
  // device reporting a full-width timestamp gets an all-ones mask instead.
  const uint64_t ValidBits = ZeDeviceProperties->kernelTimestampValidBits;
  const uint64_t TimestampMaxCount =
      ValidBits >= 64 ? ~0ULL : ((1ULL << ValidBits) - 1ULL);

  // The API reports two timestamps; only the device one is used here, the
  // other is received into a throw-away variable.
  uint64_t deviceClockCount = 0;
  uint64_t dummy = 0;

  ZE_CALL(zeDeviceGetGlobalTimestamps, (ZeDevice, &dummy, &deviceClockCount));

  *deviceTime = (deviceClockCount & TimestampMaxCount) * ZeTimerResolution;
  return PI_SUCCESS;
}

/// Stores the current device time in the submitTime field of \p event.
///
/// Nothing is recorded, and PI_SUCCESS is returned, when \p event is null or
/// when profiling is not enabled for it. The null check matters because this
/// function can be reached before the caller has created its event.
///
/// \param event Event whose submitTime field is to be updated; may be null.
/// \return PI_SUCCESS when nothing needed recording, otherwise the result of
///         getDeviceTime().
inline pi_result _pi_device::getSubmitTime(pi_event event) {
  if (!event || !event->isProfilingEnabled())
    return PI_SUCCESS;

  return getDeviceTime(&event->submitTime);
}
} // extern "C"
17 changes: 16 additions & 1 deletion sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ struct _pi_device : _pi_object {
pi_device ParentDevice = nullptr)
: ZeDevice{Device}, Platform{Plt}, RootDevice{ParentDevice},
ImmCommandListsPreferred{false}, ZeDeviceProperties{},
ZeDeviceComputeProperties{} {
ZeDeviceComputeProperties{}{
// NOTE: one must additionally call initialize() to complete
// PI device creation.
}
Expand Down Expand Up @@ -569,6 +569,17 @@ struct _pi_device : _pi_object {

bool isSubDevice() { return RootDevice != nullptr; }

/// Retrieves the current wall-clock time from the device
///
/// \param out Variable where device time would be stored
inline pi_result getDeviceTime(uint64_t *out);

/// Retrieves the current wall-clock time from device
/// and stores it in the submitTime field of the event.
///
/// \param event
inline pi_result getSubmitTime(pi_event event);

// Cache of the immutable device properties.
ZeCache<ZeStruct<ze_device_properties_t>> ZeDeviceProperties;
ZeCache<ZeStruct<ze_device_compute_properties_t>> ZeDeviceComputeProperties;
Expand Down Expand Up @@ -1350,6 +1361,10 @@ struct _pi_event : _pi_object {
(Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0;
}

// Keeps track of the submission time of the command list associated with this
// event, if the event is user visible
uint64_t submitTime = 0;

// Keeps the command-queue and command associated with the event.
// These are NULL for the user events.
pi_queue Queue = {nullptr};
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/abi/pi_level_zero_symbol_check.dump
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ piMemRelease
piMemRetain
piPlatformGetInfo
piPlatformsGet
piPluginGetLastError
piPluginInit
piProgramBuild
piProgramCompile
Expand All @@ -78,7 +79,6 @@ piSamplerCreate
piSamplerGetInfo
piSamplerRelease
piSamplerRetain
piPluginGetLastError
piTearDown
piclProgramCreateWithSource
piextContextCreateWithNativeHandle
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/abi/pi_opencl_symbol_check.dump
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ piMemBufferCreate
piMemBufferPartition
piMemImageCreate
piPlatformsGet
piPluginGetLastError
piPluginInit
piProgramCreate
piProgramCreateWithBinary
piProgramLink
piQueueCreate
piSamplerCreate
piPluginGetLastError
piTearDown
piclProgramCreateWithSource
piextContextCreateWithNativeHandle
Expand Down