Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sycl/include/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -140,5 +140,7 @@ _PI_API(piPluginGetLastError)

_PI_API(piTearDown)

_PI_API(piSetEventProperty)


#undef _PI_API
17 changes: 16 additions & 1 deletion sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,12 @@
// 10.14 Add PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY as an extension for
// piDeviceGetInfo.
// 11.15 piEventCreate creates event in the signalled state now.
// 11.16 piSetEventProperty modifies properties of a pi_event.
// Currently it only supports marking a to-be-constructed pi_event as
// user-visible.

#define _PI_H_VERSION_MAJOR 11
#define _PI_H_VERSION_MINOR 15
#define _PI_H_VERSION_MINOR 16

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -526,6 +529,8 @@ typedef enum {
PI_PROFILING_INFO_COMMAND_END = 0x1283
} _pi_profiling_info;

// Event properties that can be modified through piSetEventProperty.
// IS_USER_VISIBLE marks a pi_event as user-visible.
typedef enum { IS_USER_VISIBLE = 0x1 } _pi_event_property;

// NOTE: this is made 64-bit to match the size of cl_mem_flags to
// make the translation to OpenCL transparent.
// TODO: populate
Expand Down Expand Up @@ -1791,6 +1796,16 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter);
/// runtime must handle it or end the application.
__SYCL_EXPORT pi_result piPluginGetLastError(char **message);

/// Modifies a property of a pi_event.
/// \param event pointer to the pi_event to modify
/// \param property identifies which event property to modify (a
///        _pi_event_property enumerator)
/// \param propertySize size of the value pointed to by propertyValue
/// \param propertyValue pointer to the value to assign to the event property
__SYCL_EXPORT pi_result piSetEventProperty(pi_event *event,
_pi_event_property property,
size_t propertySize,
void *propertyValue);

struct _pi_plugin {
// PI version supported by host passed to the plugin. The Plugin
// checks and writes the appropriate Function Pointers in
Expand Down
6 changes: 6 additions & 0 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5380,6 +5380,11 @@ pi_result cuda_piTearDown(void *) {
return PI_SUCCESS;
}

/// CUDA plugin entry point for piSetEventProperty.
///
/// Setting event properties is not supported by this plugin: the call asserts
/// when assertions are enabled and reports an error otherwise.
///
/// \param event unused
/// \param property unused
/// \param propertySize unused
/// \param propertyValue unused
/// \return PI_ERROR_INVALID_OPERATION (only reachable when the assert is
///         compiled out).
pi_result cuda_piSetEventProperty(pi_event *event, _pi_event_property property,
                                  size_t propertySize, void *propertyValue) {
  (void)event;
  (void)property;
  (void)propertySize;
  (void)propertyValue;
  assert(0 && "Operation not supported");
  // Flowing off the end of a non-void function is undefined behaviour, so an
  // explicit result is required for builds where NDEBUG removes the assert.
  return PI_ERROR_INVALID_OPERATION;
}

const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING;

pi_result piPluginInit(pi_plugin *PluginInit) {
Expand Down Expand Up @@ -5524,6 +5529,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piextKernelSetArgSampler, cuda_piextKernelSetArgSampler)
_PI_CL(piPluginGetLastError, cuda_piPluginGetLastError)
_PI_CL(piTearDown, cuda_piTearDown)
_PI_CL(piSetEventProperty, cuda_piSetEventProperty)

#undef _PI_CL

Expand Down
2 changes: 1 addition & 1 deletion sycl/plugins/cuda/pi_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

// This version should be incremented for any change made to this file or its
// corresponding .cpp file.
#define _PI_CUDA_PLUGIN_VERSION 1
#define _PI_CUDA_PLUGIN_VERSION 2

#define _PI_CUDA_PLUGIN_VERSION_STRING \
_PI_PLUGIN_VERSION_STRING(_PI_CUDA_PLUGIN_VERSION)
Expand Down
5 changes: 5 additions & 0 deletions sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1983,6 +1983,11 @@ pi_result piTearDown(void *) {
return PI_SUCCESS;
}

/// ESIMD emulator plugin entry point for piSetEventProperty.
///
/// Setting event properties is not supported by this plugin: the call asserts
/// when assertions are enabled and reports an error otherwise.
///
/// \param event unused
/// \param property unused
/// \param propertySize unused
/// \param propertyValue unused
/// \return PI_ERROR_INVALID_OPERATION (only reachable when the assert is
///         compiled out).
pi_result piSetEventProperty(pi_event *event, _pi_event_property property,
                             size_t propertySize, void *propertyValue) {
  (void)event;
  (void)property;
  (void)propertySize;
  (void)propertyValue;
  assert(0 && "Operation not supported");
  // Flowing off the end of a non-void function is undefined behaviour, so an
  // explicit result is required for builds where NDEBUG removes the assert.
  return PI_ERROR_INVALID_OPERATION;
}

const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING;

pi_result piPluginInit(pi_plugin *PluginInit) {
Expand Down
2 changes: 1 addition & 1 deletion sycl/plugins/esimd_emulator/pi_esimd_emulator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

// This version should be incremented for any change made to this file or its
// corresponding .cpp file.
#define _PI_ESIMD_PLUGIN_VERSION 1
#define _PI_ESIMD_PLUGIN_VERSION 2

#define _PI_ESIMD_PLUGIN_VERSION_STRING \
_PI_PLUGIN_VERSION_STRING(_PI_ESIMD_PLUGIN_VERSION)
Expand Down
5 changes: 5 additions & 0 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5108,6 +5108,10 @@ pi_result hip_piTearDown(void *PluginParameter) {
return PI_SUCCESS;
}

/// HIP plugin entry point for piSetEventProperty.
///
/// Setting event properties is not supported by this plugin: the call asserts
/// when assertions are enabled and reports an error otherwise.
///
/// \param event unused
/// \param property unused
/// \param propertySize unused
/// \param propertyValue unused
/// \return PI_ERROR_INVALID_OPERATION (only reachable when the assert is
///         compiled out).
pi_result hip_piSetEventProperty(pi_event *event, _pi_event_property property,
                                 size_t propertySize, void *propertyValue) {
  (void)event;
  (void)property;
  (void)propertySize;
  (void)propertyValue;
  assert(0 && "Operation not supported");
  // Flowing off the end of a non-void function is undefined behaviour, so an
  // explicit result is required for builds where NDEBUG removes the assert.
  return PI_ERROR_INVALID_OPERATION;
}
const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING;

pi_result piPluginInit(pi_plugin *PluginInit) {
Expand Down Expand Up @@ -5246,6 +5250,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piextKernelSetArgSampler, hip_piextKernelSetArgSampler)
_PI_CL(piPluginGetLastError, hip_piPluginGetLastError)
_PI_CL(piTearDown, hip_piTearDown)
_PI_CL(piSetEventProperty, hip_piSetEventProperty)

#undef _PI_CL

Expand Down
2 changes: 1 addition & 1 deletion sycl/plugins/hip/pi_hip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

// This version should be incremented for any change made to this file or its
// corresponding .cpp file.
#define _PI_HIP_PLUGIN_VERSION 1
#define _PI_HIP_PLUGIN_VERSION 2

#define _PI_HIP_PLUGIN_VERSION_STRING \
_PI_PLUGIN_VERSION_STRING(_PI_HIP_PLUGIN_VERSION)
Expand Down
96 changes: 93 additions & 3 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,39 @@ static const bool DisableEventsCaching = [] {
return std::stoi(DisableEventsCachingFlag) != 0;
}();

// Stores pointers to events that are user visible. Entries are pi_event
// handle addresses (pi_event *), added by piMarkEventUserVisible and removed
// by piIsEventUserVisible.
// TODO: When ABI breaking changes are allowed, pass a boolean to the
// piEnqueue methods (piEnqueueKernelLaunch, etc.) instead to indicate
// whether an event is user visible.
static std::list<pi_event *> piUserVisibleEvents{};
// Mutex for piUserVisibleEvents; locked by both helpers that touch the list.
static pi_mutex piUserVisibleEventsMutex{};

/// Reports whether \p event has been marked user visible, i.e. whether its
/// pointer value is currently stored in piUserVisibleEvents.
///
/// A successful lookup also consumes the mark: the first matching entry is
/// erased from the list before the function returns.
///
/// \param event The event to check
/// \return true when a matching entry was found (and removed), false otherwise
bool piIsEventUserVisible(pi_event *event) {
  std::scoped_lock<pi_mutex> Guard(piUserVisibleEventsMutex);

  // Walk to the first entry holding the same pointer value, if any.
  auto Entry = piUserVisibleEvents.begin();
  const auto End = piUserVisibleEvents.end();
  while (Entry != End && *Entry != event)
    ++Entry;

  if (Entry == End)
    return false;

  piUserVisibleEvents.erase(Entry);
  return true;
}

/// Records \p event at the front of piUserVisibleEvents, so that a later
/// piIsEventUserVisible call with the same pointer reports it as user visible.
///
/// \param event To mark as user visible
void piMarkEventUserVisible(pi_event *event) {
  std::scoped_lock<pi_mutex> Guard(piUserVisibleEventsMutex);
  piUserVisibleEvents.insert(piUserVisibleEvents.begin(), event);
}

// This class encapsulates actions taken along with a call to Level Zero API.
class ZeCall {
private:
Expand Down Expand Up @@ -5408,6 +5441,8 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
// reference count on the kernel, using the kernel saved in CommandData.
PI_CALL(piKernelRetain(Kernel));

PI_CALL(Queue->Device->deviceTime.getSubmitTime(Event));

// Add to list of kernels to be submitted
if (IndirectAccessTrackingEnabled)
Queue->KernelsToBeSubmitted.push_back(Kernel);
Expand Down Expand Up @@ -5760,9 +5795,11 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
return ReturnValue(ContextEndTime);
}
case PI_PROFILING_INFO_COMMAND_QUEUED:
case PI_PROFILING_INFO_COMMAND_SUBMIT:
// TODO: Support these when Level Zero supported is added.
return ReturnValue(uint64_t{0});
case PI_PROFILING_INFO_COMMAND_SUBMIT: {
// No solid way of handling possible wrap around as the the event may not be
// signalled by device, thus no way of obtaining event start time
return ReturnValue(Event->submitTime);
}
default:
zePrint("piEventGetProfilingInfo: not supported ParamName\n");
return PI_ERROR_INVALID_VALUE;
Expand Down Expand Up @@ -6314,6 +6351,8 @@ pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList,

ZE_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));

PI_CALL(Queue->Device->deviceTime.getSubmitTime(Event));

// Execute command list asynchronously as the event will be used
// to track down its completion.
return Queue->executeCommandList(CommandList);
Expand Down Expand Up @@ -6407,6 +6446,7 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
insertBarrierIntoCmdList(CmdList, TmpWaitList, *Event, IsInternal))
return Res;

PI_CALL(Queue->Device->deviceTime.getSubmitTime(Event));
if (auto Res = Queue->executeCommandList(CmdList, false, OkToBatch))
return Res;

Expand Down Expand Up @@ -6678,6 +6718,7 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
ZE_CALL(zeCommandListAppendMemoryCopy,
(ZeCommandList, Dst, Src, Size, ZeEvent, 0, nullptr));

PI_CALL(Queue->Device->deviceTime.getSubmitTime(Event));
if (auto Res =
Queue->executeCommandList(CommandList, BlockingWrite, OkToBatch))
return Res;
Expand Down Expand Up @@ -6780,6 +6821,7 @@ static pi_result enqueueMemCopyRectHelper(
zePrint("calling zeCommandListAppendBarrier() with Event %#lx\n",
pi_cast<std::uintptr_t>(ZeEvent));

PI_CALL(Queue->Device->deviceTime.getSubmitTime(Event));
if (auto Res = Queue->executeCommandList(CommandList, Blocking, OkToBatch))
return Res;

Expand Down Expand Up @@ -6999,6 +7041,7 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr,
pi_cast<pi_uint64>(ZeEvent));
printZeEventList(WaitList);

PI_CALL(Queue->Device->deviceTime.getSubmitTime(Event));
// Execute command list asynchronously, as the event will be used
// to track down its completion.
if (auto Res = Queue->executeCommandList(CommandList, false, OkToBatch))
Expand Down Expand Up @@ -7054,6 +7097,8 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Mem, pi_bool BlockingMap,
ze_event_handle_t ZeEvent = nullptr;

bool UseCopyEngine = false;

PI_CALL(Queue->Device->deviceTime.getSubmitTime(Event));
{
// Lock automatically releases when this goes out of scope.
std::scoped_lock<pi_shared_mutex> lock(Queue->Mutex);
Expand Down Expand Up @@ -7512,6 +7557,7 @@ static pi_result enqueueMemImageCommandHelper(
return PI_ERROR_INVALID_OPERATION;
}

PI_CALL(Queue->Device->deviceTime.getSubmitTime(Event));
if (auto Res = Queue->executeCommandList(CommandList, IsBlocking, OkToBatch))
return Res;

Expand Down Expand Up @@ -8417,6 +8463,8 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size,
// so manually add command to signal our event.
ZE_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));

PI_CALL(Queue->Device->deviceTime.getSubmitTime(Event));

if (auto Res = Queue->executeCommandList(CommandList, false))
return Res;

Expand Down Expand Up @@ -8484,6 +8532,8 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr,
// so manually add command to signal our event.
ZE_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));

PI_CALL(Queue->Device->deviceTime.getSubmitTime(Event));

Queue->executeCommandList(CommandList, false);

return PI_SUCCESS;
Expand Down Expand Up @@ -9004,4 +9054,44 @@ pi_result _pi_buffer::free() {
return PI_SUCCESS;
}

/// Reads the device's global timestamp and scales it by the timer resolution.
///
/// On the first call the timer resolution and the mask of valid timestamp
/// bits are cached from the device properties; later calls reuse them.
///
/// \param deviceTime Receives the masked device clock count multiplied by
///        ZeTimerResolution.
/// \return PI_SUCCESS when the timestamp was read. A failing Level Zero call
///         is handled by ZE_CALL (presumably by returning the mapped error —
///         verify against the macro definition).
inline pi_result piDeviceTime::get(uint64_t *deviceTime) {
  {
    // The guard must be a named object: an unnamed `std::unique_lock{mutex};`
    // is a temporary that unlocks again before the next statement runs. The
    // flag is also tested under the lock and set last, so no thread can
    // observe `initialized == true` with the cached values still unwritten.
    std::scoped_lock<pi_mutex> Guard(mutex);
    if (!initialized) {
      ZeTimerResolution = device->ZeDeviceProperties->timerResolution;
      const uint64_t ValidBits =
          device->ZeDeviceProperties->kernelTimestampValidBits;
      // Shifting a 64-bit value by 64 or more is undefined behaviour.
      TimestampMaxCount =
          ValidBits >= 64 ? ~0ULL : ((1ULL << ValidBits) - 1ULL);
      initialized = true;
    }
  }

  uint64_t hostClockCount = 0; // required by the API, not used here
  uint64_t deviceClockCount = 0;
  ZE_CALL(zeDeviceGetGlobalTimestamps,
          (device->ZeDevice, &hostClockCount, &deviceClockCount));
  *deviceTime = (deviceClockCount & TimestampMaxCount) * ZeTimerResolution;
  return PI_SUCCESS;
}

/// Stores the current device time in the submitTime field of \p event, but
/// only when the event has profiling enabled and is marked user visible.
///
/// \param event Address of the event handle to stamp. A null pointer, or a
///        pointer to a null handle, is ignored.
/// \return PI_SUCCESS when nothing had to be done, otherwise the result of
///         get().
inline pi_result piDeviceTime::getSubmitTime(pi_event *event) {
  // Guard against dereferencing a missing event handle.
  if (event == nullptr || *event == nullptr)
    return PI_SUCCESS;

  // NOTE(review): piIsEventUserVisible erases the mark as a side effect, and
  // the short-circuit below skips it when profiling is disabled, so a mark set
  // for such an event stays in the list — confirm that callers only mark
  // events on profiling-enabled queues.
  if (!(*event)->isProfilingEnabled() || !piIsEventUserVisible(event))
    return PI_SUCCESS;

  return get(&((*event)->submitTime));
}
/// Level Zero implementation of piSetEventProperty.
///
/// Only IS_USER_VISIBLE is handled: a true value records \p event in the
/// user-visible list, a false value removes a previously recorded mark.
///
/// \param event Address of the pi_event handle to modify; must not be null.
/// \param property Property to modify.
/// \param propertySize Size of the object \p propertyValue points to; must be
///        at least sizeof(bool) for IS_USER_VISIBLE.
/// \param propertyValue For IS_USER_VISIBLE, points to a bool; must not be
///        null.
/// \return PI_SUCCESS on success, PI_ERROR_INVALID_VALUE for an unknown
///         property, a null \p event or \p propertyValue, or a
///         \p propertySize too small for the property.
pi_result piSetEventProperty(pi_event *event, _pi_event_property property,
                             size_t propertySize, void *propertyValue) {
  if (event == nullptr || propertyValue == nullptr)
    return PI_ERROR_INVALID_VALUE;

  switch (property) {
  case IS_USER_VISIBLE: {
    // Do not read a bool from a buffer that is too small to hold one.
    if (propertySize < sizeof(bool))
      return PI_ERROR_INVALID_VALUE;

    const bool isUserVisible = *static_cast<const bool *>(propertyValue);
    if (isUserVisible) {
      piMarkEventUserVisible(event);
    } else {
      // The lookup erases a matching entry as a side effect, which is the
      // only reason it is called here; its result is not needed.
      (void)piIsEventUserVisible(event);
    }
    break;
  }
  default: {
    return PI_ERROR_INVALID_VALUE;
  }
  }
  return PI_SUCCESS;
}
} // extern "C"
36 changes: 34 additions & 2 deletions sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

// This version should be incremented for any change made to this file or its
// corresponding .cpp file.
#define _PI_LEVEL_ZERO_PLUGIN_VERSION 1
#define _PI_LEVEL_ZERO_PLUGIN_VERSION 2

#define _PI_LEVEL_ZERO_PLUGIN_VERSION_STRING \
_PI_PLUGIN_VERSION_STRING(_PI_LEVEL_ZERO_PLUGIN_VERSION)
Expand Down Expand Up @@ -356,6 +356,33 @@ struct MemAllocRecord : _pi_object {
bool OwnZeMemHandle;
};

// Struct used to fetch device wall-clock time
struct piDeviceTime {
private:
  // Device to query
  pi_device device;
  // Number of nanoseconds per clock step, assuming
  // stype==ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES. Filled in by get().
  uint64_t ZeTimerResolution = 0;
  // Bit mask applied to the raw device clock count before scaling. Filled in
  // by get().
  uint64_t TimestampMaxCount = 0;
  // Whether ZeTimerResolution and TimestampMaxCount have been populated.
  bool initialized = false;
  // Used by get() around the one-time population of the cached values.
  pi_mutex mutex;

public:
  /// Retrieves current wall-clock time from device
  ///
  /// \param deviceTime Variable where device time would be stored
  inline pi_result get(uint64_t *deviceTime);

  /// Checks if the passed in event is user visible.
  /// If so then retrieves the current wall-clock time from device
  /// and stores it in the submitTime field of the event.
  /// Used to calculate the submission time of a commandlist
  ///
  /// \param event is the event to check for user visibility
  inline pi_result getSubmitTime(pi_event *event);

  piDeviceTime(pi_device dev) : device(dev) {}
};

// Define the types that are opaque in pi.h in a manner suitabale for Level Zero
// plugin

Expand Down Expand Up @@ -484,7 +511,7 @@ struct _pi_device : _pi_object {
pi_device ParentDevice = nullptr)
: ZeDevice{Device}, Platform{Plt}, RootDevice{ParentDevice},
ImmCommandListsPreferred{false}, ZeDeviceProperties{},
ZeDeviceComputeProperties{} {
ZeDeviceComputeProperties{}, deviceTime(this) {
// NOTE: one must additionally call initialize() to complete
// PI device creation.
}
Expand Down Expand Up @@ -579,6 +606,7 @@ struct _pi_device : _pi_object {
ZeCache<ZeStruct<ze_device_memory_access_properties_t>>
ZeDeviceMemoryAccessProperties;
ZeCache<ZeStruct<ze_device_cache_properties_t>> ZeDeviceCacheProperties;
piDeviceTime deviceTime;
};

// Structure describing the specific use of a command-list in a queue.
Expand Down Expand Up @@ -1350,6 +1378,10 @@ struct _pi_event : _pi_object {
(Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0;
}

// Keeps track of the submisison time of the commadlist associated with this
// event, if event is user visible
uint64_t submitTime = 0;

// Keeps the command-queue and command associated with the event.
// These are NULL for the user events.
pi_queue Queue = {nullptr};
Expand Down
Loading