Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 28 additions & 31 deletions src/backend/Level0/CHIPBackendLevel0.cc
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,10 @@ ze_command_list_handle_t CHIPQueueLevel0::getCmdListImm() {
return ZeCmdListImm_;
}

/**
 * @brief Get the immediate command list handle stored in ZeCmdListImmCopy_.
 *
 * @return ze_command_list_handle_t
 */
ze_command_list_handle_t CHIPQueueLevel0::getCmdListImmCopy() {
  ze_command_list_handle_t CopyList = this->ZeCmdListImmCopy_;
  return CopyList;
}

std::shared_ptr<CHIPEventLevel0> CHIPContextLevel0::getEventFromPool() {
// go through all pools and try to get an allocated event
LOCK(ContextMtx); // Context::EventPools
Expand Down Expand Up @@ -936,41 +940,27 @@ Borrowed<FencedCmdList> CHIPContextLevel0::getCmdListReg() {
}

CHIPQueueLevel0::CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev)
: CHIPQueueLevel0(ChipDev, 0, L0_DEFAULT_QUEUE_PRIORITY,
LevelZeroQueueType::Compute) {}
: CHIPQueueLevel0(ChipDev, 0, L0_DEFAULT_QUEUE_PRIORITY) {}

CHIPQueueLevel0::CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev,
chipstar::QueueFlags Flags)
: CHIPQueueLevel0(ChipDev, Flags, L0_DEFAULT_QUEUE_PRIORITY,
LevelZeroQueueType::Compute) {}
: CHIPQueueLevel0(ChipDev, Flags, L0_DEFAULT_QUEUE_PRIORITY) {}

CHIPQueueLevel0::CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev,
chipstar::QueueFlags Flags, int Priority)
: CHIPQueueLevel0(ChipDev, Flags, Priority, LevelZeroQueueType::Compute) {}

CHIPQueueLevel0::CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev,
chipstar::QueueFlags Flags, int Priority,
LevelZeroQueueType TheType)
: Queue(ChipDev, Flags, Priority), ChipDevLz_(ChipDev),
ChipCtxLz_(static_cast<CHIPContextLevel0 *>(ChipDev->getContext())) {
logTrace("CHIPQueueLevel0() {}", (void *)this);
ChipDevLz_ = ChipDev;
auto Ctx = ChipDevLz_->getContext();
ChipCtxLz_ = (CHIPContextLevel0 *)Ctx;

if (TheType == Compute) {
QueueProperties_ = ChipDev->getComputeQueueProps();
QueueDescriptor_ = ChipDev->getNextComputeQueueDesc(Priority);
CommandListDesc_ = ChipDev->getCommandListComputeDesc();
} else if (TheType == Copy) {
QueueProperties_ = ChipDev->getCopyQueueProps();
QueueDescriptor_ = ChipDev->getNextCopyQueueDesc(Priority);
CommandListDesc_ = ChipDev->getCommandListCopyDesc();

} else {
CHIPERR_LOG_AND_ABORT("Unknown queue type requested");
}
QueueType = TheType;
QueueProperties_ = ChipDev->getComputeQueueProps();
QueueDescriptor_ = ChipDev->getNextComputeQueueDesc(Priority);
CommandListDesc_ = ChipDev->getCommandListComputeDesc();
QueuePropertiesCopy_ = ChipDev->getCopyQueueProps();
QueueDescriptorCopy_ = ChipDev->getNextCopyQueueDesc(Priority);
CommandListDescCopy_ = ChipDev->getCommandListCopyDesc();

SharedBuf_ =
ChipCtxLz_->allocateImpl(32, 8, hipMemoryType::hipMemoryTypeUnified);
Expand Down Expand Up @@ -1011,6 +1001,14 @@ void CHIPQueueLevel0::initializeCmdListImm() {
zeStatus = zeCommandListCreateImmediate(ZeCtx_, ZeDev_, &QueueDescriptor_,
&ZeCmdListImm_);
CHIPERR_CHECK_LOG_AND_THROW_TABLE(zeCommandListCreateImmediate);

if (QueueDescriptorCopy_.ordinal < 0) {
zeStatus = zeCommandListCreateImmediate(
ZeCtx_, ZeDev_, &QueueDescriptorCopy_, &ZeCmdListImmCopy_);
CHIPERR_CHECK_LOG_AND_THROW_TABLE(zeCommandListCreateImmediate);
} else {
ZeCmdListImmCopy_ = ZeCmdListImm_;
}
}

void CHIPDeviceLevel0::initializeQueueGroupProperties() {
Expand Down Expand Up @@ -1109,8 +1107,6 @@ ze_command_queue_desc_t CHIPDeviceLevel0::getQueueDesc_(int Priority) {

ze_command_queue_desc_t
CHIPDeviceLevel0::getNextComputeQueueDesc(int Priority) {

assert(ComputeQueueGroupOrdinal_ > -1);
ze_command_queue_desc_t CommandQueueComputeDesc = getQueueDesc_(Priority);
CommandQueueComputeDesc.ordinal = ComputeQueueGroupOrdinal_;

Expand All @@ -1123,14 +1119,15 @@ CHIPDeviceLevel0::getNextComputeQueueDesc(int Priority) {
}

/**
 * @brief Build a command queue descriptor for the copy queue group.
 *
 * Starts from getQueueDesc_(Priority) and sets the ordinal to
 * CopyQueueGroupOrdinal_. If the copy group reports at least one queue, the
 * next queue index is handed out in round-robin order; otherwise the index
 * is left exactly as getQueueDesc_() returned it.
 *
 * CopyQueueGroupOrdinal_ is deliberately not asserted on here.
 * NOTE(review): presumably the ordinal stays negative on devices without a
 * copy queue group, and callers are expected to cope with that - confirm.
 *
 * @param Priority passed through to getQueueDesc_()
 * @return ze_command_queue_desc_t
 */
ze_command_queue_desc_t CHIPDeviceLevel0::getNextCopyQueueDesc(int Priority) {
  ze_command_queue_desc_t CommandQueueCopyDesc = getQueueDesc_(Priority);
  CommandQueueCopyDesc.ordinal = CopyQueueGroupOrdinal_;

  const auto MaxQueues = CopyQueueProperties_.numQueues;
  // Only rotate the index when there is something to rotate over: a zero
  // queue count would make the modulo below a division by zero. The index is
  // advanced exactly once per call, under a single acquisition of the mutex.
  if (MaxQueues > 0) {
    LOCK(NextQueueIndexMtx_); // CHIPDeviceLevel0::NextCopyQueueIndex_
    CommandQueueCopyDesc.index = NextCopyQueueIndex_;
    NextCopyQueueIndex_ = (NextCopyQueueIndex_ + 1) % MaxQueues;
  }

  return CommandQueueCopyDesc;
}
Expand Down Expand Up @@ -1222,7 +1219,7 @@ CHIPQueueLevel0::memFillAsyncImpl(void *Dst, size_t Size, const void *Pattern,
}

LOCK(CommandListMtx);
auto CommandList = this->getCmdListImm();
auto CommandList = this->getCmdListImmCopy();
// The application must not call this function from
// simultaneous threads with the same command list handle.
// Done via LOCK(CommandListMtx)
Expand Down Expand Up @@ -1270,7 +1267,7 @@ CHIPQueueLevel0::memCopy3DAsyncImpl(void *Dst, size_t Dpitch, size_t Dspitch,
SrcRegion.height = Height;
SrcRegion.depth = Depth;
LOCK(CommandListMtx);
auto CommandList = this->getCmdListImm();
auto CommandList = this->getCmdListImmCopy();
// The application must not call this function from
// simultaneous threads with the same command list handle.
// Done via LOCK(CommandListMtx)
Expand Down Expand Up @@ -1453,7 +1450,7 @@ CHIPQueueLevel0::memCopyAsyncImpl(void *Dst, const void *Src, size_t Size,
std::shared_ptr<chipstar::Event> MemCopyEvent =
static_cast<CHIPBackendLevel0 *>(Backend)->createEventShared(ChipCtxZe);
LOCK(CommandListMtx);
auto CommandList = this->getCmdListImm();
auto CommandList = this->getCmdListImmCopy();
// The application must not call this function from simultaneous threads with
// the same command list handle
// Done via LOCK(CommandListMtx)
Expand Down
18 changes: 8 additions & 10 deletions src/backend/Level0/CHIPBackendLevel0.hh
Original file line number Diff line number Diff line change
Expand Up @@ -195,12 +195,6 @@ public:
std::shared_ptr<CHIPEventLevel0> getEvent();
};

/// Selects which queue group (compute or copy) a queue is set up for.
/// Kept as an unscoped enum: the enumerators are used unqualified.
enum LevelZeroQueueType {
  Unknown = 0, ///< no type chosen
  Compute = 1,
  Copy = 2,
};

class FencedCmdList {
ze_command_list_handle_t ZeCmdList_ = nullptr;
ze_fence_handle_t ZeFence_ = nullptr;
Expand Down Expand Up @@ -278,8 +272,15 @@ protected:
ze_command_queue_group_properties_t QueueProperties_;
ze_command_queue_desc_t QueueDescriptor_;
ze_command_list_desc_t CommandListDesc_;

ze_command_queue_group_properties_t QueuePropertiesCopy_;
ze_command_queue_desc_t QueueDescriptorCopy_;
ze_command_list_desc_t CommandListDescCopy_;


ze_command_queue_handle_t ZeCmdQ_ = 0;
ze_command_list_handle_t ZeCmdListImm_ = 0;
ze_command_list_handle_t ZeCmdListImmCopy_ = 0;

void initializeCmdListImm();

Expand All @@ -296,6 +297,7 @@ public:
* @return ze_command_list_handle_t
*/
ze_command_list_handle_t getCmdListImm();
ze_command_list_handle_t getCmdListImmCopy();
/// Returns the CHIPDeviceLevel0 pointer held in ChipDevLz_.
CHIPDeviceLevel0 *getDeviceLz() {
  return this->ChipDevLz_;
}
/// Returns the CHIPContextLevel0 pointer held in ChipCtxLz_.
CHIPContextLevel0 *getContextLz() {
  return this->ChipCtxLz_;
}
std::pair<std::vector<ze_event_handle_t>, chipstar::LockGuardVector>
Expand All @@ -304,14 +306,10 @@ public:
/**
 * @brief Get maxMemoryFillPatternSize from QueueProperties_.
 *
 * NOTE(review): this reads QueueProperties_ only, not QueuePropertiesCopy_.
 * If fills are submitted on a command list created from the copy queue
 * group, that group's limit may differ - confirm which one callers need.
 *
 * @return size_t
 */
size_t getMaxMemoryFillPatternSize() {
  const size_t MaxPatternSize = QueueProperties_.maxMemoryFillPatternSize;
  return MaxPatternSize;
}
LevelZeroQueueType QueueType = LevelZeroQueueType::Unknown;
CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev);
CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev, chipstar::QueueFlags Flags);
CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev, chipstar::QueueFlags Flags,
int Priority);
CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev, chipstar::QueueFlags Flags,
int Priority, LevelZeroQueueType TheQueueType);

CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev, ze_command_queue_handle_t ZeQue);
virtual ~CHIPQueueLevel0() override;

Expand Down