Skip to content

Commit 5a92dba

Browse files
authored
Merge pull request #949 from CHIP-SPV/LevelZeroCopy
Use Level Zero Copy Queues
2 parents c0e378c + 9a93d03 commit 5a92dba

2 files changed

Lines changed: 36 additions & 41 deletions

File tree

src/backend/Level0/CHIPBackendLevel0.cc

Lines changed: 28 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -881,6 +881,10 @@ ze_command_list_handle_t CHIPQueueLevel0::getCmdListImm() {
881881
return ZeCmdListImm_;
882882
}
883883

884+
/// Returns the ZeCmdListImmCopy_ handle, i.e. the immediate command list
/// that the memFill/memCopy/memCopy3D async implementations in this file
/// append to. initializeCmdListImm() may set it to the very same handle as
/// ZeCmdListImm_ (its fallback branch), so callers must not assume the two
/// lists are distinct.
ze_command_list_handle_t CHIPQueueLevel0::getCmdListImmCopy() {
885+
return ZeCmdListImmCopy_;
886+
}
887+
884888
std::shared_ptr<CHIPEventLevel0> CHIPContextLevel0::getEventFromPool() {
885889
// go through all pools and try to get an allocated event
886890
LOCK(ContextMtx); // Context::EventPools
@@ -936,41 +940,27 @@ Borrowed<FencedCmdList> CHIPContextLevel0::getCmdListReg() {
936940
}
937941

938942
/// Device-only constructor. After this commit it delegates to the
/// (device, flags, priority) overload with flags 0 and
/// L0_DEFAULT_QUEUE_PRIORITY; the removed ("-") lines show the former
/// delegation that additionally passed LevelZeroQueueType::Compute.
CHIPQueueLevel0::CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev)
939-
: CHIPQueueLevel0(ChipDev, 0, L0_DEFAULT_QUEUE_PRIORITY,
940-
LevelZeroQueueType::Compute) {}
943+
: CHIPQueueLevel0(ChipDev, 0, L0_DEFAULT_QUEUE_PRIORITY) {}
941944

942945
/// (device, flags) constructor. After this commit it delegates to the
/// (device, flags, priority) overload, forwarding Flags and using
/// L0_DEFAULT_QUEUE_PRIORITY; the removed ("-") lines show the former
/// delegation that additionally passed LevelZeroQueueType::Compute.
CHIPQueueLevel0::CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev,
943946
chipstar::QueueFlags Flags)
944-
: CHIPQueueLevel0(ChipDev, Flags, L0_DEFAULT_QUEUE_PRIORITY,
945-
LevelZeroQueueType::Compute) {}
947+
: CHIPQueueLevel0(ChipDev, Flags, L0_DEFAULT_QUEUE_PRIORITY) {}
946948

947949
CHIPQueueLevel0::CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev,
948950
chipstar::QueueFlags Flags, int Priority)
949-
: CHIPQueueLevel0(ChipDev, Flags, Priority, LevelZeroQueueType::Compute) {}
950-
951-
CHIPQueueLevel0::CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev,
952-
chipstar::QueueFlags Flags, int Priority,
953-
LevelZeroQueueType TheType)
954951
: Queue(ChipDev, Flags, Priority), ChipDevLz_(ChipDev),
955952
ChipCtxLz_(static_cast<CHIPContextLevel0 *>(ChipDev->getContext())) {
956953
logTrace("CHIPQueueLevel0() {}", (void *)this);
957954
ChipDevLz_ = ChipDev;
958955
auto Ctx = ChipDevLz_->getContext();
959956
ChipCtxLz_ = (CHIPContextLevel0 *)Ctx;
960957

961-
if (TheType == Compute) {
962-
QueueProperties_ = ChipDev->getComputeQueueProps();
963-
QueueDescriptor_ = ChipDev->getNextComputeQueueDesc(Priority);
964-
CommandListDesc_ = ChipDev->getCommandListComputeDesc();
965-
} else if (TheType == Copy) {
966-
QueueProperties_ = ChipDev->getCopyQueueProps();
967-
QueueDescriptor_ = ChipDev->getNextCopyQueueDesc(Priority);
968-
CommandListDesc_ = ChipDev->getCommandListCopyDesc();
969-
970-
} else {
971-
CHIPERR_LOG_AND_ABORT("Unknown queue type requested");
972-
}
973-
QueueType = TheType;
958+
QueueProperties_ = ChipDev->getComputeQueueProps();
959+
QueueDescriptor_ = ChipDev->getNextComputeQueueDesc(Priority);
960+
CommandListDesc_ = ChipDev->getCommandListComputeDesc();
961+
QueuePropertiesCopy_ = ChipDev->getCopyQueueProps();
962+
QueueDescriptorCopy_ = ChipDev->getNextCopyQueueDesc(Priority);
963+
CommandListDescCopy_ = ChipDev->getCommandListCopyDesc();
974964

975965
SharedBuf_ =
976966
ChipCtxLz_->allocateImpl(32, 8, hipMemoryType::hipMemoryTypeUnified);
@@ -1011,6 +1001,14 @@ void CHIPQueueLevel0::initializeCmdListImm() {
10111001
zeStatus = zeCommandListCreateImmediate(ZeCtx_, ZeDev_, &QueueDescriptor_,
10121002
&ZeCmdListImm_);
10131003
CHIPERR_CHECK_LOG_AND_THROW_TABLE(zeCommandListCreateImmediate);
1004+
1005+
if (QueueDescriptorCopy_.ordinal < 0) {
1006+
zeStatus = zeCommandListCreateImmediate(
1007+
ZeCtx_, ZeDev_, &QueueDescriptorCopy_, &ZeCmdListImmCopy_);
1008+
CHIPERR_CHECK_LOG_AND_THROW_TABLE(zeCommandListCreateImmediate);
1009+
} else {
1010+
ZeCmdListImmCopy_ = ZeCmdListImm_;
1011+
}
10141012
}
10151013

10161014
void CHIPDeviceLevel0::initializeQueueGroupProperties() {
@@ -1109,8 +1107,6 @@ ze_command_queue_desc_t CHIPDeviceLevel0::getQueueDesc_(int Priority) {
11091107

11101108
ze_command_queue_desc_t
11111109
CHIPDeviceLevel0::getNextComputeQueueDesc(int Priority) {
1112-
1113-
assert(ComputeQueueGroupOrdinal_ > -1);
11141110
ze_command_queue_desc_t CommandQueueComputeDesc = getQueueDesc_(Priority);
11151111
CommandQueueComputeDesc.ordinal = ComputeQueueGroupOrdinal_;
11161112

@@ -1123,14 +1119,15 @@ CHIPDeviceLevel0::getNextComputeQueueDesc(int Priority) {
11231119
}
11241120

11251121
/// Builds a command-queue descriptor for the copy queue group.
///
/// Starts from getQueueDesc_(Priority), stores CopyQueueGroupOrdinal_ in the
/// descriptor's ordinal field and, when the copy group reports at least one
/// queue, assigns the next queue index in round-robin order (the index
/// counter is advanced modulo numQueues while NextQueueIndexMtx_ is held).
/// When numQueues is 0 the index is left as getQueueDesc_() produced it.
///
/// @param Priority forwarded to getQueueDesc_().
/// @return the populated descriptor, by value.
///
/// NOTE(review): this commit removes the assert that
/// CopyQueueGroupOrdinal_ > -1, so a negative ordinal can now be stored in
/// the descriptor. In the Level Zero headers ze_command_queue_desc_t::ordinal
/// appears to be an unsigned 32-bit field; if so, the
/// `QueueDescriptorCopy_.ordinal < 0` test in initializeCmdListImm() can never
/// be true and the dedicated copy command list would never be created -
/// TODO confirm the field type and the intended polarity of that check.
ze_command_queue_desc_t CHIPDeviceLevel0::getNextCopyQueueDesc(int Priority) {
1126-
assert(CopyQueueGroupOrdinal_ > -1);
11271122
ze_command_queue_desc_t CommandQueueCopyDesc = getQueueDesc_(Priority);
11281123
CommandQueueCopyDesc.ordinal = CopyQueueGroupOrdinal_;
11291124

11301125
auto MaxQueues = CopyQueueProperties_.numQueues;
1131-
LOCK(NextQueueIndexMtx_); // CHIPDeviceLevel0::NextCopyQueueIndex_
1132-
CommandQueueCopyDesc.index = NextCopyQueueIndex_;
1133-
NextCopyQueueIndex_ = (NextCopyQueueIndex_ + 1) % MaxQueues;
1126+
// Guard added by this commit: skips the `% MaxQueues` below when the copy
// group has no queues, which would otherwise be a modulo by zero.
if (MaxQueues > 0) {
1127+
LOCK(NextQueueIndexMtx_); // CHIPDeviceLevel0::NextCopyQueueIndex_
1128+
CommandQueueCopyDesc.index = NextCopyQueueIndex_;
1129+
NextCopyQueueIndex_ = (NextCopyQueueIndex_ + 1) % MaxQueues;
1130+
}
11341131

11351132
return CommandQueueCopyDesc;
11361133
}
@@ -1222,7 +1219,7 @@ CHIPQueueLevel0::memFillAsyncImpl(void *Dst, size_t Size, const void *Pattern,
12221219
}
12231220

12241221
LOCK(CommandListMtx);
1225-
auto CommandList = this->getCmdListImm();
1222+
auto CommandList = this->getCmdListImmCopy();
12261223
// The application must not call this function from
12271224
// simultaneous threads with the same command list handle.
12281225
// Done via LOCK(CommandListMtx)
@@ -1270,7 +1267,7 @@ CHIPQueueLevel0::memCopy3DAsyncImpl(void *Dst, size_t Dpitch, size_t Dspitch,
12701267
SrcRegion.height = Height;
12711268
SrcRegion.depth = Depth;
12721269
LOCK(CommandListMtx);
1273-
auto CommandList = this->getCmdListImm();
1270+
auto CommandList = this->getCmdListImmCopy();
12741271
// The application must not call this function from
12751272
// simultaneous threads with the same command list handle.
12761273
// Done via LOCK(CommandListMtx)
@@ -1453,7 +1450,7 @@ CHIPQueueLevel0::memCopyAsyncImpl(void *Dst, const void *Src, size_t Size,
14531450
std::shared_ptr<chipstar::Event> MemCopyEvent =
14541451
static_cast<CHIPBackendLevel0 *>(Backend)->createEventShared(ChipCtxZe);
14551452
LOCK(CommandListMtx);
1456-
auto CommandList = this->getCmdListImm();
1453+
auto CommandList = this->getCmdListImmCopy();
14571454
// The application must not call this function from simultaneous threads with
14581455
// the same command list handle
14591456
// Done via LOCK(CommandListMtx)

src/backend/Level0/CHIPBackendLevel0.hh

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -195,12 +195,6 @@ public:
195195
std::shared_ptr<CHIPEventLevel0> getEvent();
196196
};
197197

198-
enum LevelZeroQueueType {
199-
Unknown = 0,
200-
Compute,
201-
Copy,
202-
};
203-
204198
class FencedCmdList {
205199
ze_command_list_handle_t ZeCmdList_ = nullptr;
206200
ze_fence_handle_t ZeFence_ = nullptr;
@@ -278,8 +272,15 @@ protected:
278272
ze_command_queue_group_properties_t QueueProperties_;
279273
ze_command_queue_desc_t QueueDescriptor_;
280274
ze_command_list_desc_t CommandListDesc_;
275+
276+
ze_command_queue_group_properties_t QueuePropertiesCopy_;
277+
ze_command_queue_desc_t QueueDescriptorCopy_;
278+
ze_command_list_desc_t CommandListDescCopy_;
279+
280+
281281
ze_command_queue_handle_t ZeCmdQ_ = 0;
282282
ze_command_list_handle_t ZeCmdListImm_ = 0;
283+
ze_command_list_handle_t ZeCmdListImmCopy_ = 0;
283284

284285
void initializeCmdListImm();
285286

@@ -296,6 +297,7 @@ public:
296297
* @return ze_command_list_handle_t
297298
*/
298299
ze_command_list_handle_t getCmdListImm();
300+
ze_command_list_handle_t getCmdListImmCopy();
299301
/// Returns the ChipDevLz_ pointer held by this queue (non-owning accessor).
CHIPDeviceLevel0 *getDeviceLz() { return ChipDevLz_; }
300302
/// Returns the ChipCtxLz_ pointer held by this queue (non-owning accessor).
CHIPContextLevel0 *getContextLz() { return ChipCtxLz_; }
301303
std::pair<std::vector<ze_event_handle_t>, chipstar::LockGuardVector>
@@ -304,14 +306,10 @@ public:
304306
/// Returns maxMemoryFillPatternSize from QueueProperties_, which the
/// constructor fills from ChipDev->getComputeQueueProps() (the compute
/// queue group).
///
/// NOTE(review): memFillAsyncImpl() now takes its command list from
/// getCmdListImmCopy(). If that list is created on the copy queue group, the
/// applicable limit may be QueuePropertiesCopy_.maxMemoryFillPatternSize
/// rather than the compute group's value returned here - TODO confirm which
/// limit callers of this accessor need.
size_t getMaxMemoryFillPatternSize() {
305307
return QueueProperties_.maxMemoryFillPatternSize;
306308
}
307-
LevelZeroQueueType QueueType = LevelZeroQueueType::Unknown;
308309
CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev);
309310
CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev, chipstar::QueueFlags Flags);
310311
CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev, chipstar::QueueFlags Flags,
311312
int Priority);
312-
CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev, chipstar::QueueFlags Flags,
313-
int Priority, LevelZeroQueueType TheQueueType);
314-
315313
CHIPQueueLevel0(CHIPDeviceLevel0 *ChipDev, ze_command_queue_handle_t ZeQue);
316314
virtual ~CHIPQueueLevel0() override;
317315

0 commit comments

Comments
 (0)