Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
a2acac8
[SYCL] Make host task blocking and detach empty command. Part 1
KseniyaTikhomirova Oct 7, 2022
1fafe3b
Merge branch 'sycl' into empty_task_removal
KseniyaTikhomirova Oct 7, 2022
521a4cc
Temporary disable unit test for depends_on
KseniyaTikhomirova Oct 7, 2022
01898c2
Fix impl gaps and update/enable 1 unit test
KseniyaTikhomirova Oct 9, 2022
007fbb7
Fix & enable unit tests
KseniyaTikhomirova Oct 10, 2022
d442362
Fix blocking enqueueCommand call & update queue unit tests
KseniyaTikhomirova Oct 10, 2022
9c05886
Fix build
KseniyaTikhomirova Oct 11, 2022
30d46b5
Remove duplicated enqueue
KseniyaTikhomirova Oct 21, 2022
65e97c9
Fix data races for BlockedUsers
KseniyaTikhomirova Oct 27, 2022
65451cb
Merge branch 'sycl' into empty_task_removal
KseniyaTikhomirova Oct 27, 2022
4871246
Fix event isCompleted impl
KseniyaTikhomirova Oct 27, 2022
e6b91da
Merge branch 'sycl' into empty_task_removal
KseniyaTikhomirova Nov 16, 2022
b128d7b
DRAFT
KseniyaTikhomirova Nov 17, 2022
3403de2
Merge branch 'sycl' into empty_task_removal
KseniyaTikhomirova Nov 28, 2022
64ef595
Remove empty task usage from host accessor path
KseniyaTikhomirova Nov 29, 2022
abcf47c
Merge https://github.com/KseniyaTikhomirova/llvm into empty_task_remo…
KseniyaTikhomirova Dec 2, 2022
1aff2c8
Remove lines uploaded by mistake
KseniyaTikhomirova Dec 2, 2022
4d389ec
draft to amend when sycl_throw_on_block deleted
KseniyaTikhomirova Dec 2, 2022
b634f5f
Restore unit tests & cleanup redundant code
KseniyaTikhomirova Dec 5, 2022
3c9bb1a
Fix test
KseniyaTikhomirova Dec 8, 2022
247db96
Fix waiting of manually blocked command
KseniyaTikhomirova Dec 11, 2022
649bb80
Recursive mutex is not needed after fix
KseniyaTikhomirova Dec 12, 2022
c9eaa53
Add tests
KseniyaTikhomirova Dec 12, 2022
d23c5ec
remove unused function
KseniyaTikhomirova Dec 19, 2022
09d7092
Merge branch 'sycl' into host_acc_without_empty_task
KseniyaTikhomirova Dec 19, 2022
76388ae
more tests
KseniyaTikhomirova Dec 19, 2022
adf898a
fix order
KseniyaTikhomirova Dec 19, 2022
f6f35eb
Merge branch 'sycl' into host_acc_without_empty_task
KseniyaTikhomirova Dec 20, 2022
a51b53a
Fix merge issues
KseniyaTikhomirova Dec 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 24 additions & 114 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,6 @@ static std::string commandToNodeType(Command::CommandType Type) {
return "memory_transfer_node";
case Command::CommandType::UPDATE_REQUIREMENT:
return "host_acc_create_buffer_lock_node";
case Command::CommandType::EMPTY_TASK:
return "host_acc_destroy_buffer_release_node";
case Command::CommandType::FUSION:
return "kernel_fusion_placeholder_node";
default:
Expand Down Expand Up @@ -196,8 +194,6 @@ static std::string commandToName(Command::CommandType Type) {
return "Memory Transfer (Unmap)";
case Command::CommandType::UPDATE_REQUIREMENT:
return "Host Accessor Creation/Buffer Lock";
case Command::CommandType::EMPTY_TASK:
return "Host Accessor Destruction/Buffer Lock Release";
case Command::CommandType::FUSION:
return "Kernel Fusion Placeholder";
default:
Expand Down Expand Up @@ -236,6 +232,21 @@ bool Command::isHostTask() const {
CG::CGTYPE::CodeplayHostTask);
}

/// Marks this command as manually blocked and records the reason.
///
/// \param Reason value stored in MBlockReason when the block is newly set.
/// \return false if the command was already manually blocked (nothing is
/// changed in that case), true if this call set the block.
// NOTE(review): the check and the set are two separate operations on the flag;
// callers racing on the same command presumably need external
// synchronization - confirm.
bool Command::blockManually(const BlockReason &Reason) {
  const bool WasBlocked = MIsManuallyBlocked;
  if (!WasBlocked) {
    // Store the reason first, then raise the flag.
    MBlockReason = Reason;
    MIsManuallyBlocked = true;
  }
  return !WasBlocked;
}

/// Clears the manual block previously set on this command.
///
/// The flag is tested and cleared with a single atomic read-modify-write, so
/// when several threads try to unblock the same command only the one that
/// actually performs the blocked -> unblocked transition gets \c true.
///
/// \return true if the command was manually blocked and this call cleared the
/// flag, false if it was not manually blocked.
bool Command::unblock() {
  // exchange() returns the previous value: true only if we cleared the block.
  return MIsManuallyBlocked.exchange(false);
}

static void flushCrossQueueDeps(const std::vector<EventImplPtr> &EventImpls,
const QueueImplPtr &Queue) {
for (auto &EventImpl : EventImpls) {
Expand Down Expand Up @@ -625,7 +636,7 @@ bool Command::supportsPostEnqueueCleanup() const { return true; }

/// Tells whether this command can be cleaned up.
///
/// All three conditions must hold: the leaf counter is zero, the command has
/// been enqueued successfully, and it is not blocking (see isBlocking()).
///
/// \return true when all of the conditions above are satisfied.
bool Command::readyForCleanup() const {
  // Single expression: the blocking check must be part of the returned value,
  // not a separate statement after the return.
  return MLeafCounter == 0 &&
         MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess && !isBlocking();
}

Command *Command::addDep(DepDesc NewDep, std::vector<Command *> &ToCleanUp) {
Expand All @@ -636,7 +647,7 @@ Command *Command::addDep(DepDesc NewDep, std::vector<Command *> &ToCleanUp) {
processDepEvent(NewDep.MDepCommand->getEvent(), NewDep, ToCleanUp);
}
// ConnectionCmd insertion builds the following dependency structure:
// this -> emptyCmd (for ConnectionCmd) -> ConnectionCmd -> NewDep
// this -> ConnectionCmd -> NewDep
// that means that this and NewDep are already dependent
if (!ConnectionCmd) {
MDeps.push_back(NewDep);
Expand Down Expand Up @@ -693,35 +704,12 @@ void Command::emitInstrumentation(uint16_t Type, const char *Txt) {

bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking,
std::vector<Command *> &ToCleanUp) {
assert(MEnqueueStatus != EnqueueResultT::SyclEnqueueBlocked &&
"Final command enqueue should always be not blocked.");
// Exit if already enqueued
if (MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess)
return true;

// If the command is blocked from enqueueing
if (MIsBlockable && MEnqueueStatus == EnqueueResultT::SyclEnqueueBlocked) {
// Exit if enqueue type is not blocking
if (!Blocking) {
EnqueueResult = EnqueueResultT(EnqueueResultT::SyclEnqueueBlocked, this);
return false;
}

#ifdef XPTI_ENABLE_INSTRUMENTATION
// Scoped trace event notifier that emits a barrier begin and barrier end
// event, which models the barrier while enqueuing along with the blocked
// reason, as determined by the scheduler
std::string Info = "enqueue.barrier[";
Info += std::string(getBlockReason()) + "]";
emitInstrumentation(xpti::trace_barrier_begin, Info.c_str());
#endif

// Wait if blocking
while (MEnqueueStatus == EnqueueResultT::SyclEnqueueBlocked)
;
#ifdef XPTI_ENABLE_INSTRUMENTATION
emitInstrumentation(xpti::trace_barrier_end, Info.c_str());
#endif
}

std::lock_guard<std::mutex> Lock(MEnqueueMtx);

// Exit if the command is already enqueued
Expand Down Expand Up @@ -1535,83 +1523,6 @@ pi_int32 MemCpyCommandHost::enqueueImp() {
return PI_SUCCESS;
}

/// Constructs an empty command for the given queue.
///
/// The base Command is initialized with CommandType::EMPTY_TASK and the queue
/// pointer is moved into it.
///
/// \param Queue queue the command is associated with.
EmptyCommand::EmptyCommand(QueueImplPtr Queue)
: Command(CommandType::EMPTY_TASK, std::move(Queue)) {
// NOTE(review): presumably dispatches to emitInstrumentationData(); the proxy
// is defined outside this view - confirm.
emitInstrumentationDataProxy();
}

/// Enqueue step of the empty command.
///
/// The body calls the two wait helpers and then reports success.
///
/// \return always PI_SUCCESS.
pi_int32 EmptyCommand::enqueueImp() {
// First the prepared host events, then the prepared dependency events of this
// command on its queue.
waitForPreparedHostEvents();
waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef());

return PI_SUCCESS;
}

/// Registers a requirement for this command and adds the matching dependency.
///
/// A copy of \p Req is appended to MRequirements, and the dependency handed to
/// addDep() points at that stored copy rather than at the caller's object.
///
/// \param DepCmd command this empty command depends on.
/// \param AllocaCmd allocation command placed in the dependency descriptor.
/// \param Req requirement to copy; must not be null (it is dereferenced).
void EmptyCommand::addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd,
                                  const Requirement *Req) {
  // Store our own copy and take the address of the stored element.
  MRequirements.emplace_back(*Req);
  const Requirement *const OwnedReq = &MRequirements.back();

  // The original author's expectation: EmptyCommand is always a host command,
  // so addDep() should produce neither a connection command nor cleanup work.
  // Both expectations are checked in debug builds only.
  std::vector<Command *> ToCleanUp;
  Command *Cmd = addDep(DepDesc{DepCmd, OwnedReq, AllocaCmd}, ToCleanUp);
  assert(Cmd == nullptr && "Conection command should be null for EmptyCommand");
  assert(ToCleanUp.empty() && "addDep should add a command for cleanup only if "
                              "there's a connection command");
  // Silences the unused-variable warning when asserts are compiled out.
  (void)Cmd;
}

/// Emits XPTI trace data describing this command.
///
/// The body is compiled only when XPTI_ENABLE_INSTRUMENTATION is defined;
/// otherwise the function does nothing. It also returns early when tracing is
/// not enabled or when no requirement has been added yet.
void EmptyCommand::emitInstrumentationData() {
#ifdef XPTI_ENABLE_INSTRUMENTATION
// Nothing to emit when tracing is switched off.
if (!xptiTraceEnabled())
return;
// Create a payload with the command name and an event using this payload to
// emit a node_create
// Without a stored requirement there is no memory object to use as address.
if (MRequirements.empty())
return;

// The first stored requirement supplies the memory object.
Requirement &Req = *MRequirements.begin();

MAddress = Req.MSYCLMemObj;
makeTraceEventProlog(MAddress);

// Metadata is attached and the epilog is run only when MFirstInstance is set.
if (MFirstInstance) {
xpti_td *CmdTraceEvent = static_cast<xpti_td *>(MTraceEvent);
// Device identity of the queue this command belongs to.
xpti::addMetadata(CmdTraceEvent, "sycl_device",
deviceToID(MQueue->get_device()));
xpti::addMetadata(CmdTraceEvent, "sycl_device_type",
deviceToString(MQueue->get_device()));
xpti::addMetadata(CmdTraceEvent, "sycl_device_name",
getSyclObjImpl(MQueue->get_device())->getDeviceName());
// The memory object address is recorded as an integer value.
xpti::addMetadata(CmdTraceEvent, "memory_object",
reinterpret_cast<size_t>(MAddress));
makeTraceEventEpilog();
}
#endif
}

void EmptyCommand::printDot(std::ostream &Stream) const {
Stream << "\"" << this << "\" [style=filled, fillcolor=\"#8d8f29\", label=\"";

Stream << "ID = " << this << "\\n";
Stream << "EMPTY NODE"
<< "\\n";

Stream << "\"];" << std::endl;

for (const auto &Dep : MDeps) {
Stream << " \"" << this << "\" -> \"" << Dep.MDepCommand << "\""
<< " [ label = \"Access mode: "
<< accessModeToString(Dep.MDepRequirement->MAccessMode) << "\\n"
<< "MemObj: " << Dep.MDepRequirement->MSYCLMemObj << " \" ]"
<< std::endl;
}
}

/// Reports whether this command produces a PI event.
///
/// \return always false for the empty command.
bool EmptyCommand::producesPiEvent() const { return false; }

void MemCpyCommandHost::printDot(std::ostream &Stream) const {
Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\"";

Expand Down Expand Up @@ -1662,6 +1573,11 @@ void UpdateHostRequirementCommand::emitInstrumentationData() {
#endif
}

/// Reports whether this command may be cleaned up after it has been enqueued.
///
/// \return true only while the command is not blocking, i.e. the negation of
/// isBlocking().
bool UpdateHostRequirementCommand::supportsPostEnqueueCleanup() const {
// TODO: consider moving to base class
return !isBlocking();
}

static std::string cgTypeToString(detail::CG::CGTYPE Type) {
switch (Type) {
case detail::CG::Kernel:
Expand Down Expand Up @@ -2585,12 +2501,6 @@ bool ExecCGCommand::supportsPostEnqueueCleanup() const {
(MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask);
}

/// Tells whether this command-group command can be cleaned up.
///
/// For a command group of type CodeplayHostTask the command is ready once its
/// leaf counter is zero and its event reports completion. Every other command
/// group type defers to Command::readyForCleanup().
///
/// \return true when the command may be cleaned up.
bool ExecCGCommand::readyForCleanup() const {
  const bool IsHostTaskCG =
      MCommandGroup->getType() == CG::CGTYPE::CodeplayHostTask;
  return IsHostTaskCG ? (MLeafCounter == 0 && MEvent->isCompleted())
                      : Command::readyForCleanup();
}

KernelFusionCommand::KernelFusionCommand(QueueImplPtr Queue)
: Command(Command::CommandType::FUSION, Queue),
MStatus(FusionStatus::ACTIVE) {
Expand Down
59 changes: 12 additions & 47 deletions sycl/source/detail/scheduler/commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ class AllocaCommand;
class AllocaCommandBase;
class ReleaseCommand;
class ExecCGCommand;
class EmptyCommand;

enum BlockingT { NON_BLOCKING = 0, BLOCKING };

Expand Down Expand Up @@ -103,7 +102,6 @@ class Command {
MAP_MEM_OBJ,
UNMAP_MEM_OBJ,
UPDATE_REQUIREMENT,
EMPTY_TASK,
HOST_TASK,
FUSION
};
Expand Down Expand Up @@ -143,14 +141,14 @@ class Command {
return MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess;
}

// Shows that command could not be enqueued, now it may be true for empty task
// only
bool isEnqueueBlocked() const {
return MIsBlockable && MEnqueueStatus == EnqueueResultT::SyclEnqueueBlocked;
// Shows that command could be enqueued, but blocks enqueue of all
// commands depending on it. Regular usage - host task & host accessors.
bool isBlocking() const {
return MIsManuallyBlocked || (isHostTask() && !MEvent->isCompleted());
}
// Shows that command could be enqueued, but is blocking enqueue of all
// commands depending on it. Regular usage - host task.
bool isBlocking() const { return isHostTask() && !MEvent->isCompleted(); }
enum class BlockReason : int { HostAccessor = 0, HostTask };
bool blockManually(const BlockReason &Reason);
bool unblock();

void addBlockedUserUnique(const EventImplPtr &NewUser) {
if (std::find(MBlockedUsers.begin(), MBlockedUsers.end(), NewUser) !=
Expand Down Expand Up @@ -281,8 +279,8 @@ class Command {
std::vector<DepDesc> MDeps;
/// Contains list of commands that depend on the command.
std::unordered_set<Command *> MUsers;
/// Indicates whether the command can be blocked from enqueueing.
bool MIsBlockable = false;
/// Indicates whether the command is set as blocking for its users.
std::atomic_bool MIsManuallyBlocked = false;
/// Counts the number of memory objects this command is a leaf for.
unsigned MLeafCounter = 0;

Expand All @@ -295,10 +293,8 @@ class Command {
/// Used for marking the node during graph traversal.
Marks MMarks;

enum class BlockReason : int { HostAccessor = 0, HostTask };

// Only have reasonable value while MIsBlockable is true
BlockReason MBlockReason;
// Only have reasonable value while isBlocking returns true
BlockReason MBlockReason = BlockReason::HostTask;

/// Describes the status of the command.
std::atomic<EnqueueResultT::ResultT> MEnqueueStatus;
Expand Down Expand Up @@ -352,30 +348,6 @@ class Command {
std::mutex MBlockedUsersMutex;
};

/// The empty command does nothing during enqueue. The task can be used to
/// implement lock in the graph, or to merge several nodes into one.
class EmptyCommand : public Command {
public:
/// Constructs the command for the given queue.
EmptyCommand(QueueImplPtr Queue);

/// Prints this command and its dependencies to \p Stream.
void printDot(std::ostream &Stream) const final;
/// Returns a pointer to the first stored requirement. MRequirements is
/// indexed unconditionally, so at least one requirement must have been added
/// through addRequirement() before this is called.
const Requirement *getRequirement() const final { return &MRequirements[0]; }
/// Stores a copy of \p Req and adds a dependency on \p DepCmd that refers to
/// the stored copy.
void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd,
const Requirement *Req);

/// Emits instrumentation (tracing) data for this command.
void emitInstrumentationData() override;

/// Reports whether this command produces a PI event.
bool producesPiEvent() const final;

private:
/// Command-specific enqueue implementation.
pi_int32 enqueueImp() final;

// Employing deque here as it allows to push_back/emplace_back without
// invalidation of pointer or reference to stored data item regardless of
// iterator invalidation.
std::deque<Requirement> MRequirements;
};

/// The release command enqueues release of a memory object instance allocated
/// on Host or underlying framework.
class ReleaseCommand : public Command {
Expand Down Expand Up @@ -589,12 +561,6 @@ class ExecCGCommand : public Command {

detail::CG &getCG() const { return *MCommandGroup; }

// MEmptyCmd is only employed if this command refers to host-task.
// The mechanism of lookup for single EmptyCommand amongst users of
// host-task-representing command is unreliable. This unreliability roots in
// the cleanup process.
EmptyCommand *MEmptyCmd = nullptr;

// MFusionCommand is employed to mark a CG command as part of a kernel fusion
// and allows to refer back to the corresponding KernelFusionCommand if
// necessary.
Expand All @@ -604,8 +570,6 @@ class ExecCGCommand : public Command {

bool supportsPostEnqueueCleanup() const final;

bool readyForCleanup() const final;

private:
pi_int32 enqueueImp() final;

Expand All @@ -624,6 +588,7 @@ class UpdateHostRequirementCommand : public Command {
void printDot(std::ostream &Stream) const final;
const Requirement *getRequirement() const final { return &MDstReq; }
void emitInstrumentationData() final;
bool supportsPostEnqueueCleanup() const final;

private:
pi_int32 enqueueImp() final;
Expand Down
Loading