Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ typedef enum {
using pi_mem_flags = pi_bitfield;
// Access
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW = CL_MEM_READ_WRITE;
constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RO = CL_MEM_READ_ONLY;
// Host pointer
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE = CL_MEM_USE_HOST_PTR;
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY = CL_MEM_COPY_HOST_PTR;
Expand Down
21 changes: 21 additions & 0 deletions sycl/source/detail/device_image_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,29 @@ class device_image_impl {

const context &get_context() const noexcept { return MContext; }

void set_kernel_ids(std::vector<kernel_id> KernelIDs) noexcept {
MKernelIDs = std::move(KernelIDs);
}

std::vector<kernel_id> &get_kernel_ids_ref() noexcept { return MKernelIDs; }

std::vector<unsigned char> &get_spec_const_blob_ref() noexcept {
return MSpecConstsBlob;
}

RT::PiMem &get_spec_const_buffer_ref() noexcept {
std::lock_guard<std::mutex> Lock{MSpecConstAccessMtx};
if (nullptr == MSpecConstsBuffer) {
const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin();
Plugin.call<PiApiKind::piMemBufferCreate>(
detail::getSyclObjImpl(MContext)->getHandleRef(),
PI_MEM_FLAGS_ACCESS_RO | PI_MEM_FLAGS_HOST_PTR_USE,
MSpecConstsBlob.size(), MSpecConstsBlob.data(), &MSpecConstsBuffer,
nullptr);
}
return MSpecConstsBuffer;
}

const std::map<std::string, std::vector<SpecConstDescT>> &
get_spec_const_data_ref() const noexcept {
return MSpecConstSymMap;
Expand Down Expand Up @@ -244,6 +261,10 @@ class device_image_impl {
// Binary blob which can have values of all specialization constants in the
// image
std::vector<unsigned char> MSpecConstsBlob;
// Buffer containing binary blob which can have values of all specialization
// constants in the image, it is using for storing non-native specialization
// constants
RT::PiMem MSpecConstsBuffer = nullptr;
// Contains map of spec const names to their descriptions + offsets in
// the MSpecConstsBlob
std::map<std::string, std::vector<SpecConstDescT>> MSpecConstSymMap;
Expand Down
42 changes: 41 additions & 1 deletion sycl/source/detail/kernel_bundle_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,42 @@ class kernel_bundle_impl {
kernel
get_kernel(const kernel_id &KernelID,
const std::shared_ptr<detail::kernel_bundle_impl> &Self) const {
// TODO: remove this workaround after AOT binaries contain kernel IDs by
// default
bool HasKernelIdProp = std::any_of(
MDeviceImages.begin(), MDeviceImages.end(),
[](const auto &DeviceImage) {
return !getSyclObjImpl(DeviceImage)->get_kernel_ids().empty();
});
if (!HasKernelIdProp) {
for (const auto &DeviceImage : MDeviceImages) {
size_t Size;
const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin();
if (nullptr == getSyclObjImpl(DeviceImage)->get_program_ref()) {
continue;
}
Plugin.call<PiApiKind::piProgramGetInfo>(
getSyclObjImpl(DeviceImage)->get_program_ref(),
PI_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &Size);
std::string RawResult(Size, ' ');
Plugin.call<PiApiKind::piProgramGetInfo>(
getSyclObjImpl(DeviceImage)->get_program_ref(),
PI_PROGRAM_INFO_KERNEL_NAMES, RawResult.size(), &RawResult[0],
nullptr);
// Get rid of the null terminator
RawResult.pop_back();
std::vector<std::string> KernelNames(split_string(RawResult, ';'));
std::vector<kernel_id> KernelIDs;
for (const auto &KernelName : KernelNames) {
KernelIDs.push_back(detail::createSyclObjFromImpl<kernel_id>(
std::make_shared<detail::kernel_id_impl>(KernelName)));
}

std::sort(KernelIDs.begin(), KernelIDs.end(), detail::LessByNameComp{});

getSyclObjImpl(DeviceImage)->set_kernel_ids(KernelIDs);
}
}

auto It = std::find_if(MDeviceImages.begin(), MDeviceImages.end(),
[&KernelID](const device_image_plain &DeviceImage) {
Expand Down Expand Up @@ -373,7 +409,11 @@ class kernel_bundle_impl {
});
}

const device_image_plain *begin() const { return &MDeviceImages.front(); }
const device_image_plain *begin() const {
assert(!MDeviceImages.empty() && "MDeviceImages can't be empty");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please clarify why MDeviceImages can't be empty?
I believe this should behave as std::vector which has end() == begin() if empty() is true.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree that this is not a valid assert. I'll submit a fix as a separate pull request.

Copy link
Contributor Author

@dm-vodopyanov dm-vodopyanov May 4, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MDeviceImage can't be empty because MDeviceImages.front() is UB in case of MDeviceImages.empty() == true.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dm-vodopyanov it's UB to access front, but it doesn't mean, that kernel_bundle must have any device image at all. The spec mentions empty() member function for kernel_bundle, which @romanovvlad refers to: https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_the_kernel_bundle_class

// UB in case MDeviceImages is empty
return &MDeviceImages.front();
}

const device_image_plain *end() const { return &MDeviceImages.back() + 1; }

Expand Down
3 changes: 2 additions & 1 deletion sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1309,7 +1309,8 @@ void ProgramManager::bringSYCLDeviceImagesToState(
break;
}
case bundle_state::executable:
// Device image is already in the desired state.
DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I assume this build call is optionally needed to do native device code linking? Why not call to link then? Please add a comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Build is needed here to create device image which contain spec constants; as device image is in executable state because of AOT, build instead of link (object state) is used.

/*PropList=*/{});
break;
}
break;
Expand Down
39 changes: 27 additions & 12 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1636,8 +1636,9 @@ static void ReverseRangeDimensionsForKernel(NDRDescT &NDR) {
}

pi_result ExecCGCommand::SetKernelParamsAndLaunch(
CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc,
std::vector<RT::PiEvent> &RawEvents, RT::PiEvent &Event,
CGExecKernel *ExecKernel,
std::shared_ptr<device_image_impl> DeviceImageImpl, RT::PiKernel Kernel,
NDRDescT &NDRDesc, std::vector<RT::PiEvent> &RawEvents, RT::PiEvent &Event,
ProgramManager::KernelArgMask EliminatedArgMask) {
vector_class<ArgDesc> &Args = ExecKernel->MArgs;
// TODO this is not necessary as long as we can guarantee that the arguments
Expand Down Expand Up @@ -1692,9 +1693,21 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch(
break;
}
case kernel_param_kind_t::kind_specialization_constants_buffer: {
throw cl::sycl::feature_not_supported(
"SYCL2020 specialization constants are not yet fully supported",
PI_INVALID_OPERATION);
if (MQueue->is_host()) {
throw cl::sycl::feature_not_supported(
"SYCL2020 specialization constants are not yet supported on host "
"device",
PI_INVALID_OPERATION);
}
if (DeviceImageImpl != nullptr) {
RT::PiMem SpecConstsBuffer =
DeviceImageImpl->get_spec_const_buffer_ref();
Plugin.call<PiApiKind::piKernelSetArg>(
Kernel, NextTrueIndex, sizeof(RT::PiMem), &SpecConstsBuffer);
} else {
Plugin.call<PiApiKind::piKernelSetArg>(Kernel, NextTrueIndex,
sizeof(RT::PiMem), nullptr);
}
break;
}
}
Expand Down Expand Up @@ -1917,6 +1930,8 @@ cl_int ExecCGCommand::enqueueImp() {
bool KnownProgram = true;

std::shared_ptr<kernel_impl> SyclKernelImpl;
std::shared_ptr<device_image_impl> DeviceImageImpl;

// Use kernel_bundle is available
if (KernelBundleImplPtr) {

Expand All @@ -1930,9 +1945,7 @@ cl_int ExecCGCommand::enqueueImp() {
SyclKernelImpl = detail::getSyclObjImpl(SyclKernel);

Kernel = SyclKernelImpl->getHandleRef();

std::shared_ptr<device_image_impl> DeviceImageImpl =
SyclKernelImpl->getDeviceImage();
DeviceImageImpl = SyclKernelImpl->getDeviceImage();

Program = DeviceImageImpl->get_program_ref();

Expand Down Expand Up @@ -1980,11 +1993,13 @@ cl_int ExecCGCommand::enqueueImp() {
if (KernelMutex != nullptr) {
// For cacheable kernels, we use per-kernel mutex
std::lock_guard<std::mutex> Lock(*KernelMutex);
Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents,
Event, EliminatedArgMask);
Error =
SetKernelParamsAndLaunch(ExecKernel, DeviceImageImpl, Kernel, NDRDesc,
RawEvents, Event, EliminatedArgMask);
} else {
Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents,
Event, EliminatedArgMask);
Error =
SetKernelParamsAndLaunch(ExecKernel, DeviceImageImpl, Kernel, NDRDesc,
RawEvents, Event, EliminatedArgMask);
}

if (PI_SUCCESS != Error) {
Expand Down
7 changes: 4 additions & 3 deletions sycl/source/detail/scheduler/commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,9 +519,10 @@ class ExecCGCommand : public Command {
AllocaCommandBase *getAllocaForReq(Requirement *Req);

pi_result SetKernelParamsAndLaunch(
CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc,
std::vector<RT::PiEvent> &RawEvents, RT::PiEvent &Event,
ProgramManager::KernelArgMask EliminatedArgMask);
CGExecKernel *ExecKernel,
std::shared_ptr<device_image_impl> DeviceImageImpl, RT::PiKernel Kernel,
NDRDescT &NDRDesc, std::vector<RT::PiEvent> &RawEvents,
RT::PiEvent &Event, ProgramManager::KernelArgMask EliminatedArgMask);

std::unique_ptr<detail::CG> MCommandGroup;

Expand Down
10 changes: 7 additions & 3 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ handler::getOrInsertHandlerKernelBundle(bool Insert) const {
if (!KernelBundleImpPtr && Insert) {
KernelBundleImpPtr = detail::getSyclObjImpl(
get_kernel_bundle<bundle_state::input>(MQueue->get_context()));
if (KernelBundleImpPtr->empty()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT. It would be nice to have a comment explaining this logic.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll submit some comments as a separate PR.

KernelBundleImpPtr = detail::getSyclObjImpl(
get_kernel_bundle<bundle_state::executable>(MQueue->get_context()));
}

detail::ExtendedMemberT EMember = {
detail::ExtendedMembersType::HANDLER_KERNEL_BUNDLE, KernelBundleImpPtr};
Expand Down Expand Up @@ -336,9 +340,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
break;
}
case kernel_param_kind_t::kind_specialization_constants_buffer: {
throw cl::sycl::feature_not_supported(
"SYCL2020 specialization constants are not yet fully supported",
PI_INVALID_OPERATION);
MArgs.emplace_back(
kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size,
Index + IndexShift);
break;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@
// and parallel_for_work_group to verify that this code compiles and runs
// correctly with user's lambda with and without sycl::kernel_handler argument

// TODO: enable cuda support when non-native spec constants started to be
// supported
// UNSUPPORTED: cuda

#include <CL/sycl.hpp>

int main() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#include <sycl/sycl.hpp>

#include <cmath>

class Kernel1Name;
class Kernel2Name;

struct TestStruct {
int a;
int b;
};

const static sycl::specialization_id<int> SpecConst1{42};
const static sycl::specialization_id<int> SpecConst2{42};
const static sycl::specialization_id<TestStruct> SpecConst3{TestStruct{42, 42}};
const static sycl::specialization_id<short> SpecConst4{42};

int main() {
sycl::queue Q;

// No support for host device so far
if (Q.is_host())
return 0;

{
sycl::buffer<int, 1> Buf{sycl::range{1}};
Q.submit([&](sycl::handler &CGH) {
CGH.set_specialization_constant<SpecConst2>(1);
auto Acc = Buf.get_access<sycl::access::mode::read_write>(CGH);
CGH.single_task<class Kernel3Name>([=](sycl::kernel_handler KH) {
Acc[0] = KH.get_specialization_constant<SpecConst2>();
});
});
auto Acc = Buf.get_access<sycl::access::mode::read>();
assert(Acc[0] == 1);
}

{
sycl::buffer<TestStruct, 1> Buf{sycl::range{1}};
Q.submit([&](sycl::handler &CGH) {
auto Acc = Buf.get_access<sycl::access::mode::read_write>(CGH);
CGH.set_specialization_constant<SpecConst3>(TestStruct{1, 2});
const auto SC = CGH.get_specialization_constant<SpecConst4>();
assert(SC == 42);
CGH.single_task<class Kernel4Name>([=](sycl::kernel_handler KH) {
Acc[0] = KH.get_specialization_constant<SpecConst3>();
});
});
auto Acc = Buf.get_access<sycl::access::mode::read>();
assert(Acc[0].a == 1 && Acc[0].b == 2);
}

return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// REQUIRES: aoc, accelerator

// RUN: %clangxx -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// This test checks correctness of SYCL2020 non-native specialization constants
// on accelerator device
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out

// This test checks correctness of compiling and running of application with
// kernel lambdas containing kernel_handler arguments and w/o usage of
// specialization constants in AOT mode

#include <CL/sycl.hpp>

int main() {
sycl::queue q;

q.submit([&](sycl::handler &cgh) {
cgh.single_task<class KernelSingleTaskWithKernelHandler>(
[=](sycl::kernel_handler kh) {});
});

q.submit([&](sycl::handler &cgh) {
cgh.parallel_for<class KernelParallelForNDItemWithKernelHandler>(
sycl::nd_range<3>(sycl::range<3>(4, 4, 4), sycl::range<3>(2, 2, 2)),
[=](sycl::nd_item<3> item, sycl::kernel_handler kh) {});
});

// parallel_for_work_group with kernel_handler arg
q.submit([&](sycl::handler &cgh) {
cgh.parallel_for_work_group<
class KernelParallelForWorkGroupWithoutKernelHandler>(
sycl::range<3>(2, 2, 2), sycl::range<3>(2, 2, 2),
[=](sycl::group<3> myGroup, sycl::kernel_handler kh) {
myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {});
myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {});
});
});
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// REQUIRES: opencl-aot, cpu

// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out

// This test checks correctness of SYCL2020 non-native specialization constants
// on CPU device
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl -fsycl-targets=nvptx64-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out
// RUN: env SYCL_DEVICE_FILTER=cuda %t.out

// This test checks correctness of SYCL2020 non-native specialization constants
// on CUDA device
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// REQUIRES: ocloc, gpu
// UNSUPPORTED: cuda
// CUDA is not compatible with SPIR.

// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend=spir64_gen-unknown-unknown-sycldevice "-device *" %S/Inputs/common.cpp -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// This test checks correctness of SYCL2020 non-native specialization constants
// on GPU device