Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/source/backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext,
// this by pre-building the device image and extracting kernel info. We can't
// do the same to user images, since they may contain references to undefined
// symbols (e.g. when kernel_bundle is supposed to be joined with another).
std::vector<kernel_id> KernelIDs{};
std::shared_ptr<std::vector<kernel_id>> KernelIDs{new std::vector<kernel_id>};
auto DevImgImpl = std::make_shared<device_image_impl>(
nullptr, TargetContext, Devices, State, KernelIDs, PiProgram);
device_image_plain DevImg{DevImgImpl};
Expand Down
20 changes: 13 additions & 7 deletions sycl/source/detail/device_image_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ __SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {

template <class T> struct LessByHash {
bool operator()(const T &LHS, const T &RHS) const {
return getSyclObjImpl(LHS) < getSyclObjImpl(RHS);
}
};

// The class is impl counterpart for sycl::device_image
// It can represent a program in different states, kernel_id's it has and state
// of specialization constants for it
Expand All @@ -51,7 +57,7 @@ class device_image_impl {

device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
std::vector<device> Devices, bundle_state State,
std::vector<kernel_id> KernelIDs, RT::PiProgram Program)
std::shared_ptr<std::vector<kernel_id>> KernelIDs, RT::PiProgram Program)
: MBinImage(BinImage), MContext(std::move(Context)),
MDevices(std::move(Devices)), MState(State), MProgram(Program),
MKernelIDs(std::move(KernelIDs)) {
Expand All @@ -60,7 +66,7 @@ class device_image_impl {

device_image_impl(const RTDeviceBinaryImage *BinImage, context Context,
std::vector<device> Devices, bundle_state State,
std::vector<kernel_id> KernelIDs, RT::PiProgram Program,
std::shared_ptr<std::vector<kernel_id>> KernelIDs, RT::PiProgram Program,
const SpecConstMapT &SpecConstMap,
const std::vector<unsigned char> &SpecConstsBlob)
: MBinImage(BinImage), MContext(std::move(Context)),
Expand All @@ -69,8 +75,8 @@ class device_image_impl {
MSpecConstSymMap(SpecConstMap) {}

bool has_kernel(const kernel_id &KernelIDCand) const noexcept {
return std::binary_search(MKernelIDs.begin(), MKernelIDs.end(),
KernelIDCand, LessByNameComp{});
return std::binary_search(MKernelIDs->begin(), MKernelIDs->end(),
KernelIDCand, LessByHash<kernel_id>{});
}

bool has_kernel(const kernel_id &KernelIDCand,
Expand All @@ -83,7 +89,7 @@ class device_image_impl {
}

const std::vector<kernel_id> &get_kernel_ids() const noexcept {
return MKernelIDs;
return *MKernelIDs;
}

bool has_specialization_constants() const noexcept {
Expand Down Expand Up @@ -176,7 +182,7 @@ class device_image_impl {

const context &get_context() const noexcept { return MContext; }

std::vector<kernel_id> &get_kernel_ids_ref() noexcept { return MKernelIDs; }
std::shared_ptr<std::vector<kernel_id>> &get_kernel_ids_ref() noexcept { return MKernelIDs; }

std::vector<unsigned char> &get_spec_const_blob_ref() noexcept {
return MSpecConstsBlob;
Expand Down Expand Up @@ -312,7 +318,7 @@ class device_image_impl {
RT::PiProgram MProgram = nullptr;
// List of kernel ids available in this image, elements should be sorted
// according to LessByNameComp
std::vector<kernel_id> MKernelIDs;
std::shared_ptr<std::vector<kernel_id>> MKernelIDs;

// A mutex for sycnhronizing access to spec constants blob. Mutable because
// needs to be locked in the const method for getting spec constant value.
Expand Down
6 changes: 0 additions & 6 deletions sycl/source/detail/kernel_bundle_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,6 @@ __SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {

template <class T> struct LessByHash {
bool operator()(const T &LHS, const T &RHS) const {
return getSyclObjImpl(LHS) < getSyclObjImpl(RHS);
}
};

static bool checkAllDevicesAreInContext(const std::vector<device> &Devices,
const context &Context) {
const std::vector<device> &ContextDevices = Context.get_devices();
Expand Down
215 changes: 106 additions & 109 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1070,40 +1070,22 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
ArgMaskMap[Info->Name] =
createKernelArgMask(pi::DeviceBinaryProperty(Info).asByteArray());
}
// Use the entry information if it's available
if (EntriesB != EntriesE) {
// The kernel sets for any pair of images are either disjoint or
// identical, look up the kernel set using the first kernel name...
StrToKSIdMap &KSIdMap = m_KernelSets[M];
auto KSIdIt = KSIdMap.find(EntriesB->name);
if (KSIdIt != KSIdMap.end()) {
for (_pi_offload_entry EntriesIt = EntriesB + 1; EntriesIt != EntriesE;
++EntriesIt)
assert(KSIdMap[EntriesIt->name] == KSIdIt->second &&
"Kernel sets are not disjoint");
auto &Imgs = m_DeviceImages[KSIdIt->second];
assert(Imgs && "Device image vector should have been already created");

cacheKernelUsesAssertInfo(M, *Img);

Imgs->push_back(std::move(Img));
continue;
}
// ... or create the set first if it hasn't been
KernelSetId KSId = getNextKernelSetId();
{
// Fill maps for kernel bundles
if (EntriesB != EntriesE) {
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);

// Register all exported symbols
auto ExportedSymbols = Img->getExportedSymbols();
for (const pi_device_binary_property &ExportedSymbol : ExportedSymbols)
m_ExportedSymbols.insert(ExportedSymbol->Name);


m_BinImg2KernelIDs[Img.get()].reset(new std::vector<kernel_id>);


for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
++EntriesIt) {
auto Result = KSIdMap.insert(std::make_pair(EntriesIt->name, KSId));
(void)Result;
assert(Result.second && "Kernel sets are not disjoint");

// Skip creating unique kernel ID if it is a service kernel.
// SYCL service kernels are identified by having
Expand All @@ -1122,12 +1104,59 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
continue;

// ... and create a unique kernel ID for the entry
std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
sycl::kernel_id KernelID =
detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
m_KernelIDs.insert(
std::make_pair(EntriesIt->name, std::move(KernelID)));
auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
if(It == m_KernelName2KernelIDs.end()) {
std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
sycl::kernel_id KernelID =
detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);

It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name,
KernelID);
}

m_KernelName2KernelIDs.insert(
std::make_pair(EntriesIt->name, It->second));

m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
m_BinImg2KernelIDs[Img.get()]->push_back(It->second);

}

// Sort kernel ids for faster search
std::sort(m_BinImg2KernelIDs[Img.get()]->begin(),
m_BinImg2KernelIDs[Img.get()]->end(), LessByHash<kernel_id>{});
}

// TODO: Remove the code below once program manager works trought kernel
// bundles only
// Use the entry information if it's available
if (EntriesB != EntriesE) {
// The kernel sets for any pair of images are either disjoint or
// identical, look up the kernel set using the first kernel name...
StrToKSIdMap &KSIdMap = m_KernelSets[M];
auto KSIdIt = KSIdMap.find(EntriesB->name);
if (KSIdIt != KSIdMap.end()) {
for (_pi_offload_entry EntriesIt = EntriesB + 1; EntriesIt != EntriesE;
++EntriesIt)
assert(KSIdMap[EntriesIt->name] == KSIdIt->second &&
"Kernel sets are not disjoint");
auto &Imgs = m_DeviceImages[KSIdIt->second];
assert(Imgs && "Device image vector should have been already created");

cacheKernelUsesAssertInfo(M, *Img);

Imgs->push_back(std::move(Img));
continue;
}
// ... or create the set first if it hasn't been
KernelSetId KSId = getNextKernelSetId();
{
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);

for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
++EntriesIt) {
KSIdMap.insert(std::make_pair(EntriesIt->name, KSId));
}
}
m_DeviceImages[KSId].reset(new std::vector<RTDeviceBinaryImageUPtr>());
Expand Down Expand Up @@ -1347,8 +1376,8 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage,
kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) {
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);

auto KernelID = m_KernelIDs.find(KernelName);
if (KernelID == m_KernelIDs.end())
auto KernelID = m_KernelName2KernelIDs.find(KernelName);
if (KernelID == m_KernelName2KernelIDs.end())
throw runtime_error("No kernel found with the specified name",
PI_INVALID_KERNEL_NAME);

Expand All @@ -1359,8 +1388,9 @@ std::vector<kernel_id> ProgramManager::getAllSYCLKernelIDs() {
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);

std::vector<sycl::kernel_id> AllKernelIDs;
AllKernelIDs.reserve(m_KernelIDs.size());
for (std::pair<std::string, kernel_id> KernelID : m_KernelIDs) {
AllKernelIDs.reserve(m_KernelName2KernelIDs.size());
// TODO: Replace with inserts of vectors from m_BinImg2KernelIDs ?
for (std::pair<std::string, kernel_id> KernelID : m_KernelName2KernelIDs) {
AllKernelIDs.push_back(KernelID.second);
}
return AllKernelIDs;
Expand All @@ -1382,80 +1412,58 @@ kernel_id ProgramManager::getBuiltInKernelID(const std::string &KernelName) {
std::vector<device_image_plain>
ProgramManager::getSYCLDeviceImagesWithCompatibleState(
const context &Ctx, const std::vector<device> &Devs,
bundle_state TargetState) {

// Collect raw device images
std::vector<RTDeviceBinaryImage *> BinImages;
{
bundle_state TargetState, const std::vector<kernel_id> &KernelIDs) {

// Collect unique raw device images taking into account kernel ids passed
// TODO: Can we avoid repacking?
std::set<RTDeviceBinaryImage *> BinImages;
if (!KernelIDs.empty()) {
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
for (const kernel_id &KID : KernelIDs) {
auto Range = m_KernelIDs2BinImage.equal_range(KID);
for (auto It = Range.first, End = Range.second; It != End; ++It)
BinImages.insert(It->second);
}
} else {
std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
for (auto &ImagesSets : m_DeviceImages) {
auto &ImagesUPtrs = *ImagesSets.second.get();
for (auto &ImageUPtr : ImagesUPtrs) {
const RTDeviceBinaryImage *BinImage = ImageUPtr.get();
const bundle_state ImgState = getBinImageState(BinImage);

// Ignore images with incompatible state. Image is considered compatible
// with a target state if an image is already in the target state or can
// be brought to target state by compiling/linking/building.
//
// Example: an image in "executable" state is not compatible with
// "input" target state - there is no operation to convert the image it
// to "input" state. An image in "input" state is compatible with
// "executable" target state because it can be built to get into
// "executable" state.
if (ImgState > TargetState)
continue;

BinImages.push_back(ImageUPtr.get());
}
for (auto &ImageUPtr : ImagesUPtrs)
BinImages.insert(ImageUPtr.get());
}
}
// TODO: Add a diagnostic on multiple device images with conflicting kernel
// names, and remove OSModuleHandle usage, as conflicting kernel names will be
// an error.
assert(BinImages.size() > 0 && "Expected to find at least on device image");

// TODO: Cache device_image objects
// Create SYCL device image from those that have compatible state and at least
// one device
std::vector<device_image_plain> SYCLDeviceImages;
for (RTDeviceBinaryImage *BinImage : BinImages) {
const bundle_state ImgState = getBinImageState(BinImage);

// Ignore images with incompatible state. Image is considered compatible
// with a target state if an image is already in the target state or can
// be brought to target state by compiling/linking/building.
//
// Example: an image in "executable" state is not compatible with
// "input" target state - there is no operation to convert the image it
// to "input" state. An image in "input" state is compatible with
// "executable" target state because it can be built to get into
// "executable" state.
if (ImgState > TargetState)
continue;

for (const sycl::device &Dev : Devs) {
if (!compatibleWithDevice(BinImage, Dev))
continue;

std::vector<sycl::kernel_id> KernelIDs;
std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
// Collect kernel names for the image
pi_device_binary DevBin =
const_cast<pi_device_binary>(&BinImage->getRawData());
{
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
for (_pi_offload_entry EntriesIt = DevBin->EntriesBegin;
EntriesIt != DevBin->EntriesEnd; ++EntriesIt) {
auto KernelID = m_KernelIDs.find(EntriesIt->name);

if (KernelID == m_KernelIDs.end()) {
// Service kernels and exported symbols do not have kernel IDs
assert((m_ServiceKernels.find(EntriesIt->name) !=
m_ServiceKernels.end() ||
m_ExportedSymbols.find(EntriesIt->name) !=
m_ExportedSymbols.end()) &&
"Kernel ID in device binary missing from cache");
continue;
}

KernelIDs.push_back(KernelID->second);
}
KernelIDs = m_BinImg2KernelIDs[BinImage];
// If the image does not contain any non-service kernels we can skip it.
if (KernelIDs->empty())
continue;
}

// If the image does not contain any non-service kernels we can skip it.
if (KernelIDs.empty())
continue;

// device_image_impl expects kernel ids to be sorted for fast search
std::sort(KernelIDs.begin(), KernelIDs.end(), LessByNameComp{});

DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);

Expand Down Expand Up @@ -1556,8 +1564,9 @@ std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
{
std::lock_guard<std::mutex> BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex);

for (const kernel_id &ID : KernelIDs) {
if (m_BuiltInKernelIDs.find(ID.get_name()) != m_BuiltInKernelIDs.end())
for (auto &It : m_BuiltInKernelIDs) {
if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) !=
KernelIDs.end())
throw sycl::exception(make_error_code(errc::kernel_argument),
"Attempting to use a built-in kernel. They are "
"not fully supported");
Expand All @@ -1566,19 +1575,7 @@ std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(

// Collect device images with compatible state
std::vector<device_image_plain> DeviceImages =
getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);

// Filter out images that have no kernel_ids specified
auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(),
[&KernelIDs](const device_image_plain &Image) {
return std::none_of(
KernelIDs.begin(), KernelIDs.end(),
[&Image](const sycl::kernel_id &KernelID) {
return Image.has_kernel(KernelID);
});
});

DeviceImages.erase(It, DeviceImages.end());
getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);

// Brind device images with compatible state to desired state
bringSYCLDeviceImagesToState(DeviceImages, TargetState);
Expand Down Expand Up @@ -1683,15 +1680,15 @@ ProgramManager::link(const std::vector<device_image_plain> &DeviceImages,
Plugin.reportPiError(Error, "link()");
}

std::vector<kernel_id> KernelIDs;
std::shared_ptr<std::vector<kernel_id>> KernelIDs{new std::vector<kernel_id>};
for (const device_image_plain &DeviceImage : DeviceImages) {
// Duplicates are not expected here, otherwise piProgramLink should fail
KernelIDs.insert(KernelIDs.end(),
getSyclObjImpl(DeviceImage)->get_kernel_ids().begin(),
getSyclObjImpl(DeviceImage)->get_kernel_ids().end());
KernelIDs->insert(KernelIDs->end(),
getSyclObjImpl(DeviceImage)->get_kernel_ids_ref()->begin(),
getSyclObjImpl(DeviceImage)->get_kernel_ids_ref()->end());
}
// device_image_impl expects kernel ids to be sorted for fast search
std::sort(KernelIDs.begin(), KernelIDs.end(), LessByNameComp{});
std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash<kernel_id>{});

DeviceImageImplPtr ExecutableImpl =
std::make_shared<detail::device_image_impl>(
Expand Down
Loading