diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp index 7e086225ed97b..a256870a1058d 100644 --- a/sycl/source/detail/device_kernel_info.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -13,14 +13,7 @@ inline namespace _V1 { namespace detail { DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info) - : CompileTimeKernelInfoTy(Info) { - init(Name.data()); -} - -void DeviceKernelInfo::init(std::string_view KernelName) { - auto &PM = detail::ProgramManager::getInstance(); - MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName); -} + : CompileTimeKernelInfoTy(Info) {} template inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS, @@ -50,6 +43,10 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded( assert(Info == *this); } +void DeviceKernelInfo::setImplicitLocalArgPos(int Pos) { + assert(!MImplicitLocalArgPos.has_value() || MImplicitLocalArgPos == Pos); + MImplicitLocalArgPos = Pos; +} } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index 0533de2b72a55..710cab687bb32 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -96,10 +96,14 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { FastKernelSubcacheT &getKernelSubcache() { return MFastKernelSubcache; } - std::optional getImplicitLocalArgPos() const { + const std::optional &getImplicitLocalArgPos() const { return MImplicitLocalArgPos; } + // Implicit local argument position is used only for some backends, so this + // funciton allows setting it as more images are added. + void setImplicitLocalArgPos(int Pos); + private: bool isCompileTimeInfoSet() const { return KernelSize != 0; } diff --git a/sycl/source/detail/get_device_kernel_info.cpp b/sycl/source/detail/get_device_kernel_info.cpp index 9d4f30efeaf8a..d660b4499ce43 100644 --- a/sycl/source/detail/get_device_kernel_info.cpp +++ b/sycl/source/detail/get_device_kernel_info.cpp @@ -16,7 +16,7 @@ inline namespace _V1 { namespace detail { DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { - return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info); + return ProgramManager::getInstance().getDeviceKernelInfo(Info); } } // namespace detail diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 11031f792f347..484291366ee3d 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -31,8 +31,8 @@ kernel_impl::kernel_impl(Managed &&Kernel, MCreatedFromSource(true), MKernelBundleImpl(KernelBundleImpl ? KernelBundleImpl->shared_from_this() : nullptr), - MIsInterop(true), MKernelArgMaskPtr{ArgMask}, - MInteropDeviceKernelInfo(createCompileTimeKernelInfo(getName())) { + MIsInterop(true), MKernelArgMaskPtr{ArgMask}, MOwnsDeviceKernelInfo(true), + MDeviceKernelInfo(createCompileTimeKernelInfo(getName())) { ur_context_handle_t UrContext = nullptr; // Using the adapter from the passed ContextImpl getAdapter().call( @@ -59,9 +59,11 @@ kernel_impl::kernel_impl(Managed &&Kernel, MKernelBundleImpl(KernelBundleImpl.shared_from_this()), MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop), MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex}, - MInteropDeviceKernelInfo(MIsInterop - ? createCompileTimeKernelInfo(getName()) - : createCompileTimeKernelInfo()) { + MOwnsDeviceKernelInfo(checkOwnsDeviceKernelInfo()), + MDeviceKernelInfo(MOwnsDeviceKernelInfo + ? createCompileTimeKernelInfo(getName()) + : createCompileTimeKernelInfo()) { + // Enable USM indirect access for interop and non-sycl-jit source kernels. // sycl-jit kernels will enable this if needed through the regular kernel // path. @@ -121,6 +123,16 @@ std::string_view kernel_impl::getName() const { return MName; } +bool kernel_impl::checkOwnsDeviceKernelInfo() { + // If the image originates from something other than standard offline + // compilation, this kernel needs to own its info structure. + // We could also have a mixed origin image, in which case the device kernel + // info might reside in program manager. + return MDeviceImageImpl->getOriginMask() != ImageOriginSYCLOffline && + (!(MDeviceImageImpl->getOriginMask() & ImageOriginSYCLOffline) || + !ProgramManager::getInstance().tryGetDeviceKernelInfo(getName())); +} + bool kernel_impl::isBuiltInKernel(device_impl &Device) const { auto BuiltInKernels = Device.get_info(); if (BuiltInKernels.empty()) diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index da7de6138bf5a..e69946504f9b6 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -235,10 +235,11 @@ class kernel_impl { std::mutex *getCacheMutex() const { return MCacheMutex; } std::string_view getName() const; + bool checkOwnsDeviceKernelInfo(); DeviceKernelInfo &getDeviceKernelInfo() { - return MIsInterop - ? MInteropDeviceKernelInfo - : ProgramManager::getInstance().getOrCreateDeviceKernelInfo( + return MOwnsDeviceKernelInfo + ? MDeviceKernelInfo + : ProgramManager::getInstance().getDeviceKernelInfo( std::string_view(getName())); } @@ -255,9 +256,11 @@ class kernel_impl { std::mutex *MCacheMutex = nullptr; mutable std::string MName; - // It is used for the interop kernels only. + // Used for images that aren't obtained with standard SYCL offline + // compilation. // For regular kernel we get DeviceKernelInfo from the ProgramManager. - DeviceKernelInfo MInteropDeviceKernelInfo; + bool MOwnsDeviceKernelInfo = false; + DeviceKernelInfo MDeviceKernelInfo; bool isBuiltInKernel(device_impl &Device) const; void checkIfValidForNumArgsInfoQuery() const; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index ba59d2cbb7a5a..31a60ca00ecdc 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1510,27 +1510,34 @@ void ProgramManager::cacheKernelImplicitLocalArg( Img.getImplicitLocalArg(); if (ImplicitLocalArgRange.isAvailable()) for (auto Prop : ImplicitLocalArgRange) { - m_KernelImplicitLocalArgPos[Prop->Name] = - DeviceBinaryProperty(Prop).asUint32(); + auto It = m_DeviceKernelInfoMap.find(Prop->Name); + assert(It != m_DeviceKernelInfoMap.end()); + It->second.setImplicitLocalArgPos(DeviceBinaryProperty(Prop).asUint32()); } } -DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo( - const CompileTimeKernelInfoTy &Info) { +DeviceKernelInfo & +ProgramManager::getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { std::lock_guard Guard(m_DeviceKernelInfoMapMutex); - auto [Iter, Inserted] = m_DeviceKernelInfoMap.try_emplace(Info.Name, Info); - if (!Inserted) - Iter->second.setCompileTimeInfoIfNeeded(Info); - return Iter->second; + auto It = m_DeviceKernelInfoMap.find(Info.Name); + assert(It != m_DeviceKernelInfoMap.end()); + It->second.setCompileTimeInfoIfNeeded(Info); + return It->second; } DeviceKernelInfo & -ProgramManager::getOrCreateDeviceKernelInfo(std::string_view KernelName) { +ProgramManager::getDeviceKernelInfo(std::string_view KernelName) { std::lock_guard Guard(m_DeviceKernelInfoMapMutex); - CompileTimeKernelInfoTy DefaultCompileTimeInfo{KernelName}; - auto Result = - m_DeviceKernelInfoMap.try_emplace(KernelName, DefaultCompileTimeInfo); - return Result.first->second; + auto It = m_DeviceKernelInfoMap.find(KernelName); + assert(It != m_DeviceKernelInfoMap.end()); + return It->second; +} + +DeviceKernelInfo * +ProgramManager::tryGetDeviceKernelInfo(std::string_view KernelName) { + std::lock_guard Guard(m_DeviceKernelInfoMapMutex); + auto It = m_DeviceKernelInfoMap.find(KernelName); + return It != m_DeviceKernelInfoMap.end() ? &It->second : nullptr; } static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg, @@ -1733,6 +1740,10 @@ void ProgramManager::addImage(sycl_device_binary RawImg, m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); KernelIDs->push_back(It->second); + CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(name)}; + m_DeviceKernelInfoMap.try_emplace(std::string_view(name), + DefaultCompileTimeInfo); + // Keep track of image to kernel name reference count for cleanup. m_KernelNameRefCount[name]++; } @@ -1924,7 +1935,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { if (--RefCount == 0) { // TODO aggregate all these maps into a single one since their entries // share lifetime. - m_KernelImplicitLocalArgPos.erase(Name); m_DeviceKernelInfoMap.erase(Name); m_KernelNameRefCount.erase(RefCountIt); if (Name2IDIt != m_KernelName2KernelIDs.end()) diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index c279fe4934830..9f83b864c3aa7 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -381,17 +381,11 @@ class ProgramManager { SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; } - std::optional - kernelImplicitLocalArgPos(std::string_view KernelName) const { - auto it = m_KernelImplicitLocalArgPos.find(KernelName); - if (it != m_KernelImplicitLocalArgPos.end()) - return it->second; - return {}; - } + void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img); - DeviceKernelInfo & - getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); - DeviceKernelInfo &getOrCreateDeviceKernelInfo(std::string_view KernelName); + DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); + DeviceKernelInfo &getDeviceKernelInfo(std::string_view KernelName); + DeviceKernelInfo *tryGetDeviceKernelInfo(std::string_view KernelName); std::set getRawDeviceImages(const std::vector &KernelIDs); @@ -420,9 +414,6 @@ class ProgramManager { /// Dumps image to current directory void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const; - /// Add info on kernels using local arg into cache - void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img); - std::set collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device_impl &Dev); @@ -529,12 +520,6 @@ class ProgramManager { bool m_UseSpvFile = false; RTDeviceBinaryImageUPtr m_SpvFileImage; - // std::less<> is a transparent comparator that enabled comparison between - // different types without temporary key_type object creation. This includes - // standard overloads, such as comparison between std::string and - // std::string_view or just char*. - std::unordered_map m_KernelImplicitLocalArgPos; - // Map for storing device kernel information. Runtime lookup should be avoided // by caching the pointers when possible. std::unordered_map m_DeviceKernelInfoMap; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 152b36efa68f2..0733cbe332322 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2438,7 +2438,7 @@ static ur_result_t SetKernelParamsAndLaunch( applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc); } - std::optional ImplicitLocalArg = + const std::optional &ImplicitLocalArg = DeviceKernelInfo.getImplicitLocalArgPos(); // Set the implicit local memory buffer to support // get_work_group_scratch_memory. This is for backend not supporting diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 5e3ca043a9031..77e110c39f312 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -488,7 +488,7 @@ detail::EventImplPtr handler::finalize() { // Fetch the device kernel info pointer if it hasn't been set (e.g. // in kernel bundle or free function cases). impl->MKernelData.setDeviceKernelInfoPtr( - &detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo( + &detail::ProgramManager::getInstance().getDeviceKernelInfo( std::string_view(MKernelName))); } assert(impl->MKernelData.getKernelName() == MKernelName); diff --git a/sycl/test-e2e/Config/kernel_from_file.cpp b/sycl/test-e2e/Config/kernel_from_file.cpp index c5ae870000ce9..1f64745ec36d7 100644 --- a/sycl/test-e2e/Config/kernel_from_file.cpp +++ b/sycl/test-e2e/Config/kernel_from_file.cpp @@ -1,11 +1,9 @@ // REQUIRES: target-spir -// FIXME Disabled fallback assert as it'll require either online linking or -// explicit offline linking step here // FIXME separate compilation requires -fno-sycl-dead-args-optimization // As we are doing a separate device compilation here, we need to explicitly // add the device lib instrumentation (itt_compiler_wrapper) -// RUN: %clangxx -Wno-error=ignored-attributes -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 -fsycl-device-only -fno-sycl-dead-args-optimization -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict +// RUN: %clangxx -Wno-error=ignored-attributes -DUSED_KERNEL -fno-sycl-dead-args-optimization %cxx_std_optionc++17 -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict // >> ---- unbundle compiler wrapper and asan device objects // RUN: clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-itt-compiler-wrappers%obj_ext -output=%t_compiler_wrappers.bc -unbundle // RUN: %if linux %{ clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-asan%obj_ext -output=%t_asan.bc -unbundle %} @@ -13,7 +11,9 @@ // RUN: %if linux %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %t_asan.bc %} %else %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %} // >> ---- translate to SPIR-V // RUN: llvm-spirv -o %t.spv %t_app.bc -// RUN: %clangxx -Wno-error=ignored-attributes %sycl_include -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 %include_option %t.h %s -o %t.out %sycl_options -Xclang -verify-ignore-unexpected=note,warning %if preview-mode %{-Wno-unused-command-line-argument%} +// Need to perform full compilation here since the SYCL runtime uses image +// properties from the fat binary. +// RUN: %{build} -fno-sycl-dead-args-optimization -o %t.out // RUN: env SYCL_USE_KERNEL_SPV=%t.spv %{run} %t.out #include @@ -31,10 +31,15 @@ int main(int argc, char **argv) { event e = myQueue.submit([&](handler &cgh) { auto ptr = buf.get_access(cgh); - cgh.single_task([=]() { ptr[0]++; }); + cgh.single_task([=]() { +#ifdef USED_KERNEL + ptr[0]++; +#else + ptr[0]--; +#endif + }); }); e.wait_and_throw(); - } catch (sycl::exception const &e) { std::cerr << "SYCL exception caught:\n"; std::cerr << e.what() << "\n"; diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index 8ffd8d0dbe5bd..655957f9adc00 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -15,6 +15,12 @@ class Kernel3; MOCK_INTEGRATION_HEADER(Kernel1) MOCK_INTEGRATION_HEADER(Kernel2) MOCK_INTEGRATION_HEADER(Kernel3) +static sycl::unittest::MockDeviceImage CommandGraphImgs[3] = { + sycl::unittest::generateDefaultImage({"Kernel1"}), + sycl::unittest::generateDefaultImage({"Kernel2"}), + sycl::unittest::generateDefaultImage({"Kernel3"})}; +static sycl::unittest::MockDeviceImageArray<3> CommandGraphImgArray{ + CommandGraphImgs}; using namespace sycl; using namespace sycl::ext::oneapi; diff --git a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp index 498e3c39582f1..1303d27851e36 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp @@ -14,7 +14,10 @@ using namespace sycl::ext::oneapi; class MockKernel; MOCK_INTEGRATION_HEADER(MockKernel) - +static sycl::unittest::MockDeviceImage MockKernelImg = + sycl::unittest::generateDefaultImage({"MockKernel"}); +static sycl::unittest::MockDeviceImageArray<1> MockKernelImgArray{ + &MockKernelImg}; /** * Checks that the operators and constructors of graph related classes meet the * common reference semantics. diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 083fdf05adc17..aa7f0f023154b 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -71,10 +71,6 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return m_EliminatedKernelArgMasks; } - std::unordered_map &getKernelImplicitLocalArgPos() { - return m_KernelImplicitLocalArgPos; - } - std::unordered_map> & getHostPipes() { @@ -304,8 +300,6 @@ void checkAllInvolvedContainers(ProgramManagerExposed &PM, "Kernel name reference count " + CommentPostfix); EXPECT_EQ(PM.getEliminatedKernelArgMask().size(), ExpectedImgCount) << "Eliminated kernel arg mask " + CommentPostfix; - EXPECT_EQ(PM.getKernelImplicitLocalArgPos().size(), ExpectedEntryCount) - << "Kernel implicit local arg pos " + CommentPostfix; if (!MultipleImgsPerEntryTestCase) { // FIXME expected to fail for now, device globals cleanup seems to be @@ -355,10 +349,6 @@ TEST(ImageRemoval, BaseContainers) { generateRefName("B", "HostPipe").c_str()); PM.addOrInitHostPipeEntry(PipeC::get_host_ptr(), generateRefName("C", "HostPipe").c_str()); - std::vector KernelNames = - generateRefNames({"A", "B", "C"}, "Kernel"); - for (const std::string &Name : KernelNames) - PM.getOrCreateDeviceKernelInfo(Name); checkAllInvolvedContainers(PM, ImagesToRemove.size() + ImagesToKeep.size(), {"A", "B", "C"}, "check failed before removal"); @@ -382,8 +372,6 @@ TEST(ImageRemoval, MultipleImagesPerEntry) { convertAndAddImages(PM, ImagesToRemoveSameEntries, NativeImagesForRemoval, TestBinaries); - std::string KernelName = generateRefName("A", "Kernel"); - PM.getOrCreateDeviceKernelInfo(KernelName); checkAllInvolvedContainers( PM, ImagesToRemoveSameEntries.size() + ImagesToKeepSameEntries.size(), /*ExpectedEntryCount*/ 1, {"A"}, "check failed before removal",