From 6e98e3f68e40769c8ba5a049a85b483eaac45a66 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 04:12:01 -0700 Subject: [PATCH 01/52] not buildable: remove host device from device_impl.* Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/device_impl.cpp | 38 +++++------------------------- sycl/source/detail/device_impl.hpp | 14 ----------- 2 files changed, 6 insertions(+), 46 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 532cffe22500f..d043a59d9cebd 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -17,11 +17,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -device_impl::device_impl() - : MIsHostDevice(true), MPlatform(platform_impl::getHostPlatformImpl()), - // assert is natively supported by host - MIsAssertFailSupported(true) {} - device_impl::device_impl(pi_native_handle InteropDeviceHandle, const PluginPtr &Plugin) : device_impl(InteropDeviceHandle, nullptr, nullptr, Plugin) {} @@ -39,7 +34,7 @@ device_impl::device_impl(sycl::detail::pi::PiDevice Device, device_impl::device_impl(pi_native_handle InteropDeviceHandle, sycl::detail::pi::PiDevice Device, PlatformImplPtr Platform, const PluginPtr &Plugin) - : MDevice(Device), MIsHostDevice(false), + : MDevice(Device), MDeviceHostBaseTime(std::make_pair(0, 0)) { bool InteroperabilityConstructor = false; @@ -84,13 +79,11 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle, } device_impl::~device_impl() { - if (!MIsHostDevice) { - // TODO catch an exception and put it to list of asynchronous exceptions - const PluginPtr &Plugin = getPlugin(); - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck(MDevice); - __SYCL_CHECK_OCL_CODE_NO_EXC(Err); - } + // TODO catch an exception and put it to list of asynchronous exceptions + const PluginPtr &Plugin = getPlugin(); + sycl::detail::pi::PiResult Err = + Plugin->call_nocheck(MDevice); + __SYCL_CHECK_OCL_CODE_NO_EXC(Err); } bool device_impl::is_affinity_supported( @@ -101,11 +94,6 @@ bool device_impl::is_affinity_supported( } cl_device_id device_impl::get() const { - if (MIsHostDevice) { - throw invalid_object_error( - "This instance of device doesn't support OpenCL interoperability.", - PI_ERROR_INVALID_DEVICE); - } // TODO catch an exception and put it to list of asynchronous exceptions getPlugin()->call(MDevice); return pi::cast(getNative()); @@ -180,9 +168,6 @@ device_impl::get_backend_info() const { } bool device_impl::has_extension(const std::string &ExtensionName) const { - if (MIsHostDevice) - // TODO: implement extension management for host device; - return false; std::string AllExtensionNames = get_device_info_string(PiInfoCode::value); return (AllExtensionNames.find(ExtensionName) != std::string::npos); @@ -224,8 +209,6 @@ device_impl::create_sub_devices(const cl_device_partition_property *Properties, } std::vector device_impl::create_sub_devices(size_t ComputeUnits) const { - assert(!MIsHostDevice && "Partitioning is not supported on host."); - if (!is_partition_supported(info::partition_property::partition_equally)) { throw sycl::feature_not_supported( "Device does not support " @@ -248,8 +231,6 @@ std::vector device_impl::create_sub_devices(size_t ComputeUnits) const { std::vector device_impl::create_sub_devices(const std::vector &Counts) const { - assert(!MIsHostDevice && "Partitioning is not supported on host."); - if (!is_partition_supported(info::partition_property::partition_by_counts)) { throw 
sycl::feature_not_supported( "Device does not support " @@ -291,8 +272,6 @@ device_impl::create_sub_devices(const std::vector &Counts) const { std::vector device_impl::create_sub_devices( info::partition_affinity_domain AffinityDomain) const { - assert(!MIsHostDevice && "Partitioning is not supported on host."); - if (!is_partition_supported( info::partition_property::partition_by_affinity_domain)) { throw sycl::feature_not_supported( @@ -319,8 +298,6 @@ std::vector device_impl::create_sub_devices( } std::vector device_impl::create_sub_devices() const { - assert(!MIsHostDevice && "Partitioning is not supported on host."); - if (!is_partition_supported( info::partition_property::ext_intel_partition_by_cslice)) { throw sycl::feature_not_supported( @@ -789,9 +766,6 @@ uint64_t device_impl::getCurrentDeviceTime() { uint64_t HostTime = duration_cast(steady_clock::now().time_since_epoch()) .count(); - if (MIsHostDevice) { - return HostTime; - } // To account for potential clock drift between host clock and device clock. // The value set is arbitrary: 200 seconds diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 981b1e059a30e..2526647152892 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -65,10 +65,6 @@ class device_impl { /// /// \return non-constant reference to PI device sycl::detail::pi::PiDevice &getHandleRef() { - if (MIsHostDevice) - throw invalid_object_error("This instance of device is a host instance", - PI_ERROR_INVALID_DEVICE); - return MDevice; } @@ -78,18 +74,9 @@ class device_impl { /// /// \return constant reference to PI device const sycl::detail::pi::PiDevice &getHandleRef() const { - if (MIsHostDevice) - throw invalid_object_error("This instance of device is a host instance", - PI_ERROR_INVALID_DEVICE); - return MDevice; } - /// Check if SYCL device is a host device - /// - /// \return true if SYCL device is a host device - bool is_host() const { return MIsHostDevice; } - /// Check if device is a CPU device /// /// \return true if SYCL device is a CPU device @@ -327,7 +314,6 @@ class device_impl { sycl::detail::pi::PiDevice MDevice = 0; sycl::detail::pi::PiDeviceType MType; sycl::detail::pi::PiDevice MRootDevice = nullptr; - bool MIsHostDevice; PlatformImplPtr MPlatform; bool MIsAssertFailSupported = false; mutable std::string MDeviceName; From abe4586ce16a07b69a1d2c662679697754db00a2 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 04:13:51 -0700 Subject: [PATCH 02/52] not-buildable: remove getHostPlatformImpl Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/context_impl.cpp | 3 --- sycl/source/detail/device_info.hpp | 4 ---- sycl/source/detail/platform_impl.cpp | 6 ------ sycl/source/detail/platform_impl.hpp | 8 -------- 4 files changed, 21 deletions(-) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 388c312305d4a..c2124456dae24 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -177,9 +177,6 @@ uint32_t context_impl::get_info() const { this->getPlugin()); } template <> platform context_impl::get_info() const { - if (is_host()) - return createSyclObjFromImpl( - platform_impl::getHostPlatformImpl()); return createSyclObjFromImpl(MPlatform); } template <> diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index a8769b69e83cc..61cb09e1b0b38 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ 
-1802,10 +1802,6 @@ get_device_info_host() { return {}; } -template <> inline platform get_device_info_host() { - return createSyclObjFromImpl(platform_impl::getHostPlatformImpl()); -} - template <> inline std::string get_device_info_host() { return "SYCL host device"; } diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index 2bdfab26676d9..9700fde466803 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -30,12 +30,6 @@ namespace detail { using PlatformImplPtr = std::shared_ptr; -PlatformImplPtr platform_impl::getHostPlatformImpl() { - static PlatformImplPtr HostImpl = std::make_shared(); - - return HostImpl; -} - PlatformImplPtr platform_impl::getOrMakePlatformImpl(sycl::detail::pi::PiPlatform PiPlatform, const PluginPtr &Plugin) { diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index 34537c7191af6..0bb8d1ab77e2f 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -192,14 +192,6 @@ class platform_impl { getOrMakeDeviceImpl(sycl::detail::pi::PiDevice PiDevice, const std::shared_ptr &PlatformImpl); - /// Static functions that help maintain platform uniquess and - /// equality of comparison - - /// Returns the host platform impl - /// - /// \return the host platform impl - static std::shared_ptr getHostPlatformImpl(); - /// Queries the cache to see if the specified PiPlatform has been seen /// before. If so, return the cached platform_impl, otherwise create a new /// one and cache it. From 6a0a25005b1b9b831419e94ed56b0bb8f15b4017 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 04:18:11 -0700 Subject: [PATCH 03/52] not buildable: remove get_device_info_host Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/device_impl.cpp | 3 - sycl/source/detail/device_info.hpp | 1032 ---------------------------- 2 files changed, 1035 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index d043a59d9cebd..2e87300425c20 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -105,9 +105,6 @@ platform device_impl::get_platform() const { template typename Param::return_type device_impl::get_info() const { - if (is_host()) { - return get_device_info_host(); - } return get_device_info( MPlatform->getOrMakeDeviceImpl(MDevice, MPlatform)); } diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 61cb09e1b0b38..9322b65128652 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -1272,1038 +1272,6 @@ typename Param::return_type get_device_info(const DeviceImplPtr &Dev) { return get_device_info_impl::get(Dev); } -// SYCL host device information - -// Default template is disabled, all possible instantiations are -// specified explicitly. 
-template -inline typename Param::return_type get_device_info_host() = delete; - -template <> -inline std::vector get_device_info_host() { - return std::vector(); -} - -template <> -inline ext::oneapi::experimental::architecture -get_device_info_host() { - return ext::oneapi::experimental::architecture::x86_64; -} - -template <> -inline info::device_type get_device_info_host() { - return info::device_type::host; -} - -template <> inline uint32_t get_device_info_host() { - return 0x8086; -} - -template <> -inline uint32_t get_device_info_host() { - return std::thread::hardware_concurrency(); -} - -template <> -inline uint32_t get_device_info_host() { - return 3; -} - -template <> -inline range<1> get_device_info_host>() { - // current value is the required minimum - return {1}; -} - -template <> -inline range<2> get_device_info_host>() { - // current value is the required minimum - return {1, 1}; -} - -template <> -inline range<3> get_device_info_host>() { - // current value is the required minimum - return {1, 1, 1}; -} - -template <> -inline constexpr size_t get_device_info_host< - ext::oneapi::experimental::info::device::max_global_work_groups>() { - // See handler.hpp for the maximum value : - return static_cast((std::numeric_limits::max)()); -} - -template <> -inline id<1> get_device_info_host< - ext::oneapi::experimental::info::device::max_work_groups<1>>() { - // See handler.hpp for the maximum value : - static constexpr size_t Limit = get_device_info_host< - ext::oneapi::experimental::info::device::max_global_work_groups>(); - return {Limit}; -} - -template <> -inline id<2> get_device_info_host< - ext::oneapi::experimental::info::device::max_work_groups<2>>() { - // See handler.hpp for the maximum value : - static constexpr size_t Limit = get_device_info_host< - ext::oneapi::experimental::info::device::max_global_work_groups>(); - return {Limit, Limit}; -} - -template <> -inline id<3> get_device_info_host< - ext::oneapi::experimental::info::device::max_work_groups<3>>() { - // See handler.hpp for the maximum value : - static constexpr size_t Limit = get_device_info_host< - ext::oneapi::experimental::info::device::max_global_work_groups>(); - return {Limit, Limit, Limit}; -} - -// TODO:remove with deprecated feature -// device::get_info -template <> -inline constexpr size_t -get_device_info_host() { - return get_device_info_host< - ext::oneapi::experimental::info::device::max_global_work_groups>(); -} - -// TODO:remove with deprecated feature -// device::get_info -template <> -inline id<1> -get_device_info_host() { - - return get_device_info_host< - ext::oneapi::experimental::info::device::max_work_groups<1>>(); -} - -// TODO:remove with deprecated feature -// device::get_info -template <> -inline id<2> -get_device_info_host() { - return get_device_info_host< - ext::oneapi::experimental::info::device::max_work_groups<2>>(); -} - -// TODO:remove with deprecated feature -// device::get_info -template <> -inline id<3> -get_device_info_host() { - return get_device_info_host< - ext::oneapi::experimental::info::device::max_work_groups<3>>(); -} - -template <> -inline size_t get_device_info_host() { - // current value is the required minimum - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> 
-inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 0; -} - -template <> -inline uint32_t get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Char); -} - -template <> -inline uint32_t -get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Short); -} - -template <> -inline uint32_t get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Int); -} - -template <> -inline uint32_t get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Long); -} - -template <> -inline uint32_t -get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Float); -} - -template <> -inline uint32_t -get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Double); -} - -template <> -inline uint32_t get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Half); -} - -template <> -inline uint32_t get_device_info_host() { - return PlatformUtil::getMaxClockFrequency(); -} - -template <> inline uint32_t get_device_info_host() { - return sizeof(void *) * 8; -} - -template <> -inline uint64_t get_device_info_host() { - return static_cast(OSUtil::getOSMemSize()); -} - -template <> -inline uint64_t get_device_info_host() { - // current value is the required minimum - const uint64_t a = get_device_info_host() / 4; - const uint64_t b = 128ul * 1024 * 1024; - return (a > b) ? a : b; -} - -template <> inline bool get_device_info_host() { - return true; -} - -template <> inline bool get_device_info_host() { - return false; -} - -template <> -inline std::vector -get_device_info_host() { - return {memory_order::relaxed, memory_order::acquire, memory_order::release, - memory_order::acq_rel, memory_order::seq_cst}; -} - -template <> -inline std::vector -get_device_info_host() { - return {memory_order::relaxed, memory_order::acquire, memory_order::release, - memory_order::acq_rel}; -} - -template <> -inline std::vector -get_device_info_host() { - return {memory_scope::work_item, memory_scope::sub_group, - memory_scope::work_group, memory_scope::device, memory_scope::system}; -} - -template <> -inline std::vector -get_device_info_host() { - return {memory_scope::work_item, memory_scope::sub_group, - memory_scope::work_group, memory_scope::device, memory_scope::system}; -} - -template <> -inline bool -get_device_info_host() { - return false; -} - -template <> -inline uint32_t get_device_info_host() { - // current value is the required minimum - return 128; -} - -template <> -inline uint32_t get_device_info_host() { - // current value is the required minimum - return 8; -} - -template <> -inline size_t get_device_info_host() { - // SYCL guarantees at least 8192. Some devices already known to provide more - // than that (i.e. it is 16384 for opencl:gpu), which may create issues during - // image object allocation on host. - // Using any fixed number (i.e. 16384) brings the risk of having similar - // issues on newer devices in future. Thus it does not make sense limiting - // the returned value on host. 
Practially speaking the returned value on host - // depends only on memory required for the image, which also depends on - // the image channel_type and the image height. Both are not known in this - // query, thus it becomes user's responsibility to choose proper image - // parameters depending on similar query to (non-host device) and amount - // of available/allocatable memory. - return std::numeric_limits::max(); -} - -template <> -inline size_t get_device_info_host() { - // SYCL guarantees at least 8192. Some devices already known to provide more - // than that (i.e. it is 16384 for opencl:gpu), which may create issues during - // image object allocation on host. - // Using any fixed number (i.e. 16384) brings the risk of having similar - // issues on newer devices in future. Thus it does not make sense limiting - // the returned value on host. Practially speaking the returned value on host - // depends only on memory required for the image, which also depends on - // the image channel_type and the image width. Both are not known in this - // query, thus it becomes user's responsibility to choose proper image - // parameters depending on similar query to (non-host device) and amount - // of available/allocatable memory. - return std::numeric_limits::max(); -} - -template <> -inline size_t get_device_info_host() { - // SYCL guarantees at least 8192. Some devices already known to provide more - // than that (i.e. it is 16384 for opencl:gpu), which may create issues during - // image object allocation on host. - // Using any fixed number (i.e. 16384) brings the risk of having similar - // issues on newer devices in future. Thus it does not make sense limiting - // the returned value on host. Practially speaking the returned value on host - // depends only on memory required for the image, which also depends on - // the image channel_type and the image height/depth. Both are not known - // in this query, thus it becomes user's responsibility to choose proper image - // parameters depending on similar query to (non-host device) and amount - // of available/allocatable memory. - return std::numeric_limits::max(); -} - -template <> -inline size_t get_device_info_host() { - // SYCL guarantees at least 8192. Some devices already known to provide more - // than that (i.e. it is 16384 for opencl:gpu), which may create issues during - // image object allocation on host. - // Using any fixed number (i.e. 16384) brings the risk of having similar - // issues on newer devices in future. Thus it does not make sense limiting - // the returned value on host. Practially speaking the returned value on host - // depends only on memory required for the image, which also depends on - // the image channel_type and the image width/depth. Both are not known - // in this query, thus it becomes user's responsibility to choose proper image - // parameters depending on similar query to (non-host device) and amount - // of available/allocatable memory. - return std::numeric_limits::max(); -} - -template <> -inline size_t get_device_info_host() { - // SYCL guarantees at least 8192. Some devices already known to provide more - // than that (i.e. it is 16384 for opencl:gpu), which may create issues during - // image object allocation on host. - // Using any fixed number (i.e. 16384) brings the risk of having similar - // issues on newer devices in future. Thus it does not make sense limiting - // the returned value on host. 
Practially speaking the returned value on host - // depends only on memory required for the image, which also depends on - // the image channel_type and the image height/width, which are not known - // in this query, thus it becomes user's responsibility to choose proper image - // parameters depending on similar query to (non-host device) and amount - // of available/allocatable memory. - return std::numeric_limits::max(); -} - -template <> -inline size_t get_device_info_host() { - // Not supported in SYCL - return 0; -} - -template <> -inline size_t get_device_info_host() { - // current value is the required minimum - return 2048; -} - -template <> inline uint32_t get_device_info_host() { - // current value is the required minimum - return 16; -} - -template <> -inline size_t get_device_info_host() { - // current value is the required minimum - return 1024; -} - -template <> -inline uint32_t get_device_info_host() { - return 1024; -} - -template <> -inline std::vector -get_device_info_host() { - // current value is the required minimum - return {}; -} - -template <> -inline std::vector -get_device_info_host() { - // current value is the required minimum - return {info::fp_config::round_to_nearest, info::fp_config::inf_nan}; -} - -template <> -inline std::vector -get_device_info_host() { - // current value is the required minimum - return {info::fp_config::fma, info::fp_config::round_to_nearest, - info::fp_config::round_to_zero, info::fp_config::round_to_inf, - info::fp_config::inf_nan, info::fp_config::denorm}; -} - -template <> -inline info::global_mem_cache_type -get_device_info_host() { - return info::global_mem_cache_type::read_write; -} - -template <> -inline uint32_t -get_device_info_host() { - return PlatformUtil::getMemCacheLineSize(); -} - -template <> -inline uint64_t get_device_info_host() { - return PlatformUtil::getMemCacheSize(); -} - -template <> -inline uint64_t get_device_info_host() { - // current value is the required minimum - return 64 * 1024; -} - -template <> -inline uint32_t get_device_info_host() { - // current value is the required minimum - return 8; -} - -template <> -inline info::local_mem_type -get_device_info_host() { - return info::local_mem_type::global; -} - -template <> -inline uint64_t get_device_info_host() { - // current value is the required minimum - return 32 * 1024; -} - -template <> -inline bool get_device_info_host() { - return false; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline size_t get_device_info_host() { - typedef std::ratio_divide - ns_period; - return ns_period::num / ns_period::den; -} - -template <> inline bool get_device_info_host() { - union { - uint16_t a; - uint8_t b[2]; - } u = {0x0100}; - - return u.b[1]; -} - -template <> inline bool get_device_info_host() { - return true; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline std::vector -get_device_info_host() { - return {info::execution_capability::exec_kernel}; -} - -template <> inline bool get_device_info_host() { - return true; -} - -template <> -inline std::vector -get_device_info_host() { - return {}; -} - -template <> -inline std::vector -get_device_info_host() { - return {}; -} - -template <> inline std::string get_device_info_host() { - return "SYCL host device"; -} - -template <> inline std::string get_device_info_host() { - return ""; -} - -template <> -inline std::string get_device_info_host() { - 
return "1.2"; -} - -template <> inline std::string get_device_info_host() { - return "FULL PROFILE"; -} - -template <> inline std::string get_device_info_host() { - return "1.2"; -} - -template <> -inline std::string get_device_info_host() { - return "not applicable"; -} - -template <> -inline std::vector -get_device_info_host() { - // TODO update when appropriate - return {}; -} - -template <> -inline size_t get_device_info_host() { - // current value is the required minimum - return 1024 * 1024; -} - -template <> -inline bool get_device_info_host() { - return false; -} - -template <> inline device get_device_info_host() { - throw invalid_object_error( - "Partitioning to subdevices of the host device is not implemented", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update once subdevice creation is enabled - return 1; -} - -template <> -inline std::vector -get_device_info_host() { - // TODO update once subdevice creation is enabled - return {}; -} - -template <> -inline std::vector -get_device_info_host() { - // TODO update once subdevice creation is enabled - return {}; -} - -template <> -inline info::partition_property -get_device_info_host() { - return info::partition_property::no_partition; -} - -template <> -inline info::partition_affinity_domain -get_device_info_host() { - // TODO update once subdevice creation is enabled - return info::partition_affinity_domain::not_applicable; -} - -template <> -inline uint32_t get_device_info_host() { - // TODO update once subdevice creation is enabled - return 1; -} - -template <> -inline uint32_t get_device_info_host() { - // TODO update once subgroups are enabled - throw runtime_error("Sub-group feature is not supported on HOST device.", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline std::vector -get_device_info_host() { - // TODO update once subgroups are enabled - throw runtime_error("Sub-group feature is not supported on HOST device.", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline bool -get_device_info_host() { - // TODO update once subgroups are enabled - throw runtime_error("Sub-group feature is not supported on HOST device.", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline bool get_device_info_host() { - return false; -} - -template <> -inline std::string get_device_info_host() { - throw runtime_error( - "Backend version feature is not supported on HOST device.", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline bool -get_device_info_host() { - return true; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline bool get_device_info_host() { - return false; -} - -// Specializations for intel extensions for Level Zero low-level -// detail device descriptors (not support on host). 
-template <> -inline uint32_t get_device_info_host() { - throw runtime_error("Obtaining the device ID is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline std::string -get_device_info_host() { - throw runtime_error( - "Obtaining the PCI address is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t get_device_info_host() { - throw runtime_error("Obtaining the EU count is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the EU SIMD width is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t get_device_info_host() { - throw runtime_error( - "Obtaining the number of slices is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error("Obtaining the number of subslices per slice is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the EU count per subslice is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the HW threads count per EU is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint64_t -get_device_info_host() { - throw runtime_error( - "Obtaining the maximum memory bandwidth is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline detail::uuid_type -get_device_info_host() { - throw runtime_error( - "Obtaining the device uuid is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline std::string get_device_info_host() { - throw runtime_error( - "Obtaining the PCI address is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t get_device_info_host() { - throw runtime_error("Obtaining the EU count is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the EU SIMD width is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t get_device_info_host() { - throw runtime_error( - "Obtaining the number of slices is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error("Obtaining the number of subslices per slice is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the EU count per subslice is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the HW threads count per EU is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// 
device::get_info() -template <> -inline uint64_t -get_device_info_host() { - throw runtime_error( - "Obtaining the maximum memory bandwidth is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO:Move to namespace ext::intel::info::device -template <> inline bool get_device_info_host() { - return false; -} - -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline detail::uuid_type -get_device_info_host() { - throw runtime_error( - "Obtaining the device uuid is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint64_t get_device_info_host() { - throw runtime_error( - "Obtaining the device free memory is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the device memory clock rate is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the device memory bus width is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline int32_t -get_device_info_host() { - throw runtime_error( - "Obtaining max compute queue indices is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline bool get_device_info_host< - ext::codeplay::experimental::info::device::supports_fusion>() { - // No support for fusion on the host device. - return false; -} - -template <> -inline uint32_t get_device_info_host< - ext::codeplay::experimental::info::device::max_registers_per_work_group>() { - throw runtime_error("Obtaining the maximum number of available registers per " - "work-group is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t get_device_info_host< - ext::oneapi::experimental::info::device::image_row_pitch_align>() { - throw runtime_error("Obtaining image pitch alignment is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t get_device_info_host< - ext::oneapi::experimental::info::device::max_image_linear_row_pitch>() { - throw runtime_error("Obtaining max image linear pitch is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::matrix_combinations>() { - throw runtime_error("Obtaining matrix combinations is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t get_device_info_host< - ext::oneapi::experimental::info::device::max_image_linear_width>() { - throw runtime_error("Obtaining max image linear width is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t get_device_info_host< - ext::oneapi::experimental::info::device::max_image_linear_height>() { - throw runtime_error("Obtaining max image linear height is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline float get_device_info_host< - ext::oneapi::experimental::info::device::mipmap_max_anisotropy>() { - throw runtime_error("Bindless image mipaps are not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline std::vector get_device_info_host< - ext::oneapi::experimental::info::device::component_devices>() { - throw runtime_error("Host devices cannot be component devices.", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline sycl::device 
get_device_info_host< - ext::oneapi::experimental::info::device::composite_device>() { - throw runtime_error("Host devices cannot be composite devices.", - PI_ERROR_INVALID_DEVICE); -} - -// Returns the list of all progress guarantees that can be requested for -// work_groups from the coordination level of root_group when using host device. -// First it calls getHostProgressGuarantee to get the strongest guarantee -// available and then calls getProgressGuaranteesUpTo to get a list of all -// guarantees that are either equal to the strongest guarantee or weaker than -// it. The next 5 definitions follow the same model but for different scopes. -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::work_group_progress_capabilities< - ext::oneapi::experimental::execution_scope::root_group>>() { - - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - device_impl::getHostProgressGuarantee(execution_scope::work_group, - execution_scope::root_group)); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::sub_group_progress_capabilities< - ext::oneapi::experimental::execution_scope::root_group>>() { - - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - device_impl::getHostProgressGuarantee(execution_scope::sub_group, - execution_scope::root_group)); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::sub_group_progress_capabilities< - ext::oneapi::experimental::execution_scope::work_group>>() { - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - device_impl::getHostProgressGuarantee(execution_scope::sub_group, - execution_scope::work_group)); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::work_item_progress_capabilities< - ext::oneapi::experimental::execution_scope::root_group>>() { - - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - device_impl::getHostProgressGuarantee(execution_scope::work_item, - execution_scope::root_group)); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::work_item_progress_capabilities< - ext::oneapi::experimental::execution_scope::work_group>>() { - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - device_impl::getHostProgressGuarantee(execution_scope::work_item, - execution_scope::work_group)); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::work_item_progress_capabilities< - ext::oneapi::experimental::execution_scope::sub_group>>() { - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - device_impl::getHostProgressGuarantee(execution_scope::work_item, - execution_scope::sub_group)); -} - // Returns the list of all progress guarantees that can be requested for // work_groups from the coordination level of root_group when using the device // given by Dev. 
First it calls getProgressGuarantee to get the strongest From 35b682216afe064e98bf8c6f2c45334d99a5120a Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 04:23:01 -0700 Subject: [PATCH 04/52] not-buildable: remove is_host from context_impl.* Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/context_impl.cpp | 19 +++++-------------- sycl/source/detail/context_impl.hpp | 1 - 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index c2124456dae24..87663c4e10775 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -34,7 +34,6 @@ context_impl::context_impl(const device &Device, async_handler AsyncHandler, MContext(nullptr), MPlatform(detail::getSyclObjImpl(Device.get_platform())), MPropList(PropList), - MHostContext(detail::getSyclObjImpl(Device)->is_host()), MSupportBufferLocationByDevices(NotChecked) { MKernelProgramCache.setContextPtr(this); } @@ -43,7 +42,7 @@ context_impl::context_impl(const std::vector Devices, async_handler AsyncHandler, const property_list &PropList) : MOwnedByRuntime(true), MAsyncHandler(AsyncHandler), MDevices(Devices), - MContext(nullptr), MPlatform(), MPropList(PropList), MHostContext(false), + MContext(nullptr), MPlatform(), MPropList(PropList), MSupportBufferLocationByDevices(NotChecked) { MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform()); std::vector DeviceIds; @@ -88,7 +87,7 @@ context_impl::context_impl(sycl::detail::pi::PiContext PiContext, bool OwnedByRuntime) : MOwnedByRuntime(OwnedByRuntime), MAsyncHandler(AsyncHandler), MDevices(DeviceList), MContext(PiContext), MPlatform(), - MHostContext(false), MSupportBufferLocationByDevices(NotChecked) { + MSupportBufferLocationByDevices(NotChecked) { if (!MDevices.empty()) { MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform()); } else { @@ -132,18 +131,11 @@ context_impl::context_impl(sycl::detail::pi::PiContext PiContext, } cl_context context_impl::get() const { - if (MHostContext) { - throw invalid_object_error( - "This instance of context doesn't support OpenCL interoperability.", - PI_ERROR_INVALID_CONTEXT); - } // TODO catch an exception and put it to list of asynchronous exceptions getPlugin()->call(MContext); return pi::cast(MContext); } -bool context_impl::is_host() const { return MHostContext; } - context_impl::~context_impl() { // Free all events associated with the initialization of device globals. 
   for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers)
@@ -159,10 +151,9 @@ context_impl::~context_impl() {
     assert(LibProg.second && "Null program must not be kept in the cache");
     getPlugin()->call(LibProg.second);
   }
-  if (!MHostContext) {
-    // TODO catch an exception and put it to list of asynchronous exceptions
-    getPlugin()->call_nocheck(MContext);
-  }
+
+  // TODO catch an exception and put it to list of asynchronous exceptions
+  getPlugin()->call_nocheck(MContext);
 }
 
 const async_handler &context_impl::get_async_handler() const {
diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp
index a1e383f721e31..af20236fc4b23 100644
--- a/sycl/source/detail/context_impl.hpp
+++ b/sycl/source/detail/context_impl.hpp
@@ -272,7 +272,6 @@ class context_impl {
   sycl::detail::pi::PiContext MContext;
   PlatformImplPtr MPlatform;
   property_list MPropList;
-  bool MHostContext;
   CachedLibProgramsT MCachedLibPrograms;
   std::mutex MCachedLibProgramsMutex;
   mutable KernelProgramCache MKernelProgramCache;

From 77c749c6ea54b35b5324bfe163460279b3039930 Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya"
Date: Wed, 22 May 2024 04:29:12 -0700
Subject: [PATCH 05/52] not-buildable: remove is_host from event_impl.*

Signed-off-by: Tikhomirova, Kseniya
---
 sycl/source/detail/event_impl.cpp | 91 +++++++++++++------------------
 sycl/source/detail/event_impl.hpp |  3 +-
 2 files changed, 38 insertions(+), 56 deletions(-)

diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp
index 7442cd4ccfe7a..e187be3563f5b 100644
--- a/sycl/source/detail/event_impl.cpp
+++ b/sycl/source/detail/event_impl.cpp
@@ -37,20 +37,9 @@ void event_impl::ensureContextInitialized() {
   if (MIsContextInitialized)
     return;
 
-  if (MHostEvent) {
-    QueueImplPtr HostQueue = Scheduler::getInstance().getDefaultHostQueue();
-    this->setContextImpl(detail::getSyclObjImpl(HostQueue->get_context()));
-  } else {
-    const device SyclDevice;
-    this->setContextImpl(detail::queue_impl::getDefaultOrNew(
-        detail::getSyclObjImpl(SyclDevice)));
-  }
-}
-
-bool event_impl::is_host() {
-  // Treat all devices that don't support interoperability as host devices to
-  // avoid attempts to call method get on such events.
- return MHostEvent; + const device SyclDevice; + this->setContextImpl(detail::queue_impl::getDefaultOrNew( + detail::getSyclObjImpl(SyclDevice))); } event_impl::~event_impl() { @@ -59,7 +48,7 @@ event_impl::~event_impl() { } void event_impl::waitInternal(bool *Success) { - if (!MHostEvent && MEvent) { + if (MEvent) { // Wait for the native event sycl::detail::pi::PiResult Err = getPlugin()->call_nocheck(1, &MEvent); @@ -92,7 +81,7 @@ void event_impl::waitInternal(bool *Success) { } void event_impl::setComplete() { - if (MHostEvent || !MEvent) { + if (!MEvent) { { std::unique_lock lock(MMutex); #ifndef NDEBUG @@ -137,7 +126,6 @@ const PluginPtr &event_impl::getPlugin() { void event_impl::setStateIncomplete() { MState = HES_NotComplete; } void event_impl::setContextImpl(const ContextImplPtr &Context) { - MHostEvent = Context->is_host(); MContext = Context; MIsContextInitialized = true; } @@ -145,7 +133,7 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { event_impl::event_impl(sycl::detail::pi::PiEvent Event, const context &SyclContext) : MIsContextInitialized(true), MEvent(Event), - MContext(detail::getSyclObjImpl(SyclContext)), MHostEvent(false), + MContext(detail::getSyclObjImpl(SyclContext)), MIsFlushed(true), MState(HES_Complete) { if (MContext->is_host()) { @@ -317,7 +305,7 @@ event_impl::get_profiling_info() { // made by forcing the re-sync of submit time to start time is less than // 0.5ms. These timing values were obtained empirically using an integrated // Intel GPU). - if (MEventFromSubmittedExecCommandBuffer && !MHostEvent && MEvent) { + if (MEventFromSubmittedExecCommandBuffer && MEvent) { uint64_t StartTime = get_event_profiling_info( this->getHandleRef(), this->getPlugin()); @@ -336,20 +324,19 @@ event_impl::get_profiling_info() { if (isNOP() && MSubmitTime) return MSubmitTime; - if (!MHostEvent) { - if (MEvent) { - auto StartTime = - get_event_profiling_info( + if (MEvent) { + auto StartTime = + get_event_profiling_info( + this->getHandleRef(), this->getPlugin()); + if (!MFallbackProfiling) { + return StartTime; + } else { + auto DeviceBaseTime = + get_event_profiling_info( this->getHandleRef(), this->getPlugin()); - if (!MFallbackProfiling) { - return StartTime; - } else { - auto DeviceBaseTime = - get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); - return MHostBaseTime - DeviceBaseTime + StartTime; - } + return MHostBaseTime - DeviceBaseTime + StartTime; } + return 0; } if (!MHostProfilingInfo) @@ -368,19 +355,17 @@ uint64_t event_impl::get_profiling_info() { if (isNOP() && MSubmitTime) return MSubmitTime; - if (!MHostEvent) { - if (MEvent) { - auto EndTime = - get_event_profiling_info( + if (MEvent) { + auto EndTime = + get_event_profiling_info( + this->getHandleRef(), this->getPlugin()); + if (!MFallbackProfiling) { + return EndTime; + } else { + auto DeviceBaseTime = + get_event_profiling_info( this->getHandleRef(), this->getPlugin()); - if (!MFallbackProfiling) { - return EndTime; - } else { - auto DeviceBaseTime = - get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); - return MHostBaseTime - DeviceBaseTime + EndTime; - } + return MHostBaseTime - DeviceBaseTime + EndTime; } return 0; } @@ -393,7 +378,7 @@ uint64_t event_impl::get_profiling_info() { } template <> uint32_t event_impl::get_info() { - if (!MHostEvent && MEvent) { + if (MEvent) { return get_event_info(this->getHandleRef(), this->getPlugin()); } @@ -406,17 +391,15 @@ event_impl::get_info() { if (MState == HES_Discarded) return 
info::event_command_status::ext_oneapi_unknown; - if (!MHostEvent) { - // Command is enqueued and PiEvent is ready - if (MEvent) - return get_event_info( - this->getHandleRef(), this->getPlugin()); - // Command is blocked and not enqueued, PiEvent is not assigned yet - else if (MCommand) - return sycl::info::event_command_status::submitted; - } + // Command is enqueued and PiEvent is ready + if (MEvent) + return get_event_info( + this->getHandleRef(), this->getPlugin()); + // Command is blocked and not enqueued, PiEvent is not assigned yet + else if (MCommand) + return sycl::info::event_command_status::submitted; - return MHostEvent && MState.load() != HES_Complete + return MState.load() != HES_Complete ? sycl::info::event_command_status::submitted : info::event_command_status::complete; } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index f33c160f9df97..08bb15cff6ff8 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -49,7 +49,7 @@ class event_impl { /// Normally constructs a host event, use std::nullopt to instead instantiate /// a device event. event_impl(std::optional State = HES_Complete) - : MIsInitialized(false), MHostEvent(State), MIsFlushed(true), + : MIsInitialized(false), MIsFlushed(true), MState(State.value_or(HES_Complete)) { // Need to fail in event() constructor if there are problems with the // ONEAPI_DEVICE_SELECTOR. Deferring may lead to conficts with noexcept @@ -364,7 +364,6 @@ class event_impl { uint64_t MSubmitTime = 0; uint64_t MHostBaseTime = 0; ContextImplPtr MContext; - bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; void *MCommand = nullptr; std::weak_ptr MQueue; From 6e7142097db4e014c7a12e576c2af6d124675ed1 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 04:31:22 -0700 Subject: [PATCH 06/52] not-buildable: update is_host for API objects to be easily removed Signed-off-by: Tikhomirova, Kseniya --- sycl/source/context.cpp | 5 ++--- sycl/source/device.cpp | 5 ++--- sycl/source/event.cpp | 5 ++--- sycl/source/kernel.cpp | 5 ++--- sycl/source/platform.cpp | 6 ++---- sycl/source/queue.cpp | 5 ++--- 6 files changed, 12 insertions(+), 19 deletions(-) diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index 3273c4f3056c2..c24a6c1ec2079 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -138,9 +138,8 @@ context::get_backend_info() const { cl_context context::get() const { return impl->get(); } bool context::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && "context::is_host should not be called in implementation."); - return IsHost; + assert(true && "context::is_host should not be called in implementation."); + return false; } backend context::get_backend() const noexcept { return impl->getBackend(); } diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 70aa37aad26a2..a3a88ebf6636a 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -71,9 +71,8 @@ std::vector device::get_devices(info::device_type deviceType) { cl_device_id device::get() const { return impl->get(); } bool device::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && "device::is_host should not be called in implementation."); - return IsHost; + assert(true && "device::is_host should not be called in implementation."); + return false; } bool device::is_cpu() const { return impl->is_cpu(); } diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index a7bae8055c10b..12b4a7e68164e 100644 --- 
a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -38,9 +38,8 @@ bool event::operator==(const event &rhs) const { return rhs.impl == impl; } bool event::operator!=(const event &rhs) const { return !(*this == rhs); } bool event::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && "event::is_host should not be called in implementation."); - return IsHost; + assert(true && "event::is_host should not be called in implementation."); + return false; } void event::wait() { impl->wait(impl); } diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index ff14c0a879078..bc842f6e596a5 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -31,9 +31,8 @@ kernel::kernel(cl_kernel ClKernel, const context &SyclContext) cl_kernel kernel::get() const { return impl->get(); } bool kernel::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && "kernel::is_host should not be called in implementation."); - return IsHost; + assert(true && "kernel::is_host should not be called in implementation."); + return false; } context kernel::get_context() const { diff --git a/sycl/source/platform.cpp b/sycl/source/platform.cpp index a2ee714952be9..9a15943213ec6 100644 --- a/sycl/source/platform.cpp +++ b/sycl/source/platform.cpp @@ -41,10 +41,8 @@ bool platform::has_extension(const std::string &ExtensionName) const { } bool platform::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && - "platform::is_host should not be called in implementation."); - return IsHost; + assert(true && "platform::is_host should not be called in implementation."); + return false; } std::vector platform::get_devices(info::device_type DeviceType) const { diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 15d7f11fcb42d..6a66cce267aa1 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -96,9 +96,8 @@ queue::ext_oneapi_get_graph() const { } bool queue::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && "queue::is_host should not be called in implementation."); - return IsHost; + assert(true && "queue::is_host should not be called in implementation."); + return false; } void queue::throw_asynchronous() { impl->throw_asynchronous(); } From 7e5abe966b8ebbfee9e0adcc7ce935cd864c21b8 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 08:53:47 -0700 Subject: [PATCH 07/52] not-buildable: update most obvious places Signed-off-by: Tikhomirova, Kseniya --- sycl/source/context.cpp | 37 ++++-------- sycl/source/detail/event_impl.cpp | 27 +++------ sycl/source/detail/event_impl.hpp | 13 ++-- sycl/source/detail/scheduler/commands.cpp | 60 +++---------------- sycl/source/detail/scheduler/commands.hpp | 7 +-- .../source/detail/scheduler/graph_builder.cpp | 4 +- sycl/source/detail/scheduler/scheduler.cpp | 24 +------- sycl/source/detail/scheduler/scheduler.hpp | 8 --- sycl/source/handler.cpp | 9 +-- 9 files changed, 39 insertions(+), 150 deletions(-) diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index c24a6c1ec2079..70b12836fc297 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -56,31 +56,20 @@ context::context(const std::vector &DeviceList, throw invalid_parameter_error("DeviceList is empty.", PI_ERROR_INVALID_VALUE); } - auto NonHostDeviceIter = std::find_if_not( - DeviceList.begin(), DeviceList.end(), [&](const device &CurrentDevice) { - return detail::getSyclObjImpl(CurrentDevice)->is_host(); - }); - if (NonHostDeviceIter == DeviceList.end()) - impl = 
std::make_shared(DeviceList[0], AsyncHandler, + + const auto &RefPlatform = + detail::getSyclObjImpl(DeviceList[0].get_platform())->getHandleRef(); + if (std::any_of(DeviceList.begin(), DeviceList.end(), + [&](const device &CurrentDevice) { + return (detail::getSyclObjImpl(CurrentDevice.get_platform()) + ->getHandleRef() != RefPlatform); + })) + throw invalid_parameter_error( + "Can't add devices across platforms to a single context.", + PI_ERROR_INVALID_DEVICE); + else + impl = std::make_shared(DeviceList, AsyncHandler, PropList); - else { - const device &NonHostDevice = *NonHostDeviceIter; - const auto &NonHostPlatform = - detail::getSyclObjImpl(NonHostDevice.get_platform())->getHandleRef(); - if (std::any_of(DeviceList.begin(), DeviceList.end(), - [&](const device &CurrentDevice) { - return ( - detail::getSyclObjImpl(CurrentDevice)->is_host() || - (detail::getSyclObjImpl(CurrentDevice.get_platform()) - ->getHandleRef() != NonHostPlatform)); - })) - throw invalid_parameter_error( - "Can't add devices across platforms to a single context.", - PI_ERROR_INVALID_DEVICE); - else - impl = std::make_shared(DeviceList, AsyncHandler, - PropList); - } } context::context(cl_context ClContext, async_handler AsyncHandler) { const auto &Plugin = sycl::detail::pi::getPlugin(); diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index e187be3563f5b..28bb37200392a 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -136,13 +136,6 @@ event_impl::event_impl(sycl::detail::pi::PiEvent Event, MContext(detail::getSyclObjImpl(SyclContext)), MIsFlushed(true), MState(HES_Complete) { - if (MContext->is_host()) { - throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), - "The syclContext must match the OpenCL context " - "associated with the clEvent. " + - codeToString(PI_ERROR_INVALID_CONTEXT)); - } - sycl::detail::pi::PiContext TempContext; getPlugin()->call( MEvent, PI_EVENT_INFO_CONTEXT, sizeof(sycl::detail::pi::PiContext), @@ -162,19 +155,8 @@ event_impl::event_impl(const QueueImplPtr &Queue) { void event_impl::associateWithQueue(const QueueImplPtr &Queue) { MQueue = Queue; - MIsProfilingEnabled = Queue->is_host() || Queue->MIsProfilingEnabled; + MIsProfilingEnabled = Queue->MIsProfilingEnabled; MFallbackProfiling = MIsProfilingEnabled && Queue->isProfilingFallback(); - if (Queue->is_host()) { - MState.store(HES_NotComplete); - if (Queue->has_property()) { - MHostProfilingInfo.reset(new HostProfilingInfo()); - if (!MHostProfilingInfo) - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Out of host memory " + - codeToString(PI_ERROR_OUT_OF_HOST_MEMORY)); - } - return; - } MState.store(HES_Complete); } @@ -578,6 +560,13 @@ bool event_impl::isCompleted() { info::event_command_status::complete; } +void event_impl::setCommand(void *Cmd) { + MCommand = Cmd; + auto TypedCommand = static_cast(Cmd); + if (TypedCommand) + MIsHostTask = TypedCommand->isHostTask(); +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 08bb15cff6ff8..7c1eb99e3b286 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -68,14 +68,6 @@ class event_impl { event_impl(sycl::detail::pi::PiEvent Event, const context &SyclContext); event_impl(const QueueImplPtr &Queue); - /// Checks if this event is a SYCL host event. 
- /// - /// All devices that do not support OpenCL interoperability are treated as - /// host device to avoid attempts to call method get on such events. - // - /// \return true if this event is a SYCL host event. - bool is_host(); - /// Waits for the event. /// /// Self is needed in order to pass shared_ptr to Scheduler. @@ -177,7 +169,7 @@ class event_impl { /// Scheduler mutex must be locked in write mode when this is called. /// /// @param Command is a generic pointer to Command object instance. - void setCommand(void *Command) { MCommand = Command; } + void setCommand(void *Command); /// Returns host profiling information. /// @@ -345,6 +337,8 @@ class event_impl { void setEnqueued() { MIsEnqueued = true; } + bool isHost() { return MIsHostTask; } + protected: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait @@ -412,6 +406,7 @@ class event_impl { std::shared_ptr Context); std::atomic_bool MIsEnqueued{false}; + bool MIsHostTask{false}; }; } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index bf7e44062cb5e..0739ac77373b7 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -96,9 +96,7 @@ static std::string demangleKernelName(std::string Name) { return Name; } #endif static std::string deviceToString(device Device) { - if (getSyclObjImpl(Device)->is_host()) - return "HOST"; - else if (Device.is_cpu()) + if (Device.is_cpu()) return "CPU"; else if (Device.is_gpu()) return "GPU"; @@ -144,10 +142,7 @@ void applyFuncOnFilteredArgs( #ifdef XPTI_ENABLE_INSTRUMENTATION static size_t deviceToID(const device &Device) { - if (getSyclObjImpl(Device)->is_host()) - return 0; - else - return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); + return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); } #endif @@ -265,7 +260,7 @@ std::vector Command::getPiEventsBlocking( // (which is set lazily) calling getContextImpl() would set that // context, which we wish to avoid as it is expensive. // Skip host task and NOP events also. - if (!EventImpl->isContextInitialized() || EventImpl->is_host() || + if (!EventImpl->isContextInitialized() || EventImpl->isHost() || EventImpl->isNOP()) continue; // In this path nullptr native event means that the command has not been @@ -455,40 +450,9 @@ void Command::waitForEvents(QueueImplPtr Queue, std::vector &EventImpls, sycl::detail::pi::PiEvent &Event) { if (!EventImpls.empty()) { - if (Queue->is_host()) { - // Host queue can wait for events from different contexts, i.e. it may - // contain events with different contexts in its MPreparedDepsEvents. - // OpenCL 2.1 spec says that clWaitForEvents will return - // CL_INVALID_CONTEXT if events specified in the list do not belong to - // the same context. Thus we split all the events into per-context map. - // An example. We have two queues for the same CPU device: Q1, Q2. Thus - // we will have two different contexts for the same CPU device: C1, C2. - // Also we have default host queue. This queue is accessible via - // Scheduler. Now, let's assume we have three different events: E1(C1), - // E2(C1), E3(C2). The command's MPreparedDepsEvents will contain all - // three events (E1, E2, E3). Now, if piEventsWait is called for all - // three events we'll experience failure with CL_INVALID_CONTEXT 'cause - // these events refer to different contexts. 
- std::map> - RequiredEventsPerContext; - - for (const EventImplPtr &Event : EventImpls) { - ContextImplPtr Context = Event->getContextImpl(); - assert(Context.get() && - "Only non-host events are expected to be waited for here"); - RequiredEventsPerContext[Context.get()].push_back(Event); - } - - for (auto &CtxWithEvents : RequiredEventsPerContext) { - std::vector RawEvents = - getPiEvents(CtxWithEvents.second); - CtxWithEvents.first->getPlugin()->call( - RawEvents.size(), RawEvents.data()); - } - } else { #ifndef NDEBUG for (const EventImplPtr &Event : EventImpls) - assert(Event->getContextImpl().get() && + assert(!Event->isHost() && "Only non-host events are expected to be waited for here"); #endif @@ -501,7 +465,6 @@ void Command::waitForEvents(QueueImplPtr Queue, MEvent->setHostEnqueueTime(); Plugin->call( Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event); - } } } @@ -714,7 +677,7 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, // enqueued // (e.g. alloca). Note that we can't check the pi event to make that // distinction since the command might still be unenqueued at this point. - bool PiEventExpected = (!DepEvent->is_host() && DepEvent->isInitialized()); + bool PiEventExpected = (!DepEvent->isHost() && DepEvent->isInitialized()); if (auto *DepCmd = static_cast(DepEvent->getCommand())) PiEventExpected &= DepCmd->producesPiEvent(); @@ -885,7 +848,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, else { MEvent->setEnqueued(); if (MShouldCompleteEventIfPossible && - (MEvent->is_host() || MEvent->getHandleRef() == nullptr)) + (MEvent->isHost() || MEvent->getHandleRef() == nullptr)) MEvent->setComplete(); // Consider the command is successfully enqueued if return code is @@ -3172,8 +3135,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { std::vector Events = Barrier->MEventsWaitWithBarrier; std::vector PiEvents = getPiEventsBlocking(Events); - if (MQueue->getDeviceImplPtr()->is_host() || PiEvents.empty()) { - // NOP for host device. + if (PiEvents.empty()) { // If Events is empty, then the barrier has no effect. return PI_SUCCESS; } @@ -3244,10 +3206,6 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { } case CG::CGTYPE::SemaphoreWait: { CGSemaphoreWait *SemWait = (CGSemaphoreWait *)MCommandGroup.get(); - if (MQueue->getDeviceImplPtr()->is_host()) { - // NOP for host device. - return PI_SUCCESS; - } const detail::PluginPtr &Plugin = MQueue->getPlugin(); Plugin->call( @@ -3258,10 +3216,6 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { } case CG::CGTYPE::SemaphoreSignal: { CGSemaphoreSignal *SemSignal = (CGSemaphoreSignal *)MCommandGroup.get(); - if (MQueue->getDeviceImplPtr()->is_host()) { - // NOP for host device. - return PI_SUCCESS; - } const detail::PluginPtr &Plugin = MQueue->getPlugin(); Plugin->call( diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 8ba0cceee9e6a..89cabd134a7e1 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -377,10 +377,9 @@ class Command { std::string MSubmissionFileName; std::string MSubmissionFunctionName; - // This flag allows to control whether host event should be set complete - // after successfull enqueue of command. Event is considered as host event if - // either it's is_host() return true or there is no backend representation - // of event (i.e. getHandleRef() return reference to nullptr value). 
+ // This flag allows to control whether event should be set complete + // after successfull enqueue of command. Event is considered as "host" event if + // there is no backend representation of event (i.e. getHandleRef() return reference to nullptr value). // By default the flag is set to true due to most of host operations are // synchronous. The only asynchronous operation currently is host-task. bool MShouldCompleteEventIfPossible = true; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index f0c5dc670aa05..196232b95d734 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -50,9 +50,7 @@ static bool doOverlap(const Requirement *LHS, const Requirement *RHS) { } static bool sameCtx(const ContextImplPtr &LHS, const ContextImplPtr &RHS) { - // Consider two different host contexts to be the same to avoid additional - // allocation on the host - return LHS == RHS || (LHS->is_host() && RHS->is_host()); + return LHS == RHS; } /// Checks if current requirement is requirement for sub buffer. diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 7b6c837131658..0b061a86dbc62 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -105,14 +105,6 @@ EventImplPtr Scheduler::addCG( auto *CGExecKernelPtr = static_cast(CommandGroup.get()); Streams = CGExecKernelPtr->getStreams(); CGExecKernelPtr->clearStreams(); - // Stream's flush buffer memory is mainly initialized in stream's __init - // method. However, this method is not available on host device. - // Initializing stream's flush buffer on the host side in a separate task. - if (Queue->is_host()) { - for (const StreamImplPtr &Stream : Streams) { - Stream->initStreamHost(Queue); - } - } } std::vector> AuxiliaryResources; AuxiliaryResources = CommandGroup->getAuxiliaryResources(); @@ -394,18 +386,6 @@ void Scheduler::enqueueUnblockedCommands( } } -Scheduler::Scheduler() { - sycl::device HostDevice = - createSyclObjFromImpl(device_impl::getHostDeviceImpl()); - sycl::context HostContext{HostDevice}; - DefaultHostQueue = QueueImplPtr( - new queue_impl(detail::getSyclObjImpl(HostDevice), - detail::getSyclObjImpl(HostContext), /*AsyncHandler=*/{}, - /*PropList=*/{sycl::property::queue::enable_profiling()})); -} - -Scheduler::~Scheduler() { DefaultHostQueue.reset(); } - void Scheduler::releaseResources(BlockingT Blocking) { // There might be some commands scheduled for post enqueue cleanup that // haven't been freed because of the graph mutex being locked at the time, @@ -726,11 +706,11 @@ bool CheckEventReadiness(const ContextImplPtr &Context, // their context, which we wish to avoid as it is expensive. // NOP events also don't represent actual dependencies. if ((!SyclEventImplPtr->isContextInitialized() && - !SyclEventImplPtr->is_host()) || + !SyclEventImplPtr->isHost()) || SyclEventImplPtr->isNOP()) { return true; } - if (SyclEventImplPtr->is_host()) { + if (SyclEventImplPtr->isHost()) { return SyclEventImplPtr->isCompleted(); } // Cross-context dependencies can't be passed to the backend directly. 
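The readiness rule in CheckEventReadiness above has a direct public-API counterpart: an event with no backend handle (for example, one produced by a host task) only counts as a satisfied dependency once it reports completion. A minimal sketch using only standard SYCL 2020 queries; the helper name is illustrative, not part of the patch:

#include <sycl/sycl.hpp>

// Mirrors the scheduler's readiness test: "complete" means the event is safe
// to treat as satisfied, whether or not a backend event object exists.
inline bool isEventReady(const sycl::event &E) {
  return E.get_info<sycl::info::event::command_execution_status>() ==
         sycl::info::event_command_status::complete;
}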
diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 09437928f1d32..6fa95cb4a4a54 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -450,10 +450,6 @@ class Scheduler { /// \return true if an instance of the scheduler object exists. static bool isInstanceAlive(); - QueueImplPtr getDefaultHostQueue() { return DefaultHostQueue; } - - const QueueImplPtr &getDefaultHostQueue() const { return DefaultHostQueue; } - static MemObjRecord *getMemObjRecord(const Requirement *const Req); void deferMemObjRelease(const std::shared_ptr &MemObj); @@ -468,8 +464,6 @@ class Scheduler { bool isInFusionMode(QueueIdT Queue); - Scheduler(); - ~Scheduler(); void releaseResources(BlockingT Blocking = BlockingT::BLOCKING); bool isDeferredMemObjectsEmpty(); @@ -966,8 +960,6 @@ class Scheduler { MAuxiliaryResources; std::mutex MAuxiliaryResourcesMutex; - QueueImplPtr DefaultHostQueue; - friend class Command; friend class DispatchHostTask; friend class queue_impl; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 8223c9330814e..749ab6750df5e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -273,12 +273,6 @@ event handler::finalize() { detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); #endif - if (MQueue->is_host()) { - MHostKernel->call(MNDRDesc, (NewEvent) - ? NewEvent->getHostProfilingInfo() - : nullptr); - Result = PI_SUCCESS; - } else { if (MQueue->getDeviceImplPtr()->getBackend() == backend::ext_intel_esimd_emulator) { // Capture the host timestamp for profiling (queue time) @@ -313,7 +307,6 @@ event handler::finalize() { MKernelName.c_str(), RawEvents, NewEvent, nullptr, MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative); } - } #ifdef XPTI_ENABLE_INSTRUMENTATION // Emit signal only when event is created if (NewEvent != nullptr) { @@ -351,7 +344,7 @@ event handler::finalize() { if (PI_SUCCESS != EnqueueKernel()) throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); - else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) + else if (NewEvent->isHost() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); NewEvent->setEnqueued(); From 31a702c1c2ec81aa2430595230761edc75d52dce Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 23 May 2024 06:33:00 -0700 Subject: [PATCH 08/52] not-buildable: remove is_host from obvious places, part2 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/backend_impl.hpp | 1 - sycl/source/detail/bindless_images.cpp | 4 - sycl/source/detail/context_impl.cpp | 10 --- sycl/source/detail/device_impl.hpp | 6 +- sycl/source/detail/filter_selector_impl.cpp | 3 - sycl/source/detail/helpers.cpp | 4 +- sycl/source/detail/kernel_impl.cpp | 4 +- sycl/source/detail/kernel_impl.hpp | 22 ------ sycl/source/detail/kernel_info.hpp | 73 ------------------- sycl/source/detail/platform_impl.cpp | 17 +---- sycl/source/detail/platform_impl.hpp | 12 --- sycl/source/detail/platform_info.hpp | 30 -------- sycl/source/detail/program_impl.cpp | 46 +++--------- sycl/source/detail/program_impl.hpp | 6 -- sycl/source/detail/queue_impl.cpp | 35 ++++----- sycl/source/detail/queue_impl.hpp | 61 +++++----------- sycl/source/detail/scheduler/commands.cpp | 20 +---- .../source/detail/scheduler/graph_builder.cpp | 2 +- 18 files changed, 56 insertions(+), 300 deletions(-) diff --git a/sycl/source/detail/backend_impl.hpp 
b/sycl/source/detail/backend_impl.hpp index ca23ceb48815c..0c160ed1920c4 100644 --- a/sycl/source/detail/backend_impl.hpp +++ b/sycl/source/detail/backend_impl.hpp @@ -15,7 +15,6 @@ inline namespace _V1 { namespace detail { template backend getImplBackend(const T &Impl) { - assert(!Impl->is_host() && "Cannot get the backend for host."); return Impl->getContextImplPtr()->getBackend(); } diff --git a/sycl/source/detail/bindless_images.cpp b/sycl/source/detail/bindless_images.cpp index 174fe087ede4f..fbf90e692598e 100644 --- a/sycl/source/detail/bindless_images.cpp +++ b/sycl/source/detail/bindless_images.cpp @@ -746,10 +746,6 @@ __SYCL_EXPORT void *pitched_alloc_device(size_t *resultPitch, std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - if (CtxImpl->is_host()) { - throw sycl::exception(sycl::make_error_code(sycl::errc::memory_allocation), - "Cannot allocate pitched memory on host!"); - } pi_context PiContext = CtxImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 87663c4e10775..0c79ed2f70462 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -162,8 +162,6 @@ const async_handler &context_impl::get_async_handler() const { template <> uint32_t context_impl::get_info() const { - if (is_host()) - return 0; return get_context_info(this->getHandleRef(), this->getPlugin()); } @@ -183,8 +181,6 @@ context_impl::get_info() sycl::memory_order::relaxed, sycl::memory_order::acquire, sycl::memory_order::release, sycl::memory_order::acq_rel, sycl::memory_order::seq_cst}; - if (is_host()) - return CapabilityList; GetCapabilitiesIntersectionSet< sycl::memory_order, info::device::atomic_memory_order_capabilities>( @@ -200,8 +196,6 @@ context_impl::get_info() sycl::memory_scope::work_item, sycl::memory_scope::sub_group, sycl::memory_scope::work_group, sycl::memory_scope::device, sycl::memory_scope::system}; - if (is_host()) - return CapabilityList; GetCapabilitiesIntersectionSet< sycl::memory_scope, info::device::atomic_memory_scope_capabilities>( @@ -216,8 +210,6 @@ context_impl::get_info() const { sycl::memory_order::relaxed, sycl::memory_order::acquire, sycl::memory_order::release, sycl::memory_order::acq_rel, sycl::memory_order::seq_cst}; - if (is_host()) - return CapabilityList; GetCapabilitiesIntersectionSet( @@ -232,8 +224,6 @@ context_impl::get_info() const { sycl::memory_scope::work_item, sycl::memory_scope::sub_group, sycl::memory_scope::work_group, sycl::memory_scope::device, sycl::memory_scope::system}; - if (is_host()) - return CapabilityList; GetCapabilitiesIntersectionSet( diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 2526647152892..efec017d372f5 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -80,18 +80,18 @@ class device_impl { /// Check if device is a CPU device /// /// \return true if SYCL device is a CPU device - bool is_cpu() const { return (!is_host() && (MType == PI_DEVICE_TYPE_CPU)); } + bool is_cpu() const { return MType == PI_DEVICE_TYPE_CPU; } /// Check if device is a GPU device /// /// \return true if SYCL device is a GPU device - bool is_gpu() const { return (!is_host() && (MType == PI_DEVICE_TYPE_GPU)); } + bool is_gpu() const { return MType == PI_DEVICE_TYPE_GPU; } /// Check if device is an accelerator device /// /// \return true if SYCL device is an accelerator device bool is_accelerator() const { - 
return (!is_host() && (MType == PI_DEVICE_TYPE_ACC)); + return MType == PI_DEVICE_TYPE_ACC; } /// Return device type diff --git a/sycl/source/detail/filter_selector_impl.cpp b/sycl/source/detail/filter_selector_impl.cpp index 4b5f8e836ee6d..0043622d62483 100644 --- a/sycl/source/detail/filter_selector_impl.cpp +++ b/sycl/source/detail/filter_selector_impl.cpp @@ -99,9 +99,6 @@ filter_selector_impl::filter_selector_impl(const std::string &Input) } int filter_selector_impl::operator()(const device &Dev) const { - assert(!sycl::detail::getSyclObjImpl(Dev)->is_host() && - "filter_selector_impl should not be used with host."); - int Score = REJECT_DEVICE_SCORE; for (auto &Filter : mFilters) { diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 1bdb2ddbd4697..75c6fd72b8fd0 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -32,7 +32,7 @@ getOrWaitEvents(std::vector DepEvents, ContextImplPtr Context) { // (which is set lazily) calling getContextImpl() would set that // context, which we wish to avoid as it is expensive. if ((!SyclEventImplPtr->isContextInitialized() && - !SyclEventImplPtr->is_host()) || + !SyclEventImplPtr->isHost()) || SyclEventImplPtr->isNOP()) { continue; } @@ -41,7 +41,7 @@ getOrWaitEvents(std::vector DepEvents, ContextImplPtr Context) { bool NoPiEvent = SyclEventImplPtr->MCommand && !static_cast(SyclEventImplPtr->MCommand)->producesPiEvent(); - if (SyclEventImplPtr->is_host() || + if (SyclEventImplPtr->isHost() || SyclEventImplPtr->getContextImpl() != Context || NoPiEvent) { // Call wait, because the command for the event might not have been // enqueued when kernel fusion is happening. diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 9c5a1851cd3b1..b4ab6b232eef9 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -76,9 +76,7 @@ kernel_impl::kernel_impl(ContextImplPtr Context, ProgramImplPtr ProgramImpl) kernel_impl::~kernel_impl() { // TODO catch an exception and put it to list of asynchronous exceptions - if (!is_host()) { - getPlugin()->call(MKernel); - } + getPlugin()->call(MKernel); } bool kernel_impl::isCreatedFromSource() const { diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 1e56e6da4dc53..1a1542d0d409b 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -103,20 +103,10 @@ class kernel_impl { /// /// \return a valid cl_kernel instance cl_kernel get() const { - if (is_host()) { - throw invalid_object_error( - "This instance of kernel doesn't support OpenCL interoperability.", - PI_ERROR_INVALID_KERNEL); - } getPlugin()->call(MKernel); return pi::cast(MKernel); } - /// Check if the associated SYCL context is a SYCL host context. - /// - /// \return true if this SYCL kernel is a host kernel. 
- bool is_host() const { return MContext->is_host(); } - const PluginPtr &getPlugin() const { return MContext->getPlugin(); } /// Query information from the kernel object using the info::kernel_info @@ -217,11 +207,6 @@ template inline typename Param::return_type kernel_impl::get_info() const { static_assert(is_kernel_info_desc::value, "Invalid kernel information descriptor"); - if (is_host()) { - // TODO implement - assert(0 && "Not implemented"); - } - if constexpr (std::is_same_v) checkIfValidForNumArgsInfoQuery(); @@ -248,9 +233,6 @@ kernel_impl::get_info(const device &Device) const { "is a built-in kernel."); } - if (is_host()) { - return get_kernel_device_specific_info_host(Device); - } return get_kernel_device_specific_info( this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), getPlugin()); @@ -260,10 +242,6 @@ template inline typename Param::return_type kernel_impl::get_info(const device &Device, const sycl::range<3> &WGSize) const { - if (is_host()) { - throw runtime_error("Sub-group feature is not supported on HOST device.", - PI_ERROR_INVALID_DEVICE); - } return get_kernel_device_specific_info_with_input( this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), WGSize, getPlugin()); diff --git a/sycl/source/detail/kernel_info.hpp b/sycl/source/detail/kernel_info.hpp index 12256158eed49..79c0f73c952de 100644 --- a/sycl/source/detail/kernel_info.hpp +++ b/sycl/source/detail/kernel_info.hpp @@ -137,79 +137,6 @@ uint32_t get_kernel_device_specific_info_with_input( return Result; } -template -inline typename Param::return_type -get_kernel_device_specific_info_host(const sycl::device &Device) = delete; - -template <> -inline sycl::range<3> get_kernel_device_specific_info_host< - info::kernel_device_specific::global_work_size>(const sycl::device &) { - throw invalid_object_error("This instance of kernel is a host instance", - PI_ERROR_INVALID_KERNEL); -} - -template <> -inline size_t get_kernel_device_specific_info_host< - info::kernel_device_specific::work_group_size>(const sycl::device &Dev) { - return Dev.get_info(); -} - -template <> -inline sycl::range<3> get_kernel_device_specific_info_host< - info::kernel_device_specific::compile_work_group_size>( - const sycl::device &) { - return {0, 0, 0}; -} - -template <> -inline size_t get_kernel_device_specific_info_host< - info::kernel_device_specific::preferred_work_group_size_multiple>( - const sycl::device &Dev) { - return get_kernel_device_specific_info_host< - info::kernel_device_specific::work_group_size>(Dev); -} - -template <> -inline size_t get_kernel_device_specific_info_host< - info::kernel_device_specific::private_mem_size>(const sycl::device &) { - return 0; -} - -template <> -inline uint32_t get_kernel_device_specific_info_host< - info::kernel_device_specific::ext_codeplay_num_regs>(const sycl::device &) { - return 0; -} - -template <> -inline uint32_t get_kernel_device_specific_info_host< - info::kernel_device_specific::max_num_sub_groups>(const sycl::device &) { - throw invalid_object_error("This instance of kernel is a host instance", - PI_ERROR_INVALID_KERNEL); -} - -template <> -inline uint32_t get_kernel_device_specific_info_host< - info::kernel_device_specific::max_sub_group_size>(const sycl::device &) { - throw invalid_object_error("This instance of kernel is a host instance", - PI_ERROR_INVALID_KERNEL); -} - -template <> -inline uint32_t get_kernel_device_specific_info_host< - info::kernel_device_specific::compile_num_sub_groups>( - const sycl::device &) { - throw invalid_object_error("This instance 
of kernel is a host instance", - PI_ERROR_INVALID_KERNEL); -} - -template <> -inline uint32_t get_kernel_device_specific_info_host< - info::kernel_device_specific::compile_sub_group_size>( - const sycl::device &) { - throw invalid_object_error("This instance of kernel is a host instance", - PI_ERROR_INVALID_KERNEL); -} } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index 9700fde466803..2caf958bb842b 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -79,9 +79,6 @@ static bool IsBannedPlatform(platform Platform) { // is disabled as well. // auto IsMatchingOpenCL = [](platform Platform, const std::string_view name) { - if (getSyclObjImpl(Platform)->is_host()) - return false; - const bool HasNameMatch = Platform.get_info().find( name) != std::string::npos; const auto Backend = detail::getSyclObjImpl(Platform)->getBackend(); @@ -466,15 +463,9 @@ platform_impl::get_devices(info::device_type DeviceType) const { ods_target_list *OdsTargetList = SYCLConfig::get(); - if (is_host() && (DeviceType == info::device_type::host || - DeviceType == info::device_type::all)) { - Res.push_back( - createSyclObjFromImpl(device_impl::getHostDeviceImpl())); - } - // If any DeviceType other than host was requested for host platform, // an empty vector will be returned. - if (is_host() || DeviceType == info::device_type::host) + if (DeviceType == info::device_type::host) return Res; pi_uint32 NumDevices = 0; @@ -556,9 +547,6 @@ platform_impl::get_devices(info::device_type DeviceType) const { } bool platform_impl::has_extension(const std::string &ExtensionName) const { - if (is_host()) - return false; - std::string AllExtensionNames = get_platform_info_string_impl( MPlatform, getPlugin(), detail::PiInfoCode::value); @@ -580,9 +568,6 @@ pi_native_handle platform_impl::getNative() const { template typename Param::return_type platform_impl::get_info() const { - if (is_host()) - return get_platform_info_host(); - return get_platform_info(this->getHandleRef(), getPlugin()); } diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index 0bb8d1ab77e2f..e13bd0a3a1b31 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -89,9 +89,6 @@ class platform_impl { template typename Param::return_type get_backend_info() const; - /// \return true if this SYCL platform is a host platform. - bool is_host() const { return MHostPlatform; }; - /// Returns the backend of this platform. backend getBackend(void) const { return MBackend; } @@ -107,11 +104,6 @@ class platform_impl { /// \return an instance of OpenCL cl_platform_id. cl_platform_id get() const { - if (is_host()) { - throw invalid_object_error( - "This instance of platform doesn't support OpenCL interoperability.", - PI_ERROR_INVALID_PLATFORM); - } return pi::cast(MPlatform); } @@ -123,10 +115,6 @@ class platform_impl { /// /// \return a raw plug-in platform handle. 
const sycl::detail::pi::PiPlatform &getHandleRef() const { - if (is_host()) - throw invalid_object_error("This instance of platform is a host instance", - PI_ERROR_INVALID_PLATFORM); - return MPlatform; } diff --git a/sycl/source/detail/platform_info.hpp b/sycl/source/detail/platform_info.hpp index 42c41b5063cf5..70bcd626024d9 100644 --- a/sycl/source/detail/platform_info.hpp +++ b/sycl/source/detail/platform_info.hpp @@ -59,36 +59,6 @@ get_platform_info(sycl::detail::pi::PiPlatform Plt, const PluginPtr &Plugin) { return split_string(Result, ' '); } -// Host platform information methods -template -inline typename Param::return_type get_platform_info_host() = delete; - -template <> -inline std::string get_platform_info_host() { - return "FULL PROFILE"; -} - -template <> -inline std::string get_platform_info_host() { - return "1.2"; -} - -template <> inline std::string get_platform_info_host() { - return "SYCL host platform"; -} - -template <> -inline std::string get_platform_info_host() { - return ""; -} - -template <> -inline std::vector -get_platform_info_host() { - // TODO update when appropriate - return {}; -} - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index d65f3163b961f..584b2487f5dee 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -72,9 +72,8 @@ program_impl::program_impl( } MDevices = ProgramList[0]->MDevices; std::vector DevicesSorted; - if (!is_host()) { - DevicesSorted = sort_devices_by_cl_device_id(MDevices); - } + DevicesSorted = sort_devices_by_cl_device_id(MDevices); + check_device_feature_support(MDevices); std::list> Locks; for (const auto &Prg : ProgramList) { @@ -85,18 +84,16 @@ program_impl::program_impl( "Not all programs are associated with the same context", PI_ERROR_INVALID_PROGRAM); } - if (!is_host()) { - std::vector PrgDevicesSorted = - sort_devices_by_cl_device_id(Prg->MDevices); - if (PrgDevicesSorted != DevicesSorted) { - throw invalid_object_error( - "Not all programs are associated with the same devices", - PI_ERROR_INVALID_PROGRAM); - } + + std::vector PrgDevicesSorted = + sort_devices_by_cl_device_id(Prg->MDevices); + if (PrgDevicesSorted != DevicesSorted) { + throw invalid_object_error( + "Not all programs are associated with the same devices", + PI_ERROR_INVALID_PROGRAM); } } - if (!is_host()) { std::vector Devices(get_pi_devices()); std::vector Programs; bool NonInterOpToLink = false; @@ -113,7 +110,6 @@ program_impl::program_impl( LinkOptions.c_str(), Programs.size(), Programs.data(), nullptr, nullptr, &MProgram); Plugin->checkPiResult(Err); - } } program_impl::program_impl(ContextImplPtr Context, @@ -208,7 +204,7 @@ program_impl::program_impl(ContextImplPtr Context, program_impl::~program_impl() { // TODO catch an exception and put it to list of asynchronous exceptions - if (!is_host() && MProgram != nullptr) { + if (MProgram != nullptr) { const PluginPtr &Plugin = getPlugin(); Plugin->call(MProgram); } @@ -216,11 +212,6 @@ program_impl::~program_impl() { cl_program program_impl::get() const { throw_if_state_is(program_state::none); - if (is_host()) { - throw invalid_object_error( - "This instance of program doesn't support OpenCL interoperability.", - PI_ERROR_INVALID_PROGRAM); - } getPlugin()->call(MProgram); return pi::cast(MProgram); } @@ -229,19 +220,16 @@ void program_impl::compile_with_kernel_name(std::string KernelName, std::string CompileOptions) { std::lock_guard Lock(MMutex); 
throw_if_state_is_not(program_state::none); - if (!is_host()) { create_pi_program_with_kernel_name( KernelName, /*JITCompilationIsRequired=*/(!CompileOptions.empty())); compile(CompileOptions); - } MState = program_state::compiled; } void program_impl::link(std::string LinkOptions) { std::lock_guard Lock(MMutex); throw_if_state_is_not(program_state::compiled); - if (!is_host()) { check_device_feature_support(MDevices); std::vector Devices(get_pi_devices()); const PluginPtr &Plugin = getPlugin(); @@ -263,16 +251,12 @@ void program_impl::link(std::string LinkOptions) { Plugin->checkPiResult(Err); MLinkOptions = LinkOptions; MBuildOptions = LinkOptions; - } MState = program_state::linked; } bool program_impl::has_kernel(std::string KernelName, bool IsCreatedFromSource) const { throw_if_state_is(program_state::none); - if (is_host()) { - return !IsCreatedFromSource; - } std::vector Devices(get_pi_devices()); pi_uint64 function_ptr; @@ -299,14 +283,6 @@ kernel program_impl::get_kernel(std::string KernelName, std::shared_ptr PtrToSelf, bool IsCreatedFromSource) const { throw_if_state_is(program_state::none); - if (is_host()) { - if (IsCreatedFromSource) - throw invalid_object_error("This instance of program is a host instance", - PI_ERROR_INVALID_PROGRAM); - - return createSyclObjFromImpl( - std::make_shared(MContext, PtrToSelf)); - } auto [Kernel, ArgMask] = get_pi_kernel_arg_mask_pair(KernelName); return createSyclObjFromImpl(std::make_shared( Kernel, MContext, PtrToSelf, IsCreatedFromSource, nullptr, ArgMask)); @@ -314,8 +290,6 @@ kernel program_impl::get_kernel(std::string KernelName, std::vector> program_impl::get_binaries() const { throw_if_state_is(program_state::none); - if (is_host()) - return {}; std::vector> Result; const PluginPtr &Plugin = getPlugin(); diff --git a/sycl/source/detail/program_impl.hpp b/sycl/source/detail/program_impl.hpp index 32a0c7fd38bfe..1fa8767774961 100644 --- a/sycl/source/detail/program_impl.hpp +++ b/sycl/source/detail/program_impl.hpp @@ -134,9 +134,6 @@ class program_impl { /// not retained before return. const sycl::detail::pi::PiProgram &getHandleRef() const { return MProgram; } - /// \return true if this SYCL program is a host program. - bool is_host() const { return MContext->is_host(); } - /// Compiles the SYCL kernel function into the encapsulated raw program. /// /// The kernel function is defined by its name. This member function @@ -215,14 +212,11 @@ class program_impl { /// \return the SYCL context that this program was constructed with. context get_context() const { - if (is_host()) - return context(); return createSyclObjFromImpl(MContext); } /// \return the Plugin associated with the context of this program. 
const PluginPtr &getPlugin() const { - assert(!is_host() && "Plugin is not available for Host."); return MContext->getPlugin(); } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 05c579f78a405..2c7876ea14c08 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -42,10 +42,9 @@ getPIEvents(const std::vector &DepEvents) { template <> uint32_t queue_impl::get_info() const { sycl::detail::pi::PiResult result = PI_SUCCESS; - if (!is_host()) - getPlugin()->call( - MQueues[0], PI_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, - nullptr); + getPlugin()->call( + MQueues[0], PI_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, + nullptr); return result; } @@ -142,8 +141,7 @@ event queue_impl::memset(const std::shared_ptr &Self, SYCL_STREAM_NAME, "memory_transfer_node"); PrepareNotify.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast( - MDevice->is_host() ? 0 : MDevice->getHandleRef())); + reinterpret_cast(MDevice->getHandleRef())); xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast(Ptr)); xpti::addMetadata(TEvent, "value_set", Value); xpti::addMetadata(TEvent, "memory_size", Count); @@ -190,8 +188,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, SYCL_STREAM_NAME, "memory_transfer_node"); PrepareNotify.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast( - MDevice->is_host() ? 0 : MDevice->getHandleRef())); + reinterpret_cast(MDevice->getHandleRef())); xpti::addMetadata(TEvent, "src_memory_ptr", reinterpret_cast(Src)); xpti::addMetadata(TEvent, "dest_memory_ptr", reinterpret_cast(Dest)); @@ -430,9 +427,7 @@ void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc, if (WaitEvent) { device D = get_device(); std::string DevStr; - if (getSyclObjImpl(D)->is_host()) - DevStr = "HOST"; - else if (D.is_cpu()) + if (D.is_cpu()) DevStr = "CPU"; else if (D.is_gpu()) DevStr = "GPU"; @@ -588,14 +583,12 @@ bool queue_impl::ext_oneapi_empty() const { } // Check the status of the backend queue if this is not a host queue. - if (!is_host()) { - pi_bool IsReady = false; - getPlugin()->call( - MQueues[0], PI_EXT_ONEAPI_QUEUE_INFO_EMPTY, sizeof(pi_bool), &IsReady, - nullptr); - if (!IsReady) - return false; - } + pi_bool IsReady = false; + getPlugin()->call( + MQueues[0], PI_EXT_ONEAPI_QUEUE_INFO_EMPTY, sizeof(pi_bool), &IsReady, + nullptr); + if (!IsReady) + return false; // We may have events like host tasks which are not submitted to the backend // queue so we need to get their status separately. @@ -609,7 +602,7 @@ bool queue_impl::ext_oneapi_empty() const { EventImplWeakPtrIt != MEventsWeak.end(); ++EventImplWeakPtrIt) if (std::shared_ptr EventImplSharedPtr = EventImplWeakPtrIt->lock()) - if (EventImplSharedPtr->is_host() && + if (EventImplSharedPtr->isHost() && EventImplSharedPtr ->get_info() != info::event_command_status::complete) @@ -641,7 +634,7 @@ void queue_impl::revisitUnenqueuedCommandsState( std::remove_if( Deps.UnenqueuedCmdEvents.begin(), Deps.UnenqueuedCmdEvents.end(), [](const EventImplPtr &CommandEvent) { - return (CommandEvent->is_host() ? CommandEvent->isCompleted() + return (CommandEvent->isHost() ? 
CommandEvent->isCompleted() : CommandEvent->isEnqueued()); }), Deps.UnenqueuedCmdEvents.end()); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index dff24ad1dfec1..c205b5916f302 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -106,13 +106,12 @@ class queue_impl { queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList) : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler), - MPropList(PropList), MHostQueue(MDevice->is_host()), + MPropList(PropList), MIsInorder(has_property()), MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)), + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { if (has_property()) { @@ -124,8 +123,7 @@ class queue_impl { if (MDevice->has(aspect::queue_profiling)) { // When piGetDeviceAndHostTimer is not supported, compute the // profiling time OpenCL version < 2.1 case - if (!getDeviceImplPtr()->is_host() && - !getDeviceImplPtr()->isGetDeviceAndHostTimerSupported()) + if (!getDeviceImplPtr()->isGetDeviceAndHostTimerSupported()) MFallbackProfiling = true; } else { throw sycl::exception(make_error_code(errc::feature_not_supported), @@ -154,7 +152,7 @@ class queue_impl { "Cannot enable fusion if device does not support fusion"); } if (!Context->isDeviceValid(Device)) { - if (!Context->is_host() && Context->getBackend() == backend::opencl) + if (Context->getBackend() == backend::opencl) throw sycl::invalid_object_error( "Queue cannot be constructed with the given context and device " "since the device is not a member of the context (descendants of " @@ -166,13 +164,12 @@ class queue_impl { "descendant of its member.", PI_ERROR_INVALID_DEVICE); } - if (!MHostQueue) { - const QueueOrder QOrder = - MIsInorder ? QueueOrder::Ordered : QueueOrder::OOO; - MQueues.push_back(createQueue(QOrder)); - // This section is the second part of the instrumentation that uses the - // tracepoint information and notifies - } + + const QueueOrder QOrder = + MIsInorder ? QueueOrder::Ordered : QueueOrder::OOO; + MQueues.push_back(createQueue(QOrder)); + // This section is the second part of the instrumentation that uses the + // tracepoint information and notifies // We enable XPTI tracing events using the TLS mechanism; if the code // location data is available, then the tracing data will be rich. @@ -198,13 +195,11 @@ class queue_impl { MDevice->getDeviceName()); xpti::addMetadata( TEvent, "sycl_device", - reinterpret_cast( - MDevice->is_host() ? 0 : MDevice->getHandleRef())); + reinterpret_cast(MDevice->getHandleRef())); } xpti::addMetadata(TEvent, "is_inorder", MIsInorder); xpti::addMetadata(TEvent, "queue_id", MQueueID); - if (!MHostQueue) - xpti::addMetadata(TEvent, "queue_handle", + xpti::addMetadata(TEvent, "queue_handle", reinterpret_cast(getHandleRef())); }); // Also publish to TLS @@ -263,13 +258,11 @@ class queue_impl { MDevice->getDeviceName()); xpti::addMetadata( TEvent, "sycl_device", - reinterpret_cast( - MDevice->is_host() ? 
0 : MDevice->getHandleRef())); + reinterpret_cast(MDevice->getHandleRef())); } xpti::addMetadata(TEvent, "is_inorder", MIsInorder); xpti::addMetadata(TEvent, "queue_id", MQueueID); - if (!MHostQueue) - xpti::addMetadata(TEvent, "queue_handle", getHandleRef()); + xpti::addMetadata(TEvent, "queue_handle", getHandleRef()); }); // Also publish to TLS before notification xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); @@ -287,13 +280,12 @@ class queue_impl { /// \param AsyncHandler is a SYCL asynchronous exception handler. queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler) - : MContext(Context), MAsyncHandler(AsyncHandler), MHostQueue(false), + : MContext(Context), MAsyncHandler(AsyncHandler), MIsInorder(has_property()), MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)), + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { queue_impl_interop(PiQueue); @@ -309,13 +301,11 @@ class queue_impl { queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList) : MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList), - MHostQueue(false), MIsInorder(has_property()), MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)) { + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)) { queue_impl_interop(PiQueue); } @@ -336,19 +326,12 @@ class queue_impl { } #endif throw_asynchronous(); - if (!MHostQueue) { - cleanup_fusion_cmd(); - getPlugin()->call(MQueues[0]); - } + cleanup_fusion_cmd(); + getPlugin()->call(MQueues[0]); } /// \return an OpenCL interoperability queue handle. cl_command_queue get() { - if (MHostQueue) { - throw invalid_object_error( - "This instance of queue doesn't support OpenCL interoperability", - PI_ERROR_INVALID_QUEUE); - } getPlugin()->call(MQueues[0]); return pi::cast(MQueues[0]); } @@ -367,9 +350,6 @@ class queue_impl { /// \return an associated SYCL device. device get_device() const { return createSyclObjFromImpl(MDevice); } - /// \return true if this queue is a SYCL host queue. - bool is_host() const { return MHostQueue; } - /// \return true if this queue has discard_events support. bool supportsDiscardingPiEvents() const { return MSupportsDiscardingPiEvents; @@ -859,7 +839,7 @@ class queue_impl { "function objects should use the sycl::handler API instead."); } - handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue); + handler Handler(Self, PrimaryQueue, SecondaryQueue); Handler.saveCodeLoc(Loc); PreventSubmit = true; try { @@ -969,7 +949,6 @@ class queue_impl { /// Iterator through MQueues. size_t MNextQueueIdx = 0; - const bool MHostQueue = false; /// Indicates that a native out-of-order queue could not be created and we /// need to emulate it with multiple native in-order queues. 
bool MEmulateOOO = false; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 0739ac77373b7..d6c41f39e9942 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2246,7 +2246,7 @@ void SetArgBasedOnType( const PluginPtr &Plugin, sycl::detail::pi::PiKernel Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, - const sycl::context &Context, bool IsHost, detail::ArgDesc &Arg, + const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex) { switch (Arg.MType) { case kernel_param_kind_t::kind_stream: @@ -2300,13 +2300,6 @@ void SetArgBasedOnType( break; } case kernel_param_kind_t::kind_specialization_constants_buffer: { - if (IsHost) { - throw sycl::exception( - sycl::make_error_code(sycl::errc::feature_not_supported), - "SYCL2020 specialization constants are not yet supported on host " - "device " + - codeToString(PI_ERROR_INVALID_OPERATION)); - } assert(DeviceImageImpl != nullptr); sycl::detail::pi::PiMem SpecConstsBuffer = DeviceImageImpl->get_spec_const_buffer_ref(); @@ -2343,7 +2336,7 @@ static pi_result SetKernelParamsAndLaunch( auto setFunc = [&Plugin, Kernel, &DeviceImageImpl, &getMemAllocationFunc, &Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) { SetArgBasedOnType(Plugin, Kernel, DeviceImageImpl, getMemAllocationFunc, - Queue->get_context(), Queue->is_host(), Arg, + Queue->get_context(), Arg, NextTrueIndex); }; @@ -2940,8 +2933,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { NDRDescT &NDRDesc = ExecKernel->MNDRDesc; std::vector &Args = ExecKernel->MArgs; - if (MQueue->is_host() || (MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator)) { + if (MQueue->getDeviceImplPtr()->getBackend() == + backend::ext_intel_esimd_emulator) { for (ArgDesc &Arg : Args) if (kernel_param_kind_t::kind_accessor == Arg.MType) { Requirement *Req = (Requirement *)(Arg.MPtr); @@ -2954,10 +2947,6 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Plugin->call(RawEvents.size(), &RawEvents[0]); } - if (MQueue->is_host()) { - ExecKernel->MHostKernel->call(NDRDesc, - getEvent()->getHostProfilingInfo()); - } else { assert(MQueue->getDeviceImplPtr()->getBackend() == backend::ext_intel_esimd_emulator); if (MEvent != nullptr) @@ -2967,7 +2956,6 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { reinterpret_cast(ExecKernel->MHostKernel->getPtr()), NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], &NDRDesc.LocalSize[0], 0, nullptr, nullptr); - } return PI_SUCCESS; } diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 196232b95d734..d1b57182d78ff 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -678,7 +678,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { if (const char *HUMConfig = SYCLConfig::get()) { if (std::strcmp(HUMConfig, "0") == 0) - return Ctx->is_host(); + return false; if (std::strcmp(HUMConfig, "1") == 0) return true; } From fa08c2b3314604af314406fb73bcaf33e669f04a Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 27 May 2024 02:12:53 -0700 Subject: [PATCH 09/52] non-buildable: remove is_host from obvious places Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/context_impl.hpp | 7 +---- sycl/source/detail/device_impl.cpp | 8 ++--- sycl/source/detail/usm/usm_impl.cpp | 47 
----------------------------- 3 files changed, 3 insertions(+), 59 deletions(-) diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index af20236fc4b23..203242ee40077 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -97,11 +97,6 @@ class context_impl { /// \return an instance of OpenCL cl_context. cl_context get() const; - /// Checks if this context is a host context. - /// - /// \return true if this context is a host context. - bool is_host() const; - /// Gets asynchronous exception handler. /// /// \return an instance of SYCL async_handler. @@ -182,7 +177,7 @@ class context_impl { // OpenCL does not support using descendants of context members within that // context yet. // TODO remove once this limitation is lifted - if (!is_host() && Device->getBackend() == backend::opencl) + if (Device->getBackend() == backend::opencl) return hasDevice(Device); while (!hasDevice(Device)) { diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 2e87300425c20..c677b9165d71f 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -327,8 +327,6 @@ bool device_impl::has(aspect Aspect) const { size_t return_size = 0; switch (Aspect) { - case aspect::host: - return is_host(); case aspect::cpu: return is_cpu(); case aspect::gpu: @@ -369,16 +367,14 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_intel_mem_channel: return get_info(); case aspect::usm_atomic_host_allocations: - return is_host() || - (get_device_info_impl:: get(MPlatform->getDeviceImpl(MDevice)) & PI_USM_CONCURRENT_ATOMIC_ACCESS); case aspect::usm_shared_allocations: return get_info(); case aspect::usm_atomic_shared_allocations: - return is_host() || - (get_device_info_impl:: get(MPlatform->getDeviceImpl(MDevice)) & PI_USM_CONCURRENT_ATOMIC_ACCESS); diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index ecf63bc63e427..753c27d5f678d 100755 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -73,20 +73,6 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, return nullptr; std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - if (CtxImpl->is_host()) { - if (!Alignment) { - // worst case default - Alignment = 128; - } - - aligned_allocator Alloc(Alignment); - try { - RetVal = Alloc.allocate(Size); - } catch (const std::bad_alloc &) { - // Conform with Specification behavior - RetVal = nullptr; - } - } else { pi_context C = CtxImpl->getHandleRef(); const PluginPtr &Plugin = CtxImpl->getPlugin(); pi_result Error = PI_ERROR_INVALID_VALUE; @@ -128,7 +114,6 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, // The spec wants a nullptr returned, not an exception. 
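// Illustrative sketch (public API only, helper name is hypothetical): the
// same "return nullptr, do not throw" contract is what callers of the USM
// allocation entry points should expect.
#include <sycl/sycl.hpp>

void *tryHostAlloc(size_t Bytes, const sycl::context &Ctx) {
  void *Ptr = sycl::malloc_host(Bytes, Ctx); // may be nullptr on failure
  return Ptr;
}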
if (Error != PI_SUCCESS) return nullptr; - } #ifdef XPTI_ENABLE_INSTRUMENTATION xpti::addMetadata(PrepareNotify.traceEvent(), "memory_ptr", reinterpret_cast(RetVal)); @@ -154,24 +139,6 @@ void *alignedAllocInternal(size_t Alignment, size_t Size, if (Size == 0) return nullptr; - if (CtxImpl->is_host()) { - if (Kind == alloc::unknown) { - RetVal = nullptr; - } else { - if (!Alignment) { - // worst case default - Alignment = 128; - } - - aligned_allocator Alloc(Alignment); - try { - RetVal = Alloc.allocate(Size); - } catch (const std::bad_alloc &) { - // Conform with Specification behavior - RetVal = nullptr; - } - } - } else { pi_context C = CtxImpl->getHandleRef(); const PluginPtr &Plugin = CtxImpl->getPlugin(); pi_result Error = PI_ERROR_INVALID_VALUE; @@ -245,7 +212,6 @@ void *alignedAllocInternal(size_t Alignment, size_t Size, // The spec wants a nullptr returned, not an exception. if (Error != PI_SUCCESS) return nullptr; - } return RetVal; } @@ -284,14 +250,9 @@ void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt, void freeInternal(void *Ptr, const context_impl *CtxImpl) { if (Ptr == nullptr) return; - if (CtxImpl->is_host()) { - // need to use alignedFree here for Windows - detail::OSUtil::alignedFree(Ptr); - } else { pi_context C = CtxImpl->getHandleRef(); const PluginPtr &Plugin = CtxImpl->getPlugin(); Plugin->call(C, Ptr); - } } void free(void *Ptr, const context &Ctxt, @@ -578,10 +539,6 @@ alloc get_pointer_type(const void *Ptr, const context &Ctxt) { std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - // Everything on a host device is just system malloc so call it host - if (CtxImpl->is_host()) - return alloc::host; - pi_context PICtx = CtxImpl->getHandleRef(); pi_usm_type AllocTy; @@ -631,10 +588,6 @@ device get_pointer_device(const void *Ptr, const context &Ctxt) { std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - // Just return the host device in the host context - if (CtxImpl->is_host()) - return Ctxt.get_devices()[0]; - // Check if ptr is a host allocation if (get_pointer_type(Ptr, Ctxt) == alloc::host) { auto Devs = CtxImpl->getDevices(); From d021de9af53da859390f6519730dd363b9b2d4bb Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 27 May 2024 06:03:56 -0700 Subject: [PATCH 10/52] not-buildable: remove is_host in simple places Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/buffer_impl.cpp | 3 --- sycl/source/detail/memory_manager.cpp | 27 +-------------------------- sycl/source/detail/memory_manager.hpp | 4 ---- sycl/source/detail/queue_impl.cpp | 11 ++++------- sycl/source/detail/sycl_mem_obj_t.cpp | 23 ++--------------------- 5 files changed, 7 insertions(+), 61 deletions(-) diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index 835c732a40bf9..d7d77205b162c 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -25,9 +25,6 @@ void *buffer_impl::allocateMem(ContextImplPtr Context, bool InitFromUserData, bool HostPtrReadOnly = false; BaseT::determineHostPtr(Context, InitFromUserData, HostPtr, HostPtrReadOnly); - assert(!(nullptr == HostPtr && BaseT::useHostPtr() && Context->is_host()) && - "Internal error. 
Allocating memory on the host " - "while having use_host_ptr property"); return MemoryManager::allocateMemBuffer( std::move(Context), this, HostPtr, HostPtrReadOnly, BaseT::getSizeInBytes(), BaseT::MInteropEvent, BaseT::MInteropContext, diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 840f95ea7a643..f4e42363cb6e1 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -266,11 +266,6 @@ void MemoryManager::releaseMemObj(ContextImplPtr TargetContext, return; } - if (TargetContext->is_host()) { - MemObj->releaseHostMem(MemAllocation); - return; - } - const PluginPtr &Plugin = TargetContext->getPlugin(); memReleaseHelper(Plugin, pi::cast(MemAllocation)); } @@ -288,20 +283,6 @@ void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, OutEvent); } -void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, - bool HostPtrReadOnly, size_t Size, - const sycl::property_list &) { - std::ignore = HostPtrReadOnly; - std::ignore = Size; - - // Can return user pointer directly if it is not a nullptr. - if (UserPtr) - return UserPtr; - - return MemObj->allocateHostMem(); - ; -} - void *MemoryManager::allocateInteropMemObject( ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, @@ -398,10 +379,7 @@ void *MemoryManager::allocateMemBuffer( const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait) { void *MemPtr; - if (TargetContext->is_host()) - MemPtr = - allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList); - else if (UserPtr && InteropContext) + if (UserPtr && InteropContext) MemPtr = allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, InteropContext, PropsList, OutEventToWait); @@ -420,9 +398,6 @@ void *MemoryManager::allocateMemImage( const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait) { - if (TargetContext->is_host()) - return allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, - PropsList); if (UserPtr && InteropContext) return allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, InteropContext, PropsList, OutEventToWait); diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 1d2800bf9dadc..7be17898bc0d9 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -85,10 +85,6 @@ class __SYCL_EXPORT MemoryManager { static void releaseMemObj(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, void *UserPtr); - static void *allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, - bool HostPtrReadOnly, size_t Size, - const sycl::property_list &PropsList); - static void * allocateInteropMemObject(ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 2c7876ea14c08..bba423df61b60 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -283,12 +283,12 @@ void queue_impl::addEvent(const event &Event) { // if there is no command on the event, we cannot track it with MEventsWeak // as that will leave it with no owner. Track in MEventsShared only if we're // unable to call piQueueFinish during wait. 
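// Illustrative sketch (public API only, helper name is hypothetical): a host
// task is the typical producer of an event with no backend handle, which is
// why such events still need the queue-side tracking described above.
#include <sycl/sycl.hpp>

sycl::event submitHostWork(sycl::queue &Q) {
  return Q.submit([&](sycl::handler &CGH) {
    CGH.host_task([] { /* runs on a host thread; no backend event is created */ });
  });
}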
- if (is_host() || MEmulateOOO) + if (Event->isHost() || MEmulateOOO) addSharedEvent(Event); } // As long as the queue supports piQueueFinish we only need to store events // for unenqueued commands and host tasks. - else if (is_host() || MEmulateOOO || EImpl->getHandleRef() == nullptr) { + else if (Event->isHost() || MEmulateOOO || EImpl->getHandleRef() == nullptr) { std::weak_ptr EventWeakPtr{EImpl}; std::lock_guard Lock{MMutex}; MEventsWeak.push_back(std::move(EventWeakPtr)); @@ -299,7 +299,7 @@ void queue_impl::addEvent(const event &Event) { /// but some events have no other owner. In this case, /// addSharedEvent will have the queue track the events via a shared pointer. void queue_impl::addSharedEvent(const event &Event) { - assert(is_host() || MEmulateOOO); + assert(MEmulateOOO); std::lock_guard Lock(MMutex); // Events stored in MEventsShared are not released anywhere else aside from // calls to queue::wait/wait_and_throw, which a user application might not @@ -369,9 +369,6 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents), &EventImpl->getHandleRef(), EventImpl); - if (MContext->is_host()) - return MDiscardEvents ? createDiscardedEvent() : event(); - if (isInOrder()) { auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr : MExtGraphDeps.LastEventPtr; @@ -520,7 +517,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { // directly. Otherwise, only wait for unenqueued or host task events, starting // from the latest submitted task in order to minimize total amount of calls, // then handle the rest with piQueueFinish. - const bool SupportsPiFinish = !is_host() && !MEmulateOOO; + const bool SupportsPiFinish = !MEmulateOOO; for (auto EventImplWeakPtrIt = WeakEvents.rbegin(); EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) { if (std::shared_ptr EventImplSharedPtr = diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index bb4c5f4e1441d..87f005fe8ca78 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -33,12 +33,6 @@ SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, MUserPtr(nullptr), MShadowCopy(nullptr), MUploadDataFunctor(nullptr), MSharedPtrStorage(nullptr), MHostPtrProvided(true), MOwnNativeHandle(OwnNativeHandle) { - if (MInteropContext->is_host()) - throw sycl::invalid_parameter_error( - "Creation of interoperability memory object using host context is " - "not allowed", - PI_ERROR_INVALID_CONTEXT); - sycl::detail::pi::PiContext Context = nullptr; const PluginPtr &Plugin = getPlugin(); @@ -84,12 +78,6 @@ SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, MUserPtr(nullptr), MShadowCopy(nullptr), MUploadDataFunctor(nullptr), MSharedPtrStorage(nullptr), MHostPtrProvided(true), MOwnNativeHandle(OwnNativeHandle) { - if (MInteropContext->is_host()) - throw sycl::invalid_parameter_error( - "Creation of interoperability memory object using host context is " - "not allowed", - PI_ERROR_INVALID_CONTEXT); - sycl::detail::pi::PiContext Context = nullptr; const PluginPtr &Plugin = getPlugin(); @@ -191,19 +179,12 @@ void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context, // The data for the allocation can be provided via either the user pointer // (InitFromUserData, can be read-only) or a runtime-allocated read-write // HostPtr. We can have one of these scenarios: - // 1. The allocation is the first one and on host. 
InitFromUserData == true. - // 2. The allocation is the first one and isn't on host. InitFromUserData + // 1. The allocation is the first one and isn't on host. InitFromUserData // varies based on unified host memory support and whether or not the data can // be discarded. - // 3. The allocation is not the first one and is on host. InitFromUserData == - // false, HostPtr == nullptr. This can only happen if the allocation command - // is not linked since it would be a no-op otherwise. Attempt to reuse the - // user pointer if it's read-write, but do not copy its contents if it's not. - // 4. The allocation is not the first one and not on host. InitFromUserData == + // 2. The allocation is not the first one and not on host. InitFromUserData == // false, HostPtr is provided if the command is linked. The host pointer is // guaranteed to be reused in this case. - if (Context->is_host() && !MOpenCLInterop && !MHostPtrReadOnly) - InitFromUserData = true; if (InitFromUserData) { assert(!HostPtr && "Cannot init from user data and reuse host ptr provided " From 5b60b90c37d2bc388272eaed40f375403a148e80 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 28 May 2024 04:26:44 -0700 Subject: [PATCH 11/52] draft Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/memory_manager.cpp | 27 ----------- sycl/source/detail/scheduler/commands.cpp | 21 ++++----- .../source/detail/scheduler/graph_builder.cpp | 46 +++++++++---------- sycl/source/detail/scheduler/scheduler.hpp | 32 +++++++++---- 4 files changed, 55 insertions(+), 71 deletions(-) diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index f4e42363cb6e1..792c1c57bd3f1 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -921,9 +921,6 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!SrcQueue->getContextImplPtr()->is_host() && - "Host queue not supported in fill_usm."); - if (!Len) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) @@ -962,9 +959,6 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in fill_usm."); - if (!Length) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) @@ -1000,9 +994,6 @@ void MemoryManager::prefetch_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in prefetch_usm."); - const PluginPtr &Plugin = Queue->getPlugin(); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); @@ -1024,9 +1015,6 @@ void MemoryManager::advise_usm( std::vector /*DepEvents*/, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in advise_usm."); - const PluginPtr &Plugin = Queue->getPlugin(); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); @@ -1049,9 +1037,6 @@ void MemoryManager::copy_2d_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - 
assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in copy_2d_usm."); - if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1137,9 +1122,6 @@ void MemoryManager::fill_2d_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in fill_2d_usm."); - if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1177,9 +1159,6 @@ void MemoryManager::memset_2d_usm( char Value, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in fill_2d_usm."); - if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1714,8 +1693,6 @@ void MemoryManager::ext_oneapi_prefetch_usm_cmd_buffer( sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *Mem, size_t Length, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { - assert(!Context->is_host() && "Host queue not supported in prefetch_usm."); - const PluginPtr &Plugin = Context->getPlugin(); Plugin->call( CommandBuffer, Mem, Length, _pi_usm_migration_flags(0), Deps.size(), @@ -1728,8 +1705,6 @@ void MemoryManager::ext_oneapi_advise_usm_cmd_buffer( size_t Length, pi_mem_advice Advice, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { - assert(!Context->is_host() && "Host queue not supported in advise_usm."); - const PluginPtr &Plugin = Context->getPlugin(); Plugin->call( CommandBuffer, Mem, Length, Advice, Deps.size(), Deps.data(), @@ -1748,8 +1723,6 @@ void MemoryManager::copy_image_bindless( const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in copy_image_bindless."); assert((Flags == (sycl::detail::pi::PiImageCopyFlags) ext::oneapi::experimental::image_copy_flags::HtoD || Flags == (sycl::detail::pi::PiImageCopyFlags) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d6c41f39e9942..0a25d7b3ee6c1 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -671,12 +671,9 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, const QueueImplPtr &WorkerQueue = getWorkerQueue(); const ContextImplPtr &WorkerContext = WorkerQueue->getContextImplPtr(); - // 1. Async work is not supported for host device. - // 2. Non-host events can be ignored if they are not fully initialized. - // 3. Some types of commands do not produce PI events after they are - // enqueued - // (e.g. alloca). Note that we can't check the pi event to make that - // distinction since the command might still be unenqueued at this point. + // 1. Non-host events can be ignored if they are not fully initialized. + // 2. Some types of commands do not produce PI events after they are + // enqueued (e.g. alloca). Note that we can't check the pi event to make that distinction since the command might still be unenqueued at this point. 
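// Illustrative example of the two rules above (sketch only, not part of the patch): a
// host-task dependency that has not been enqueued yet fails rule 1, and an ALLOCA
// dependency fails rule 2; in either case no PI event can be expected from the
// dependency, so it cannot simply be passed along in a PI wait list.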
bool PiEventExpected = (!DepEvent->isHost() && DepEvent->isInitialized()); if (auto *DepCmd = static_cast(DepEvent->getCommand())) PiEventExpected &= DepCmd->producesPiEvent(); @@ -692,11 +689,13 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match we'll connect them using host task - if (DepEventContext != WorkerContext && !WorkerContext->is_host()) { + if (DepEventContext == WorkerContext) + MPreparedDepsEvents.push_back(std::move(DepEvent)); + else + { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; ConnectionCmd = GB.connectDepEvent(this, DepEvent, Dep, ToCleanUp); - } else - MPreparedDepsEvents.push_back(std::move(DepEvent)); + } return ConnectionCmd; } @@ -3106,10 +3105,6 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::Barrier: { - if (MQueue->getDeviceImplPtr()->is_host()) { - // NOP for host device. - return PI_SUCCESS; - } const PluginPtr &Plugin = MQueue->getPlugin(); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index d1b57182d78ff..bbb6d8de12f98 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -243,7 +243,7 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord( getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr, ToEnqueue); } else - MemObject->MRecord.reset(new MemObjRecord{Queue->getContextImplPtr(), + MemObject->MRecord.reset(new MemObjRecord{Queue ? Queue->getContextImplPtr() : nullptr, LeafLimit, AllocateDependency}); MMemObjs.push_back(MemObject); @@ -317,7 +317,7 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc, assert(AllocaCmdSrc->MIsActive && "Expected source alloca command to be active"); - if (AllocaCmdSrc->getQueue()->is_host()) { + if (!AllocaCmdSrc->getQueue()) { UnMapMemObject *UnMapCmd = new UnMapMemObject( AllocaCmdDst, *AllocaCmdDst->getRequirement(), &AllocaCmdSrc->MMemAllocation, AllocaCmdDst->getQueue()); @@ -427,7 +427,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( Command *Scheduler::GraphBuilder::remapMemoryObject( MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd, std::vector &ToEnqueue) { - assert(HostAllocaCmd->getQueue()->is_host() && + assert(!HostAllocaCmd->getQueue() && "Host alloca command expected"); assert(HostAllocaCmd->MIsActive && "Active alloca command expected"); @@ -525,16 +525,14 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, auto SYCLMemObj = static_cast(Req->MSYCLMemObj); SYCLMemObj->handleWriteAccessorCreation(); } - - const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue(); - - MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue); + // Host accessor is not attached to any queue so no QueueImplPtr object to be sent to getOrInsertMemObjRecord. 
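// Sketch of the convention adopted here (illustrative only): a null QueueImplPtr now
// stands for "host" wherever a host queue object used to be passed, and it translates
// into a null context inside the record, mirroring the GetContext helper added later in
// this series:
//   ContextImplPtr Ctx = Queue ? Queue->getContextImplPtr() : nullptr;
//   // Ctx == nullptr means the latest copy of the data lives on the host.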
+ MemObjRecord *Record = getOrInsertMemObjRecord(nullptr, Req, ToEnqueue); if (MPrintOptionsArray[BeforeAddHostAcc]) printGraphAsDot("before_addHostAccessor"); markModifiedIfWrite(Record, Req); AllocaCommandBase *HostAllocaCmd = - getOrCreateAllocaForReq(Record, Req, HostQueue, ToEnqueue); + getOrCreateAllocaForReq(Record, Req, nullptr, ToEnqueue); if (sameCtx(HostAllocaCmd->getQueue()->getContextImplPtr(), Record->MCurContext)) { @@ -682,6 +680,10 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { if (std::strcmp(HUMConfig, "1") == 0) return true; } + // host task & host accessor is covered with no device context but provide required support. + if (Ctx == nullptr) + return true; + for (const device &Device : Ctx->getDevices()) { if (!Device.get_info()) return false; @@ -696,9 +698,9 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { - + auto Context = Queue != nullptr ? Queue->getContextImplPtr() : nullptr; AllocaCommandBase *AllocaCmd = findAllocaForReq( - Record, Req, Queue->getContextImplPtr(), /*AllowConst=*/false); + Record, Req, Context, /*AllowConst=*/false); if (!AllocaCmd) { std::vector ToCleanUp; @@ -729,7 +731,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // the user pointer is read-only is still not handled: it leads to // unnecessary copy on devices with unified host memory support. const bool HostUnifiedMemory = - checkHostUnifiedMemory(Queue->getContextImplPtr()); + checkHostUnifiedMemory(Context); SYCLMemObjI *MemObj = Req->MSYCLMemObj; const bool InitFromUserData = Record->MAllocaCommands.empty() && (HostUnifiedMemory || MemObj->isInterop()); @@ -745,16 +747,14 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // There's no need to make a host allocation if the buffer is not // initialized with user data. if (MemObj->hasUserDataPtr()) { - QueueImplPtr DefaultHostQueue = - Scheduler::getInstance().getDefaultHostQueue(); AllocaCommand *HostAllocaCmd = new AllocaCommand( - DefaultHostQueue, FullReq, true /* InitFromUserData */, + nullptr, FullReq, true /* InitFromUserData */, nullptr /* LinkedAllocaCmd */, MemObj->isHostPointerReadOnly() /* IsConst */); Record->MAllocaCommands.push_back(HostAllocaCmd); Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue); ++(HostAllocaCmd->MLeafCounter); - Record->MCurContext = DefaultHostQueue->getContextImplPtr(); + Record->usedOnHost(); } } } else { @@ -766,7 +766,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // new one. There could be situations when we could setup link with // "not" current allocation, but it will require memory copy. // Can setup link between cl and host allocations only - if (Queue->is_host() != Record->MCurContext->is_host()) { + if ((Context != nullptr) + (Record->MCurContext != nullptr) == 1) { // Linked commands assume that the host allocation is reused by the // plugin runtime and that can lead to unnecessary copy overhead on // devices that do not support host unified memory. Do not link the @@ -778,7 +778,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( bool PinnedHostMemory = MemObj->usesPinnedHostMemory(); bool HostUnifiedMemoryOnNonHostDevice = - Queue->is_host() ? checkHostUnifiedMemory(Record->MCurContext) + Queue == nullptr ? 
checkHostUnifiedMemory(Record->MCurContext) : HostUnifiedMemory; if (PinnedHostMemory || HostUnifiedMemoryOnNonHostDevice) { AllocaCommandBase *LinkedAllocaCmdCand = findAllocaForReq( @@ -818,14 +818,14 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // construction, host allocation doesn't. So, device allocation should // always be active here. Also if the "follower" command is a device one // we have to change current context to the device one. - if (Queue->is_host()) { + if (Queue == nullptr) { AllocaCmd->MIsActive = false; } else { LinkedAllocaCmd->MIsActive = false; Record->MCurContext = Queue->getContextImplPtr(); std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); + findDepsForReq(Record, Req, Context); for (Command *Dep : Deps) { Command *ConnCmd = AllocaCmd->addDep( DepDesc{Dep, Req, LinkedAllocaCmd}, ToCleanUp); @@ -1071,7 +1071,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( if (isSameCtx) { // If the memory is already in the required host context, check if the // required access mode is valid, remap if not. - if (Record->MCurContext->is_host() && + if (!Record->MCurContext && !isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) { remapMemoryObject(Record, Req, Req->MIsSubBuffer @@ -1093,7 +1093,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( NeedMemMoveToHost = true; MemMoveTargetQueue = HT.MQueue; } - } else if (!Queue->is_host() && !Record->MCurContext->is_host()) + } else if (Queue && Record->MCurContext) NeedMemMoveToHost = true; if (NeedMemMoveToHost) @@ -1714,12 +1714,12 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( bool NeedMemMoveToHost = false; auto MemMoveTargetQueue = Queue; - if (!Queue->is_host() && !Record->MCurContext->is_host()) + if (Queue && Record->MCurContext) NeedMemMoveToHost = true; if (NeedMemMoveToHost) insertMemoryMove(Record, Req, - Scheduler::getInstance().getDefaultHostQueue(), + nullptr, ToEnqueue); insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue); } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 6fa95cb4a4a54..bcb930bc8194a 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -199,12 +199,12 @@ using FusionMap = std::unordered_map; /// There must be a single MemObjRecord for each SYCL memory object. /// /// \ingroup sycl_graph -struct MemObjRecord { +class MemObjRecord { MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit, LeavesCollection::AllocateDependencyF AllocateDependency) : MReadLeaves{this, LeafLimit, AllocateDependency}, - MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx} {} - + MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx}, MCurHostAccess{ MCurContext == nullptr } {} +public: // Contains all allocation commands for the memory object. std::vector MAllocaCommands; @@ -214,16 +214,32 @@ struct MemObjRecord { // Contains latest write commands working with memory object. LeavesCollection MWriteLeaves; + // The flag indicates that the content of the memory object was/will be + // modified. Used while deciding if copy back needed. + bool MMemModified = false; + + void usedOnDevice(ContextImplPtr& NewContext) + { + MCurContext = NewContext; + MCurHostAccess = false; + } + + void usedOnHost() + { + MCurContext = nullptr; + MCurHostAccess = true; + } + + bool usedOnHost() { return MCurHostAccess; } +protected: // The context which has the latest state of the memory object. 
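// With the host device removed, a null MCurContext is how the record says "the latest
// state is on the host"; usedOnHost()/usedOnDevice() above keep MCurHostAccess in sync
// with that convention.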
ContextImplPtr MCurContext; - // The mode this object can be accessed with from the host context. - // Valid only if the current context is host. + // The mode this object can be accessed with from the host (host_accessor). + // Valid only if the current usage is on host. access::mode MHostAccess = access::mode::read_write; - // The flag indicates that the content of the memory object was/will be - // modified. Used while deciding if copy back needed. - bool MMemModified = false; + bool MCurHostAccess = false; }; /// DPC++ graph scheduler class. From 21ed380f362dd560342f75f94a58b84da50edd9c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 29 May 2024 05:58:36 -0700 Subject: [PATCH 12/52] non-buildable: eliminate getDefaultHostQueue usage Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/memory_manager.cpp | 14 +-- sycl/source/detail/scheduler/commands.cpp | 6 +- .../source/detail/scheduler/graph_builder.cpp | 96 +++++++++---------- sycl/source/detail/scheduler/scheduler.cpp | 4 +- sycl/source/detail/scheduler/scheduler.hpp | 18 ++-- 5 files changed, 65 insertions(+), 73 deletions(-) diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 792c1c57bd3f1..3c0ad08e0763f 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -750,23 +750,23 @@ void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl) { - if (SrcQueue->is_host()) { - if (TgtQueue->is_host()) - copyH2H(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize, + if (!SrcQueue) { + if (!TgtQueue) + copyH2H(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem, - std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset, + nullptr, DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl); else - copyH2D(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize, + copyH2D(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize, pi::cast(DstMem), std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl); } else { - if (TgtQueue->is_host()) + if (!TgtQueue) copyD2H(SYCLMemObj, pi::cast(SrcMem), std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset, - SrcElemSize, (char *)DstMem, std::move(TgtQueue), DimDst, DstSize, + SrcElemSize, (char *)DstMem, nullptr, DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl); else diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 0a25d7b3ee6c1..f0e3471a0f6f6 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2872,7 +2872,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { AllocaCmd->getSYCLMemObj(), AllocaCmd->getMemAllocation(), MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, Copy->getDst(), - Scheduler::getInstance().getDefaultHostQueue(), Req->MDims, + nullptr, Req->MDims, Req->MAccessRange, Req->MAccessRange, /*DstOffset=*/{0, 0, 0}, Req->MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); @@ -2883,11 +2883,9 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Requirement *Req = (Requirement *)(Copy->getDst()); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - 
Scheduler::getInstance().getDefaultHostQueue(); - MemoryManager::copy( AllocaCmd->getSYCLMemObj(), Copy->getSrc(), - Scheduler::getInstance().getDefaultHostQueue(), Req->MDims, + nullptr, Req->MDims, Req->MAccessRange, Req->MAccessRange, /*SrcOffset*/ {0, 0, 0}, Req->MElemSize, AllocaCmd->getMemAllocation(), MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index bbb6d8de12f98..6c9244f9ecb2c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -49,15 +49,16 @@ static bool doOverlap(const Requirement *LHS, const Requirement *RHS) { LHS->MOffsetInBytes); } -static bool sameCtx(const ContextImplPtr &LHS, const ContextImplPtr &RHS) { - return LHS == RHS; -} - /// Checks if current requirement is requirement for sub buffer. static bool IsSuitableSubReq(const Requirement *Req) { return Req->MIsSubBuffer; } +static ContextImplPtr GetContext(const QueueImplPtr& Queue) +{ + return Queue ? Queue->getContextImplPtr() : nullptr; +} + /// Checks if the required access mode is allowed under the current one. static bool isAccessModeAllowed(access::mode Required, access::mode Current) { switch (Current) { @@ -243,7 +244,7 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord( getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr, ToEnqueue); } else - MemObject->MRecord.reset(new MemObjRecord{Queue ? Queue->getContextImplPtr() : nullptr, + MemObject->MRecord.reset(new MemObjRecord{GetContext(Queue), LeafLimit, AllocateDependency}); MMemObjs.push_back(MemObject); @@ -282,8 +283,9 @@ void Scheduler::GraphBuilder::addNodeToLeaves( UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { + auto Context = GetContext(Queue); AllocaCommandBase *AllocaCmd = - findAllocaForReq(Record, Req, Queue->getContextImplPtr()); + findAllocaForReq(Record, Req, Context); assert(AllocaCmd && "There must be alloca for requirement!"); UpdateHostRequirementCommand *UpdateCommand = new UpdateHostRequirementCommand(Queue, *Req, AllocaCmd, &Req->MData); @@ -292,7 +294,7 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( const Requirement *StoredReq = UpdateCommand->getRequirement(); std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); + findDepsForReq(Record, Req, Context); std::vector ToCleanUp; for (Command *Dep : Deps) { Command *ConnCmd = @@ -345,8 +347,9 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( if (!AllocaCmdDst) throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); + auto Context = GetContext(Queue); std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); + findDepsForReq(Record, Req, Context); Deps.insert(AllocaCmdDst); // Get parent allocation of sub buffer to perform full copy of whole buffer if (IsSuitableSubReq(Req)) { @@ -362,8 +365,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( // current context, need to find a parent alloca command for it (it must be // there) auto IsSuitableAlloca = [Record](AllocaCommandBase *AllocaCmd) { - bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), - Record->MCurContext) && + bool Res = Record->isSameContext(AllocaCmd->getQueue()) && // Looking for a parent buffer alloca command AllocaCmd->getType() == Command::CommandType::ALLOCA; 
return Res; @@ -398,7 +400,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( if ((Req->MAccessMode == access::mode::discard_write) || (Req->MAccessMode == access::mode::discard_read_write)) { - Record->MCurContext = Queue->getContextImplPtr(); + Record->updateUsage(Context); return nullptr; } else { // Full copy of buffer is needed to avoid loss of data that may be caused @@ -420,7 +422,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( addNodeToLeaves(Record, NewCmd, access::mode::read_write, ToEnqueue); for (Command *Cmd : ToCleanUp) cleanupCommand(Cmd); - Record->MCurContext = Queue->getContextImplPtr(); + Record->updateUsage(Context); return NewCmd; } @@ -474,7 +476,6 @@ Command *Scheduler::GraphBuilder::remapMemoryObject( Command * Scheduler::GraphBuilder::addCopyBack(Requirement *Req, std::vector &ToEnqueue) { - QueueImplPtr HostQueue = Scheduler::getInstance().getDefaultHostQueue(); SYCLMemObjI *MemObj = Req->MSYCLMemObj; MemObjRecord *Record = getMemObjRecord(MemObj); if (Record && MPrintOptionsArray[BeforeAddCopyBack]) @@ -485,13 +486,13 @@ Scheduler::GraphBuilder::addCopyBack(Requirement *Req, return nullptr; std::set Deps = - findDepsForReq(Record, Req, HostQueue->getContextImplPtr()); + findDepsForReq(Record, Req, nullptr); AllocaCommandBase *SrcAllocaCmd = findAllocaForReq(Record, Req, Record->MCurContext); auto MemCpyCmdUniquePtr = std::make_unique( *SrcAllocaCmd->getRequirement(), SrcAllocaCmd, *Req, &Req->MData, - SrcAllocaCmd->getQueue(), std::move(HostQueue)); + SrcAllocaCmd->getQueue(), nullptr); if (!MemCpyCmdUniquePtr) throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); @@ -534,8 +535,7 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, AllocaCommandBase *HostAllocaCmd = getOrCreateAllocaForReq(Record, Req, nullptr, ToEnqueue); - if (sameCtx(HostAllocaCmd->getQueue()->getContextImplPtr(), - Record->MCurContext)) { + if (Record->isSameContext(HostAllocaCmd->getQueue())) { if (!isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) { remapMemoryObject(Record, Req, Req->MIsSubBuffer ? 
(static_cast( @@ -545,15 +545,14 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, ToEnqueue); } } else - insertMemoryMove(Record, Req, HostQueue, ToEnqueue); + insertMemoryMove(Record, Req, nullptr, ToEnqueue); Command *UpdateHostAccCmd = - insertUpdateHostReqCmd(Record, Req, HostQueue, ToEnqueue); + insertUpdateHostReqCmd(Record, Req, nullptr, ToEnqueue); // Need empty command to be blocked until host accessor is destructed EmptyCommand *EmptyCmd = - addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue, - Command::BlockReason::HostAccessor, ToEnqueue); + addEmptyCmd(UpdateHostAccCmd, {Req}, Command::BlockReason::HostAccessor, ToEnqueue); Req->MBlockedCmd = EmptyCmd; @@ -564,14 +563,14 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, } Command *Scheduler::GraphBuilder::addCGUpdateHost( - std::unique_ptr CommandGroup, const QueueImplPtr &HostQueue, + std::unique_ptr CommandGroup, std::vector &ToEnqueue) { auto UpdateHost = static_cast(CommandGroup.get()); Requirement *Req = UpdateHost->getReqToUpdate(); - MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue); - return insertMemoryMove(Record, Req, HostQueue, ToEnqueue); + MemObjRecord *Record = getOrInsertMemObjRecord(nullptr, Req, ToEnqueue); + return insertMemoryMove(Record, Req, nullptr, ToEnqueue); } /// Start the search for the record from list of "leaf" commands and check if @@ -618,8 +617,10 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, // Going through copying memory between contexts is not supported. if (Dep.MDepCommand) - CanBypassDep &= - sameCtx(Context, Dep.MDepCommand->getQueue()->getContextImplPtr()); + { + auto DepQueue = Dep.MDepCommand->getQueue(); + CanBypassDep &= IsOnSameContext(Context, DepQueue); + } if (!CanBypassDep) { RetDeps.insert(DepCmd); @@ -658,7 +659,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( bool AllowConst) { auto IsSuitableAlloca = [&Context, Req, AllowConst](AllocaCommandBase *AllocaCmd) { - bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), Context); + bool Res = IsOnSameContext(Context, AllocaCmd->getQueue()); if (IsSuitableSubReq(Req)) { const Requirement *TmpReq = AllocaCmd->getRequirement(); Res &= AllocaCmd->getType() == Command::CommandType::ALLOCA_SUB_BUF; @@ -698,7 +699,7 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { - auto Context = Queue != nullptr ? Queue->getContextImplPtr() : nullptr; + auto Context = GetContext(Queue); AllocaCommandBase *AllocaCmd = findAllocaForReq( Record, Req, Context, /*AllowConst=*/false); @@ -754,7 +755,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( Record->MAllocaCommands.push_back(HostAllocaCmd); Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue); ++(HostAllocaCmd->MLeafCounter); - Record->usedOnHost(); + Record->updateUsage(nullptr); } } } else { @@ -766,7 +767,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // new one. There could be situations when we could setup link with // "not" current allocation, but it will require memory copy. 
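// The linkage check a few lines below is an exclusive-or written arithmetically: a link
// is only created when exactly one side is a real device context and the other is the
// host (null). An equivalent, more explicit spelling would be (sketch, not part of the
// change):
//   bool LinkHostAndDevice = (Context != nullptr) != Record->usedOnDevice();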
// Can setup link between cl and host allocations only - if ((Context != nullptr) + (Record->MCurContext != nullptr) == 1) { + if ((Context != nullptr) + (Record->usedOnDevice()) == 1) { // Linked commands assume that the host allocation is reused by the // plugin runtime and that can lead to unnecessary copy overhead on // devices that do not support host unified memory. Do not link the @@ -822,7 +823,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( AllocaCmd->MIsActive = false; } else { LinkedAllocaCmd->MIsActive = false; - Record->MCurContext = Queue->getContextImplPtr(); + Record->updateUsage(Context); std::set Deps = findDepsForReq(Record, Req, Context); @@ -865,10 +866,9 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( Command *Cmd, const std::vector &Reqs, - const QueueImplPtr &Queue, Command::BlockReason Reason, + Command::BlockReason Reason, std::vector &ToEnqueue, const bool AddDepsToLeaves) { - EmptyCommand *EmptyCmd = - new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); + EmptyCommand *EmptyCmd = new EmptyCommand(); if (!EmptyCmd) throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); @@ -878,9 +878,9 @@ EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( EmptyCmd->MBlockReason = Reason; for (Requirement *Req : Reqs) { - MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req, ToEnqueue); + MemObjRecord *Record = getOrInsertMemObjRecord(nullptr, Req, ToEnqueue); AllocaCommandBase *AllocaCmd = - getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); + getOrCreateAllocaForReq(Record, Req, nullptr, ToEnqueue); EmptyCmd->addRequirement(Cmd, AllocaCmd, Req); } // addRequirement above call addDep that already will add EmptyCmd as user for @@ -1062,8 +1062,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( AllocaCmd = getOrCreateAllocaForReq(Record, Req, QueueForAlloca, ToEnqueue); - isSameCtx = - sameCtx(QueueForAlloca->getContextImplPtr(), Record->MCurContext); + isSameCtx = Record->isSameContext(QueueForAlloca); } // If there is alloca command we need to check if the latest memory is in @@ -1071,7 +1070,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( if (isSameCtx) { // If the memory is already in the required host context, check if the // required access mode is valid, remap if not. 
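// Example of the case handled below (illustrative): the object was last mapped on the
// host for read-only access (Record->MHostAccess == access::mode::read); a new request
// for access::mode::read_write is rejected by isAccessModeAllowed(), so the object has
// to be remapped with write access even though no context change is involved.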
- if (!Record->MCurContext && + if (!Record->usedOnDevice() && !isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) { remapMemoryObject(Record, Req, Req->MIsSubBuffer @@ -1089,21 +1088,20 @@ void Scheduler::GraphBuilder::createGraphForCommand( if (isInteropTask) { const detail::CGHostTask &HT = static_cast(CG); - if (HT.MQueue->getContextImplPtr() != Record->MCurContext) { + if (!(Record->isSameContext(HT.MQueue)) { NeedMemMoveToHost = true; MemMoveTargetQueue = HT.MQueue; } - } else if (Queue && Record->MCurContext) + } else if (Queue && Record->usedOnDevice()) NeedMemMoveToHost = true; if (NeedMemMoveToHost) - insertMemoryMove(Record, Req, - Scheduler::getInstance().getDefaultHostQueue(), - ToEnqueue); + insertMemoryMove(Record, Req, nullptr, ToEnqueue); insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue); } + std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); + findDepsForReq(Record, Req, GetContext(Queue)); for (Command *Dep : Deps) { if (Dep != NewCmd) { @@ -1343,7 +1341,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( CG::CodeplayHostTask, /* Payload */ {})); ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); + std::move(ConnectCG), Cmd->getQueue()); } catch (const std::bad_alloc &) { throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); } @@ -1705,7 +1703,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); - isSameCtx = sameCtx(Queue->getContextImplPtr(), Record->MCurContext); + isSameCtx = Record->isSameContext(Queue); } if (!isSameCtx) { @@ -1714,7 +1712,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( bool NeedMemMoveToHost = false; auto MemMoveTargetQueue = Queue; - if (Queue && Record->MCurContext) + if (Queue && Record->usedOnDevice()) NeedMemMoveToHost = true; if (NeedMemMoveToHost) @@ -1724,7 +1722,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue); } std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); + findDepsForReq(Record, Req, GetContext(Queue)); for (Command *Dep : Deps) { if (Dep != NewCmd.get()) { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 0b061a86dbc62..7e5db05daf01a 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -118,12 +118,12 @@ EventImplPtr Scheduler::addCG( switch (Type) { case CG::UpdateHost: NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), - DefaultHostQueue, AuxiliaryCmds); + AuxiliaryCmds); NewEvent = NewCmd->getEvent(); break; case CG::CodeplayHostTask: { auto Result = MGraphBuilder.addCG(std::move(CommandGroup), - DefaultHostQueue, AuxiliaryCmds); + nullptr, AuxiliaryCmds); NewCmd = Result.NewCmd; NewEvent = Result.NewEvent; ShouldEnqueue = Result.ShouldEnqueue; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index bcb930bc8194a..6a2bcc4e5004a 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -203,7 +203,7 @@ class MemObjRecord { MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit, LeavesCollection::AllocateDependencyF AllocateDependency) : MReadLeaves{this, LeafLimit, AllocateDependency}, - MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx}, MCurHostAccess{ MCurContext == nullptr } {} + 
MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx} {} public: // Contains all allocation commands for the memory object. std::vector MAllocaCommands; @@ -218,19 +218,19 @@ class MemObjRecord { // modified. Used while deciding if copy back needed. bool MMemModified = false; - void usedOnDevice(ContextImplPtr& NewContext) + void updateUsage(const ContextImplPtr &NewContext) { MCurContext = NewContext; - } - void usedOnHost() + bool isSameContext(const QueueImplPtr& Queue) const { - MCurContext = nullptr; - MCurHostAccess = true; + // Covers case for host usage (nullptr == nullptr) and existing device contexts comparison. + return MCurContext == (Queue ? Queue->getContextImplPtr() : nullptr); } - bool usedOnHost() { return MCurHostAccess; } + bool usedOnDevice() const { return MCurContext != nullptr; } + protected: // The context which has the latest state of the memory object. ContextImplPtr MCurContext; @@ -238,8 +238,6 @@ class MemObjRecord { // The mode this object can be accessed with from the host (host_accessor). // Valid only if the current usage is on host. access::mode MHostAccess = access::mode::read_write; - - bool MCurHostAccess = false; }; /// DPC++ graph scheduler class. @@ -621,7 +619,6 @@ class Scheduler { /// /// \return a command that represents command group execution. Command *addCGUpdateHost(std::unique_ptr CommandGroup, - const QueueImplPtr &HostQueue, std::vector &ToEnqueue); /// Enqueues a command to update memory to the latest state. @@ -759,7 +756,6 @@ class Scheduler { EmptyCommand *addEmptyCmd(Command *Cmd, const std::vector &Req, - const QueueImplPtr &Queue, Command::BlockReason Reason, std::vector &ToEnqueue, const bool AddDepsToLeaves = true); From c533af788609ed1b86dd27307eb48045f05c7565 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 4 Jun 2024 03:41:44 -0700 Subject: [PATCH 13/52] non-buildable: cleanup queue usages Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.cpp | 3 +- sycl/source/detail/scheduler/commands.cpp | 208 +++++++++------------- 2 files changed, 88 insertions(+), 123 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index bba423df61b60..c1c1d3835a54d 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -26,7 +26,8 @@ namespace sycl { inline namespace _V1 { namespace detail { -std::atomic queue_impl::MNextAvailableQueueID = 0; +// Treat 0 as reserved for "host" queue +std::atomic queue_impl::MNextAvailableQueueID = 1; static std::vector getPIEvents( const std::vector &DepEvents) { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f0e3471a0f6f6..f7962bb7a5d66 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -56,7 +56,7 @@ namespace detail { // Global graph for the application extern xpti::trace_event_data_t *GSYCLGraphEvent; -bool CurrentCodeLocationValid() { +static bool CurrentCodeLocationValid() { detail::tls_code_loc_t Tls; auto CodeLoc = Tls.query(); auto FileName = CodeLoc.fileName(); @@ -65,7 +65,7 @@ bool CurrentCodeLocationValid() { (FunctionName && FunctionName[0] != '\0'); } -void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, +static void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, xpti_td *TraceEvent, uint16_t Type, const void *Addr) { if (!(xptiCheckTraceEnabled(StreamID, Type) && TraceEvent)) return; @@ -74,6 +74,17 @@ void
emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, xptiNotifySubscribers(StreamID, Type, detail::GSYCLGraphEvent, static_cast(TraceEvent), InstanceID, Addr); } + +static addDeviceMetadata(xpti_td* TraceEvent, const QueueImplPtr& Queue) +{ + xpti::addMetadata(TraceEvent, "sycl_device", + Queue ? deviceToID(MQueue->get_device()) : nullptr); + xpti::addMetadata(TraceEvent, "sycl_device_type", + Queue ? deviceToString(MQueue->get_device()) : "host"); + if (Queue) + xpti::addMetadata(TraceEvent, "sycl_device_name", + getSyclObjImpl(MQueue->get_device())->getDeviceName()); +} #endif #ifdef __SYCL_ENABLE_GNU_DEMANGLING @@ -236,9 +247,7 @@ Command::getPiEvents(const std::vector &EventImpls) const { // current one is a host task. In this case we should not skip pi event due // to different sync mechanisms for different task types on in-order queue. const QueueImplPtr &WorkerQueue = getWorkerQueue(); - // MWorkerQueue in command is always not null. So check if - // EventImpl->getWorkerQueue != nullptr is implicit. - if (EventImpl->getWorkerQueue() == WorkerQueue && + if (WorkerQueue && EventImpl->getWorkerQueue() == WorkerQueue && WorkerQueue->isInOrder() && !isHostTask()) continue; @@ -278,9 +287,7 @@ std::vector Command::getPiEventsBlocking( // current one is a host task. In this case we should not skip pi event due // to different sync mechanisms for different task types on in-order queue. const QueueImplPtr &WorkerQueue = getWorkerQueue(); - // MWorkerQueue in command is always not null. So check if - // EventImpl->getWorkerQueue != nullptr is implicit. - if (EventImpl->getWorkerQueue() == WorkerQueue && + if (MWorkerQueue && EventImpl->getWorkerQueue() == WorkerQueue && WorkerQueue->isInOrder() && !isHostTask()) continue; @@ -337,12 +344,10 @@ class DispatchHostTask { PluginWithEvents.first->call(RawEvents.size(), RawEvents.data()); } catch (const sycl::exception &E) { - CGHostTask &HostTask = static_cast(MThisCmd->getCG()); - HostTask.MQueue->reportAsyncException(std::current_exception()); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(std::current_exception()); return (pi_result)E.get_cl_code(); } catch (...) 
{ - CGHostTask &HostTask = static_cast(MThisCmd->getCG()); - HostTask.MQueue->reportAsyncException(std::current_exception()); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(std::current_exception()); return PI_ERROR_UNKNOWN; } } @@ -383,7 +388,7 @@ class DispatchHostTask { std::exception_ptr EPtr = std::make_exception_ptr(sycl::runtime_error( std::string("Couldn't wait for host-task's dependencies"), WaitResult)); - HostTask.MQueue->reportAsyncException(EPtr); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(EPtr); // reset host-task's lambda and quit HostTask.MHostTask.reset(); Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); @@ -394,7 +399,7 @@ class DispatchHostTask { // we're ready to call the user-defined lambda now if (HostTask.MHostTask->isInteropTask()) { interop_handle IH{MReqToMem, HostTask.MQueue, - HostTask.MQueue->getDeviceImplPtr(), + // HostTask.MQueue->getDeviceImplPtr(), HostTask.MQueue->getContextImplPtr()}; HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo(), IH); @@ -419,7 +424,7 @@ class DispatchHostTask { } } #endif - HostTask.MQueue->reportAsyncException(CurrentException); + MthisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); } HostTask.MHostTask.reset(); @@ -436,7 +441,7 @@ class DispatchHostTask { Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); } catch (...) { auto CurrentException = std::current_exception(); - HostTask.MQueue->reportAsyncException(CurrentException); + MthisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); } } }; @@ -449,6 +454,7 @@ void Command::waitForPreparedHostEvents() const { void Command::waitForEvents(QueueImplPtr Queue, std::vector &EventImpls, sycl::detail::pi::PiEvent &Event) { + assert(Queue && "Device queue is expected here"); if (!EventImpls.empty()) { #ifndef NDEBUG for (const EventImplPtr &Event : EventImpls) @@ -484,7 +490,7 @@ Command::Command( MEvent->setWorkerQueue(MWorkerQueue); MEvent->setSubmittedQueue(MWorkerQueue); MEvent->setCommand(this); - MEvent->setContextImpl(MQueue->getContextImplPtr()); + MEvent->setContextImpl(MQueue ? MQueue->getContextImplPtr(): nullptr); MEvent->setStateIncomplete(); MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; @@ -669,7 +675,7 @@ void Command::makeTraceEventEpilog() { Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, std::vector &ToCleanUp) { const QueueImplPtr &WorkerQueue = getWorkerQueue(); - const ContextImplPtr &WorkerContext = WorkerQueue->getContextImplPtr(); + const ContextImplPtr &WorkerContext = WorkerQueue ? WorkerQueue->getContextImplPtr() : nullptr; // 1. Non-host events can be ignored if they are not fully initialized. // 2. 
Some types of commands do not produce PI events after they are @@ -701,7 +707,8 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, } const ContextImplPtr &Command::getWorkerContext() const { - return MQueue->getContextImplPtr(); + assert(MWorkerQueue && "MWorkerQueue must not be nullptr"); + return MWorkerQueue->getContextImplPtr(); } const QueueImplPtr &Command::getWorkerQueue() const { @@ -963,16 +970,12 @@ void AllocaCommandBase::emitInstrumentationData() { // Set the relevant meta data properties for this command if (MTraceEvent && MFirstInstance) { xpti_td *TE = static_cast(MTraceEvent); - xpti::addMetadata(TE, "sycl_device", deviceToID(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(TE, MQueue); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); } #endif } @@ -1022,7 +1025,7 @@ pi_int32 AllocaCommand::enqueueImp() { void *HostPtr = nullptr; if (!MIsLeaderAlloca) { - if (MQueue->is_host()) { + if (!MQueue) { // Do not need to make allocation if we have a linked device allocation Command::waitForEvents(MQueue, EventImpls, Event); @@ -1033,7 +1036,7 @@ pi_int32 AllocaCommand::enqueueImp() { // TODO: Check if it is correct to use std::move on stack variable and // delete it RawEvents below. MMemAllocation = MemoryManager::allocate( - MQueue->getContextImplPtr(), getSYCLMemObj(), MInitFromUserData, HostPtr, + MQueue ? MQueue->getContextImplPtr() : nullptr, getSYCLMemObj(), MInitFromUserData, HostPtr, std::move(EventImpls), Event); return PI_SUCCESS; @@ -1043,7 +1046,7 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "ALLOCA ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; @@ -1092,7 +1095,7 @@ void AllocaSubBufCommand::emitInstrumentationData() { xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1102,7 +1105,7 @@ void *AllocaSubBufCommand::getMemAllocation() const { // In some cases parent`s memory allocation might change (e.g., after // map/unmap operations). If parent`s memory allocation changes, sub-buffer // memory allocation should be changed as well. - if (MQueue->is_host()) { + if (!MQueue) { return static_cast( static_cast(MParentAlloca->getMemAllocation()) + MRequirement.MOffsetInBytes); @@ -1116,7 +1119,7 @@ pi_int32 AllocaSubBufCommand::enqueueImp() { sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); MMemAllocation = MemoryManager::allocateMemSubBuffer( - MQueue->getContextImplPtr(), MParentAlloca->getMemAllocation(), + MQueue ? 
MQueue->getContextImplPtr() : nullptr, MParentAlloca->getMemAllocation(), MRequirement.MElemSize, MRequirement.MOffsetInBytes, MRequirement.MAccessRange, std::move(EventImpls), Event); @@ -1129,7 +1132,7 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " << deviceToString(MQueue->get_device()) + Stream << "ALLOCA SUB BUF ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; @@ -1163,17 +1166,13 @@ void ReleaseCommand::emitInstrumentationData() { if (MFirstInstance) { xpti_td *TE = static_cast(MTraceEvent); - xpti::addMetadata(TE, "sycl_device", deviceToID(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(TE, MQueue); xpti::addMetadata(TE, "allocation_type", commandToName(MAllocaCmd->getType())); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1187,9 +1186,9 @@ pi_int32 ReleaseCommand::enqueueImp() { // On host side we only allocate memory for full buffers. // Thus, deallocating sub buffers leads to double memory freeing. - SkipRelease |= MQueue->is_host() && MAllocaCmd->getType() == ALLOCA_SUB_BUF; + SkipRelease |= !MQueue && MAllocaCmd->getType() == ALLOCA_SUB_BUF; - const bool CurAllocaIsHost = MAllocaCmd->getQueue()->is_host(); + const bool CurAllocaIsHost = !MAllocaCmd->getQueue(); bool NeedUnmap = false; if (MAllocaCmd->MLinkedAllocaCmd) { @@ -1213,7 +1212,7 @@ pi_int32 ReleaseCommand::enqueueImp() { : MAllocaCmd->getQueue(); EventImplPtr UnmapEventImpl(new event_impl(Queue)); - UnmapEventImpl->setContextImpl(Queue->getContextImplPtr()); + UnmapEventImpl->setContextImpl(Queue ? Queue->getContextImplPtr() : nullptr); UnmapEventImpl->setStateIncomplete(); sycl::detail::pi::PiEvent &UnmapEvent = UnmapEventImpl->getHandleRef(); @@ -1237,7 +1236,7 @@ pi_int32 ReleaseCommand::enqueueImp() { Command::waitForEvents(MQueue, EventImpls, Event); else { MemoryManager::release( - MQueue->getContextImplPtr(), MAllocaCmd->getSYCLMemObj(), + MQueue ? MQueue->getContextImplPtr() : nullptr, MAllocaCmd->getSYCLMemObj(), MAllocaCmd->getMemAllocation(), std::move(EventImpls), Event); } return PI_SUCCESS; @@ -1247,7 +1246,7 @@ void ReleaseCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FF827A\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "RELEASE ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "RELEASE ON " << MQueue ? 
deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << " Alloca : " << MAllocaCmd << "\\n"; Stream << " MemObj : " << MAllocaCmd->getSYCLMemObj() << "\\n"; Stream << "\"];" << std::endl; @@ -1287,16 +1286,12 @@ void MapMemObject::emitInstrumentationData() { if (MFirstInstance) { xpti_td *TE = static_cast(MTraceEvent); - xpti::addMetadata(TE, "sycl_device", deviceToID(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(TE, MQueue); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1321,7 +1316,7 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "MAP ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << "\"];" << std::endl; @@ -1352,16 +1347,12 @@ void UnMapMemObject::emitInstrumentationData() { if (MFirstInstance) { xpti_td *TE = static_cast(MTraceEvent); - xpti::addMetadata(TE, "sycl_device", deviceToID(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(TE, MQueue); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1383,9 +1374,9 @@ bool UnMapMemObject::producesPiEvent() const { // an event waitlist and Level Zero plugin attempts to batch these commands, // so the execution of kernel B starts only on step 4. This workaround // restores the old behavior in this case until this is resolved. - return MQueue->getDeviceImplPtr()->getBackend() != + return MQueue && (MQueue->getDeviceImplPtr()->getBackend() != backend::ext_oneapi_level_zero || - MEvent->getHandleRef() != nullptr; + MEvent->getHandleRef() != nullptr); } pi_int32 UnMapMemObject::enqueueImp() { @@ -1406,7 +1397,7 @@ void UnMapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#EBC40F\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "UNMAP ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "UNMAP ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << "\"];" << std::endl; @@ -1428,11 +1419,11 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq, MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstAllocaCmd(DstAllocaCmd) { - if (!MSrcQueue->is_host()) { + if (MSrcQueue) { MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); } - MWorkerQueue = MQueue->is_host() ? MSrcQueue : MQueue; + MWorkerQueue = !MQueue ? 
MSrcQueue : MQueue; MEvent->setWorkerQueue(MWorkerQueue); emitInstrumentationDataProxy(); @@ -1449,24 +1440,19 @@ void MemCpyCommand::emitInstrumentationData() { if (MFirstInstance) { xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(CmdTraceEvent, MQueue); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", - reinterpret_cast( - getSyclObjImpl(MSrcQueue->get_device()).get())); + MSrcQueue ? reinterpret_cast( + getSyclObjImpl(MSrcQueue->get_device()).get()) : nullptr); xpti::addMetadata( CmdTraceEvent, "copy_to", - reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get())); + MQueue ? reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get()): nullptr); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1492,7 +1478,7 @@ bool MemCpyCommand::producesPiEvent() const { // an event waitlist and Level Zero plugin attempts to batch these commands, // so the execution of kernel B starts only on step 4. This workaround // restores the old behavior in this case until this is resolved. - return MQueue->is_host() || + return !MQueue || MQueue->getDeviceImplPtr()->getBackend() != backend::ext_oneapi_level_zero || MEvent->getHandleRef() != nullptr; @@ -1521,10 +1507,10 @@ void MemCpyCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#C7EB15\" label=\""; Stream << "ID = " << this << " ; "; - Stream << "MEMCPY ON " << deviceToString(MQueue->get_device()) << "\\n"; - Stream << "From: " << MSrcAllocaCmd << " is host: " << MSrcQueue->is_host() + Stream << "MEMCPY ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue << "\\n"; - Stream << "To: " << MDstAllocaCmd << " is host: " << MQueue->is_host() + Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue << "\\n"; Stream << "\"];" << std::endl; @@ -1579,7 +1565,7 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#f1337f\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "UPDATE REQ ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "UPDATE REQ ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; bool IsReqOnBuffer = MDstReq.MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer; Stream << "TYPE: " << (IsReqOnBuffer ? "Buffer" : "Image") << "\\n"; @@ -1606,11 +1592,11 @@ MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq, : Command(CommandType::COPY_MEMORY, std::move(DstQueue)), MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstPtr(DstPtr) { - if (!MSrcQueue->is_host()) { + if (MSrcQueue) { MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); } - MWorkerQueue = MQueue->is_host() ? MSrcQueue : MQueue; + MWorkerQueue = !MQueue ? 
MSrcQueue : MQueue; MEvent->setWorkerQueue(MWorkerQueue); emitInstrumentationDataProxy(); @@ -1627,24 +1613,19 @@ void MemCpyCommandHost::emitInstrumentationData() { if (MFirstInstance) { xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(CmdTraceEvent, MQueue); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", reinterpret_cast( - getSyclObjImpl(MSrcQueue->get_device()).get())); + MSrcQueue ? getSyclObjImpl(MSrcQueue->get_device()).get()) : "nullptr"); xpti::addMetadata( CmdTraceEvent, "copy_to", - reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get())); + MQueue ? reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get()) : "nullptr"); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1726,18 +1707,13 @@ void EmptyCommand::emitInstrumentationData() { if (MFirstInstance) { xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(CmdTraceEvent, MQueue); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1766,7 +1742,7 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "MEMCPY HOST ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "MEMCPY HOST ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << "\"];" << std::endl; @@ -1799,18 +1775,13 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { if (MFirstInstance) { xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(CmdTraceEvent, MQueue); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? 
MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1960,6 +1931,7 @@ void instrumentationAddExtraKernelMetadata( if (!SyclKernel->isCreatedFromSource()) EliminatedArgMask = SyclKernel->getKernelArgMask(); } else { + assert(Queue && "Queue with submitted kernel could not be on host"); std::tie(Kernel, KernelMutex, EliminatedArgMask, Program) = detail::ProgramManager::getInstance().getOrCreateKernel( Queue->getContextImplPtr(), Queue->getDeviceImplPtr(), KernelName); @@ -2024,12 +1996,7 @@ void instrumentationFillCommonData(const std::string &KernelName, if (CGKernelInstanceNo > 1) return; - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(Queue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(Queue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(Queue->get_device())->getDeviceName()); + addDeviceMetadata(CmdTraceEvent, Queue); if (!KernelName.empty()) { xpti::addMetadata(CmdTraceEvent, "kernel_name", KernelName); } @@ -2080,7 +2047,7 @@ std::pair emitKernelInstrumentationData( if (CmdTraceEvent) { // Stash the queue_id mutable metadata in TLS xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - Queue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, @@ -2126,7 +2093,7 @@ void ExecCGCommand::emitInstrumentationData() { if (CmdTraceEvent) { xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CG::Kernel) { auto KernelCG = @@ -2149,7 +2116,7 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EXEC CG ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "EXEC CG ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; switch (MCommandGroup->getType()) { case detail::CG::Kernel: { @@ -2330,6 +2297,7 @@ static pi_result SetKernelParamsAndLaunch( const KernelArgMask *EliminatedArgMask, const std::function &getMemAllocationFunc, bool IsCooperative) { + assert(Queue && "Queue with submitted kernel could not be on host"); const PluginPtr &Plugin = Queue->getPlugin(); auto setFunc = [&Plugin, Kernel, &DeviceImageImpl, &getMemAllocationFunc, @@ -2521,7 +2489,7 @@ pi_int32 enqueueImpKernel( const std::function &getMemAllocationFunc, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, const bool KernelIsCooperative) { - + assert(Queue && "Queue with submitted kernel could not be on host"); // Run OpenCL kernel auto ContextImpl = Queue->getContextImplPtr(); auto DeviceImpl = Queue->getDeviceImplPtr(); @@ -2636,6 +2604,7 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, bool blocking, void *ptr, size_t size, std::vector &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read) { + assert(Queue && "Queue with submitted read write host pipe could not be on host"); detail::HostPipeMapEntry *hostPipeEntry = ProgramManager::getInstance().getHostPipeEntry(PipeName); @@ -3309,19 +3278,14 @@ void KernelFusionCommand::emitInstrumentationData() { // This function is called in the constructor of the command. At this point // the kernel fusion list is still empty, so we don't have a terrible lot of // information we could attach to this node here. 
- if (MFirstInstance && CmdTraceEvent) { - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); - } + if (MFirstInstance && CmdTraceEvent) + addDeviceMetadata(CmdTraceEVent, MQueue); + if (MFirstInstance) { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, static_cast(MTraceEvent), MInstanceID, @@ -3335,7 +3299,7 @@ void KernelFusionCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "KERNEL FUSION on " << deviceToString(MQueue->get_device()) << "\\n" + Stream << "KERNEL FUSION on " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n" << "FUSION LIST: {"; bool Initial = true; for (auto *Cmd : MFusionList) { From f0868f5ecb17b2886e999e4891725e1695e22c36 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 04:31:26 -0700 Subject: [PATCH 14/52] handle nullptr Queue in commands.* Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.hpp | 6 ++-- sycl/source/detail/scheduler/commands.cpp | 39 ++++++++++++++++------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c205b5916f302..15e19f143f29d 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -670,9 +670,9 @@ class queue_impl { MExceptions.PushBack(ExceptionPtr); } - ThreadPool &getThreadPool() { - return GlobalHandler::instance().getHostTaskThreadPool(); - } + // ThreadPool &getThreadPool() { + // return GlobalHandler::instance().getHostTaskThreadPool(); + // } /// Gets the native handle of the SYCL queue. /// diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f7962bb7a5d66..55b29ac7dd426 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -87,6 +87,13 @@ static addDeviceMetadata(xpti_td* TraceEvent, const QueueImplPtr& Queue) } #endif +static ContextImplPtr getContext(const QueueImplPtr& Queue) +{ + if (Queue) + return Queue->getContextImplPtr(); + return nullptr; +} + #ifdef __SYCL_ENABLE_GNU_DEMANGLING struct DemangleHandle { char *p; @@ -490,7 +497,8 @@ Command::Command( MEvent->setWorkerQueue(MWorkerQueue); MEvent->setSubmittedQueue(MWorkerQueue); MEvent->setCommand(this); - MEvent->setContextImpl(MQueue ? 
MQueue->getContextImplPtr(): nullptr); + if (MQueue) + MEvent->setContextImpl(MQueue->getContextImplPtr()); MEvent->setStateIncomplete(); MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; @@ -707,12 +715,12 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, } const ContextImplPtr &Command::getWorkerContext() const { - assert(MWorkerQueue && "MWorkerQueue must not be nullptr"); + if (!MWorkerQueue) + return nullptr; return MWorkerQueue->getContextImplPtr(); } const QueueImplPtr &Command::getWorkerQueue() const { - assert(MWorkerQueue && "MWorkerQueue must not be nullptr"); return MWorkerQueue; } @@ -1036,7 +1044,7 @@ pi_int32 AllocaCommand::enqueueImp() { // TODO: Check if it is correct to use std::move on stack variable and // delete it RawEvents below. MMemAllocation = MemoryManager::allocate( - MQueue ? MQueue->getContextImplPtr() : nullptr, getSYCLMemObj(), MInitFromUserData, HostPtr, + getContext(MQueue), getSYCLMemObj(), MInitFromUserData, HostPtr, std::move(EventImpls), Event); return PI_SUCCESS; @@ -1119,7 +1127,7 @@ pi_int32 AllocaSubBufCommand::enqueueImp() { sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); MMemAllocation = MemoryManager::allocateMemSubBuffer( - MQueue ? MQueue->getContextImplPtr() : nullptr, MParentAlloca->getMemAllocation(), + getContext(MQueue), MParentAlloca->getMemAllocation(), MRequirement.MElemSize, MRequirement.MOffsetInBytes, MRequirement.MAccessRange, std::move(EventImpls), Event); @@ -1212,7 +1220,7 @@ pi_int32 ReleaseCommand::enqueueImp() { : MAllocaCmd->getQueue(); EventImplPtr UnmapEventImpl(new event_impl(Queue)); - UnmapEventImpl->setContextImpl(Queue ? Queue->getContextImplPtr() : nullptr); + UnmapEventImpl->setContextImpl(getContext(Queue)); UnmapEventImpl->setStateIncomplete(); sycl::detail::pi::PiEvent &UnmapEvent = UnmapEventImpl->getHandleRef(); @@ -1236,7 +1244,7 @@ pi_int32 ReleaseCommand::enqueueImp() { Command::waitForEvents(MQueue, EventImpls, Event); else { MemoryManager::release( - MQueue ? MQueue->getContextImplPtr() : nullptr, MAllocaCmd->getSYCLMemObj(), + getContext(MQueue), MAllocaCmd->getSYCLMemObj(), MAllocaCmd->getMemAllocation(), std::move(EventImpls), Event); } return PI_SUCCESS; @@ -2654,6 +2662,7 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, } pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { + assert(MQueue && "Device queue is required for command buffer enqueue"); // Wait on host command dependencies waitForPreparedHostEvents(); @@ -2819,8 +2828,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { auto RawEvents = getPiEvents(EventImpls); flushCrossQueueDeps(EventImpls, getWorkerQueue()); - bool DiscardPiEvent = (MQueue->supportsDiscardingPiEvents() && - MCommandGroup->getRequirements().size() == 0); + bool DiscardPiEvent = MQueue && MQueue->supportsDiscardingPiEvents() && + (MCommandGroup->getRequirements().size() == 0); sycl::detail::pi::PiEvent *Event = DiscardPiEvent ? nullptr : &MEvent->getHandleRef(); detail::EventImplPtr EventImpl = DiscardPiEvent ? 
nullptr : MEvent; @@ -2894,6 +2903,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::Kernel: { + assert(MQueue && "Device queue must be present for kernel command"); CGExecKernel *ExecKernel = (CGExecKernel *)MCommandGroup.get(); NDRDescT &NDRDesc = ExecKernel->MNDRDesc; @@ -3039,8 +3049,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Req->MSYCLMemObj->MRecord->MAllocaCommands; for (AllocaCommandBase *AllocaCmd : AllocaCmds) - if (HostTask->MQueue->getContextImplPtr() == - AllocaCmd->getQueue()->getContextImplPtr()) { + if (getContext(HostTask->MQueue) == + getContext(AllocaCmd->getQueue()) { auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); ReqToMem.emplace_back(std::make_pair(Req, MemArg)); @@ -3064,7 +3074,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { // submitted to report exception origin properly. copySubmissionCodeLocation(); - MQueue->getThreadPool().submit( + getThreadPool().submit( DispatchHostTask(this, std::move(ReqToMem))); MShouldCompleteEventIfPossible = false; @@ -3072,6 +3082,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::Barrier: { + assert(MQueue && "Device queue must be present for barrier command"); const PluginPtr &Plugin = MQueue->getPlugin(); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); @@ -3081,6 +3092,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::BarrierWaitlist: { + assert(MQueue && "Device queue must be present for barrier with wait list command"); CGBarrier *Barrier = static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; std::vector PiEvents = @@ -3132,6 +3144,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { typeSize, RawEvents, EventImpl, read); } case CG::CGTYPE::ExecCommandBuffer: { + assert(MQueue && "Device queue must be present for command buffer enqueue"); CGExecCommandBuffer *CmdBufferCG = static_cast(MCommandGroup.get()); if (MEvent != nullptr) @@ -3155,6 +3168,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::SemaphoreWait: { + assert(MQueue && "Device queue must be present for semaphore wait command"); CGSemaphoreWait *SemWait = (CGSemaphoreWait *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); @@ -3165,6 +3179,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::SemaphoreSignal: { + assert(MQueue && "Device queue must be present for semaphore signal command"); CGSemaphoreSignal *SemSignal = (CGSemaphoreSignal *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); From 3d044e896cc6ff1d851c56268dfeb2dc623b55e9 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 06:04:41 -0700 Subject: [PATCH 15/52] non-buildable: handle nullptr queue in memory_manager.cpp Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 12 +++++++----- sycl/source/detail/memory_manager.cpp | 22 ++++++++++++++++++++-- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 28bb37200392a..be32787c0aa4d 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -149,15 +149,16 @@ event_impl::event_impl(sycl::detail::pi::PiEvent Event, } event_impl::event_impl(const QueueImplPtr &Queue) { - this->setContextImpl(Queue->getContextImplPtr()); + // Queue == nullptr means that it is a host task event + 
this->setContextImpl(getContext(Queue)); this->associateWithQueue(Queue); } void event_impl::associateWithQueue(const QueueImplPtr &Queue) { MQueue = Queue; - MIsProfilingEnabled = Queue->MIsProfilingEnabled; + MIsProfilingEnabled = Queue && Queue->MIsProfilingEnabled; MFallbackProfiling = MIsProfilingEnabled && Queue->isProfilingFallback(); - MState.store(HES_Complete); + MState.store(Queue ? HES_Complete : HES_NotComplete); } void *event_impl::instrumentationProlog(std::string &Name, int32_t StreamID, @@ -402,8 +403,9 @@ event_impl::get_backend_info() const { ->get_platform() .get_info(); } - return ""; // If the queue has been released, no platform will be associated - // so return empty string + // If the queue has been released, no platform will be associated + // so return empty string. + return ""; } template <> diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 3c0ad08e0763f..30827adb15e8f 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -482,6 +482,7 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, const detail::EventImplPtr &OutEventImpl) { (void)SrcAccessRange; assert(SYCLMemObj && "The SYCLMemObj is nullptr"); + assert(TgtQueue && "Destination mem object queue must be not nullptr"); const sycl::detail::pi::PiQueue Queue = TgtQueue->getHandleRef(); const PluginPtr &Plugin = TgtQueue->getPlugin(); @@ -560,6 +561,7 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, const detail::EventImplPtr &OutEventImpl) { (void)DstAccessRange; assert(SYCLMemObj && "The SYCLMemObj is nullptr"); + assert(SrcQueue && "Source mem object queue is expected to be not nullptr"); const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef(); const PluginPtr &Plugin = SrcQueue->getPlugin(); @@ -641,6 +643,7 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); + assert(SrcQueue && TgtQueue && "Source mem object and target mem object queues are expected to be not nullptr"); const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef(); const PluginPtr &Plugin = SrcQueue->getPlugin(); @@ -804,6 +807,7 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); + assert(Queue && "Fill should be called only with a valid device queue"); const PluginPtr &Plugin = Queue->getPlugin(); @@ -861,7 +865,7 @@ void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, unsigned int ElementSize, std::vector DepEvents, sycl::detail::pi::PiEvent &OutEvent) { - if (Queue->is_host()) { + if (!Queue) { throw runtime_error("Not supported configuration of map requested", PI_ERROR_INVALID_OPERATION); } @@ -907,6 +911,10 @@ void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, sycl::detail::pi::PiEvent &OutEvent) { // Host queue is not supported here. + if (!Queue) { + throw runtime_error("Not supported configuration of unmap requested", + PI_ERROR_INVALID_OPERATION); + } // All DepEvents are to the same Context. // Using the plugin of the Queue. 
@@ -921,6 +929,7 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(SrcQueue && "USM copy must be called with a valid device queue"); if (!Len) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) @@ -959,6 +968,7 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM fill must be called with a valid device queue"); if (!Length) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) @@ -994,6 +1004,7 @@ void MemoryManager::prefetch_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM prefetch must be called with a valid device queue"); const PluginPtr &Plugin = Queue->getPlugin(); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); @@ -1015,6 +1026,7 @@ void MemoryManager::advise_usm( std::vector /*DepEvents*/, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM advise must be called with a valid device queue"); const PluginPtr &Plugin = Queue->getPlugin(); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); @@ -1037,6 +1049,7 @@ void MemoryManager::copy_2d_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM copy 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1122,6 +1135,7 @@ void MemoryManager::fill_2d_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM fill 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1159,6 +1173,7 @@ void MemoryManager::memset_2d_usm( char Value, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM memset 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1198,6 +1213,7 @@ memcpyToDeviceGlobalUSM(QueueImplPtr Queue, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "Copy to device global USM must be called with a valid device queue"); // Get or allocate USM memory for the device_global. 
DeviceGlobalUSMMem &DeviceGlobalUSM = DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(Queue); @@ -1299,6 +1315,7 @@ static void memcpyToDeviceGlobalDirect( size_t NumBytes, size_t Offset, const void *Src, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { + assert(Queue && "Direct copy to device global must be called with a valid device queue"); sycl::detail::pi::PiProgram Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); const PluginPtr &Plugin = Queue->getPlugin(); @@ -1313,6 +1330,7 @@ static void memcpyFromDeviceGlobalDirect( size_t NumBytes, size_t Offset, void *Dest, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { + assert(Queue && "Direct copy from device global must be called with a valid device queue"); sycl::detail::pi::PiProgram Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); const PluginPtr &Plugin = Queue->getPlugin(); @@ -1722,7 +1740,7 @@ void MemoryManager::copy_image_bindless( sycl::detail::pi::PiImageRegion CopyExtent, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - + assert(Queue && "Copy image bindless must be called with a valid device queue"); assert((Flags == (sycl::detail::pi::PiImageCopyFlags) ext::oneapi::experimental::image_copy_flags::HtoD || Flags == (sycl::detail::pi::PiImageCopyFlags) From b3161e8bf8b978600e6910e7e8953a530ac26d23 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 06:55:19 -0700 Subject: [PATCH 16/52] non-buildable: build enabling Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.hpp | 6 ++--- .../source/detail/scheduler/graph_builder.cpp | 6 +++++ sycl/source/detail/scheduler/scheduler.hpp | 23 ++++++++----------- sycl/source/handler.cpp | 9 ++++---- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 15e19f143f29d..a3463225a54d1 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -111,7 +111,7 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)), + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { if (has_property()) { @@ -285,7 +285,7 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)), + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { queue_impl_interop(PiQueue); @@ -305,7 +305,7 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)) { + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder) { queue_impl_interop(PiQueue); } diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 6c9244f9ecb2c..d9614e9ca9d51 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -59,6 +59,12 @@ static ContextImplPtr GetContext(const QueueImplPtr& Queue) return Queue ? Queue->getContextImplPtr() : nullptr; } +bool MemObjRecord::isSameContext(const QueueImplPtr& Queue) const +{ + // Covers case for host usage (nullptr == nullptr) and existing device contexts comparison. + return MCurContext == (Queue ? 
Queue->getContextImplPtr() : nullptr); +} + /// Checks if the required access mode is allowed under the current one. static bool isAccessModeAllowed(access::mode Required, access::mode Current) { switch (Current) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 6a2bcc4e5004a..61f01863c477b 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -218,26 +218,21 @@ class MemObjRecord { // modified. Used while deciding if copy back needed. bool MMemModified = false; - void updateUsage(ContextImplPtr& NewContext) - { - MCurContext = NewContext; - } - - bool isSameContext(const QueueImplPtr& Queue) const - { - // Covers case for host usage (nullptr == nullptr) and existing device contexts comparison. - return LHS == (Queue ? Queue->getContextImplPtr() : nullptr); - } - - bool usedOnDevice( return MCurContext != nullptr; ) - -protected: // The context which has the latest state of the memory object. ContextImplPtr MCurContext; // The mode this object can be accessed with from the host (host_accessor). // Valid only if the current usage is on host. access::mode MHostAccess = access::mode::read_write; + + void updateUsage(ContextImplPtr& NewContext) + { + MCurContext = NewContext; + } + + bool isSameContext(const QueueImplPtr& Queue) const; + + bool usedOnDevice() { return MCurContext != nullptr; } }; /// DPC++ graph scheduler class. diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 749ab6750df5e..c0e0438d9cd2f 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -80,16 +80,15 @@ void *getValueFromDynamicParameter( } // namespace detail -handler::handler(std::shared_ptr Queue, bool IsHost) - : handler(Queue, Queue, nullptr, IsHost) {} +handler::handler(std::shared_ptr Queue) + : handler(Queue, Queue, nullptr) {} handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, - bool IsHost) + std::shared_ptr SecondaryQueue) : MImpl(std::make_shared(std::move(PrimaryQueue), std::move(SecondaryQueue))), - MQueue(std::move(Queue)), MIsHost(IsHost) {} + MQueue(std::move(Queue)) {} handler::handler( std::shared_ptr Graph) From 2258a1cbb812161a21af5dbb9a38c170a41badc8 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 08:07:45 -0700 Subject: [PATCH 17/52] not-buildable: build enabling 2 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/buffer_impl.cpp | 9 +- sycl/source/detail/event_impl.cpp | 2 +- sycl/source/detail/memory_manager.cpp | 4 +- sycl/source/detail/queue_impl.cpp | 4 +- sycl/source/detail/queue_impl.hpp | 5 + sycl/source/detail/scheduler/commands.cpp | 136 +++++++++--------- sycl/source/detail/scheduler/commands.hpp | 12 +- .../source/detail/scheduler/graph_builder.cpp | 51 +++---- sycl/source/detail/scheduler/scheduler.hpp | 12 +- 9 files changed, 108 insertions(+), 127 deletions(-) diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index d7d77205b162c..f13444107e9eb 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -68,10 +68,13 @@ buffer_impl::getNativeVector(backend BackendName) const { sycl::detail::pi::PiMem NativeMem = pi::cast(Cmd->getMemAllocation()); auto Ctx = Cmd->getWorkerContext(); - auto Platform = Ctx->getPlatformImpl(); // If Host Shared Memory is not supported then there is alloca for host that - // doesn't have platform - if (!Platform || (Platform->getBackend() != BackendName)) + 
// doesn't have context and platform + if (!Ctx) + continue; + PlatformImplPtr Platform = Ctx->getPlatformImpl(); + assert(Platform && "Platform must be present for device context"); + if (Platform->getBackend() != BackendName) continue; auto Plugin = Platform->getPlugin(); diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index be32787c0aa4d..e34597aa008d1 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -150,7 +150,7 @@ event_impl::event_impl(sycl::detail::pi::PiEvent Event, event_impl::event_impl(const QueueImplPtr &Queue) { // Queue == nullptr means that it is a host task event - this->setContextImpl(getContext(Queue)); + this->setContextImpl(queue_impl::getContext(Queue)); this->associateWithQueue(Queue); } diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 30827adb15e8f..e2c22f794f587 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -413,7 +413,7 @@ void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext, waitForEvents(DepEvents); OutEvent = nullptr; - if (TargetContext->is_host()) + if (!TargetContext) return static_cast(static_cast(ParentMemObj) + Offset); size_t SizeInBytes = ElemSize; @@ -643,7 +643,7 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); - assert(SrcQueue && TgtQueue && "Source mem object and target mem object queues are expected to be not nullptr"); + assert(SrcQueue && "Source mem object and target mem object queues are expected to be not nullptr"); const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef(); const PluginPtr &Plugin = SrcQueue->getPlugin(); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index c1c1d3835a54d..ce4dd462eef32 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -284,12 +284,12 @@ void queue_impl::addEvent(const event &Event) { // if there is no command on the event, we cannot track it with MEventsWeak // as that will leave it with no owner. Track in MEventsShared only if we're // unable to call piQueueFinish during wait. - if (Event->isHost() || MEmulateOOO) + if (EImpl->isHost() || MEmulateOOO) addSharedEvent(Event); } // As long as the queue supports piQueueFinish we only need to store events // for unenqueued commands and host tasks. - else if (Event->isHost() || MEmulateOOO || EImpl->getHandleRef() == nullptr) { + else if (EImpl->isHost() || MEmulateOOO || EImpl->getHandleRef() == nullptr) { std::weak_ptr EventWeakPtr{EImpl}; std::lock_guard Lock{MMutex}; MEventsWeak.push_back(std::move(EventWeakPtr)); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index a3463225a54d1..61f34c35c7baf 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -750,6 +750,11 @@ class queue_impl { // tasks and host tasks is applicable for out of order queues only. Not neede // for in order ones. void revisitUnenqueuedCommandsState(const EventImplPtr &CompletedHostTask); + + static ContextImplPtr getContext(const QueueImplPtr& Queue) + { + return Queue ? 
Queue->getContextImplPtr() : nullptr; + } protected: event discard_or_return(const event &Event); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 55b29ac7dd426..05873f23f45a9 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -75,16 +75,32 @@ static void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, static_cast(TraceEvent), InstanceID, Addr); } -static addDeviceMetadata(xpti_td* TraceEvent, const QueueImplPtr& Queue) +static size_t deviceToID(const device &Device) { + return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); +} + +static std::string deviceToString(device Device) { + if (Device.is_cpu()) + return "CPU"; + else if (Device.is_gpu()) + return "GPU"; + else if (Device.is_accelerator()) + return "ACCELERATOR"; + else + return "UNKNOWN"; +} + +static void addDeviceMetadata(xpti_td* TraceEvent, const QueueImplPtr& Queue) { xpti::addMetadata(TraceEvent, "sycl_device", - Queue ? deviceToID(MQueue->get_device()) : nullptr); + Queue ? deviceToID(Queue->get_device()) : 0); xpti::addMetadata(TraceEvent, "sycl_device_type", - Queue ? deviceToString(MQueue->get_device()) : "host"); + Queue ? deviceToString(Queue->get_device()) : "host"); if (Queue) xpti::addMetadata(TraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + getSyclObjImpl(Queue->get_device())->getDeviceName()); } + #endif static ContextImplPtr getContext(const QueueImplPtr& Queue) @@ -113,17 +129,6 @@ static std::string demangleKernelName(std::string Name) { static std::string demangleKernelName(std::string Name) { return Name; } #endif -static std::string deviceToString(device Device) { - if (Device.is_cpu()) - return "CPU"; - else if (Device.is_gpu()) - return "GPU"; - else if (Device.is_accelerator()) - return "ACCELERATOR"; - else - return "UNKNOWN"; -} - void applyFuncOnFilteredArgs( const KernelArgMask *EliminatedArgMask, std::vector &Args, std::function Func) { @@ -158,12 +163,6 @@ void applyFuncOnFilteredArgs( } } -#ifdef XPTI_ENABLE_INSTRUMENTATION -static size_t deviceToID(const device &Device) { - return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); -} -#endif - static std::string accessModeToString(access::mode Mode) { switch (Mode) { case access::mode::read: @@ -253,9 +252,8 @@ Command::getPiEvents(const std::vector &EventImpls) const { // At this stage dependency is definitely pi task and need to check if // current one is a host task. In this case we should not skip pi event due // to different sync mechanisms for different task types on in-order queue. - const QueueImplPtr &WorkerQueue = getWorkerQueue(); - if (WorkerQueue && EventImpl->getWorkerQueue() == WorkerQueue && - WorkerQueue->isInOrder() && !isHostTask()) + if (MWorkerQueue && EventImpl->getWorkerQueue() == MWorkerQueue && + MWorkerQueue->isInOrder() && !isHostTask()) continue; RetPiEvents.push_back(EventImpl->getHandleRef()); @@ -293,9 +291,8 @@ std::vector Command::getPiEventsBlocking( // At this stage dependency is definitely pi task and need to check if // current one is a host task. In this case we should not skip pi event due // to different sync mechanisms for different task types on in-order queue. 
- const QueueImplPtr &WorkerQueue = getWorkerQueue(); - if (MWorkerQueue && EventImpl->getWorkerQueue() == WorkerQueue && - WorkerQueue->isInOrder() && !isHostTask()) + if (MWorkerQueue && EventImpl->getWorkerQueue() == MWorkerQueue && + MWorkerQueue->isInOrder() && !isHostTask()) continue; RetPiEvents.push_back(EventImpl->getHandleRef()); @@ -431,7 +428,7 @@ class DispatchHostTask { } } #endif - MthisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); } HostTask.MHostTask.reset(); @@ -448,7 +445,7 @@ class DispatchHostTask { Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); } catch (...) { auto CurrentException = std::current_exception(); - MthisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); } } }; @@ -471,7 +468,7 @@ void Command::waitForEvents(QueueImplPtr Queue, std::vector RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); const PluginPtr &Plugin = Queue->getPlugin(); if (MEvent != nullptr) @@ -682,8 +679,7 @@ void Command::makeTraceEventEpilog() { Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, std::vector &ToCleanUp) { - const QueueImplPtr &WorkerQueue = getWorkerQueue(); - const ContextImplPtr &WorkerContext = WorkerQueue ? WorkerQueue->getContextImplPtr() : nullptr; + const ContextImplPtr &WorkerContext = getWorkerContext(); // 1. Non-host events can be ignored if they are not fully initialized. // 2. Some types of commands do not produce PI events after they are @@ -714,14 +710,10 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, return ConnectionCmd; } -const ContextImplPtr &Command::getWorkerContext() const { - if (!MWorkerQueue) +ContextImplPtr Command::getWorkerContext() const { + if (!MQueue) return nullptr; - return MWorkerQueue->getContextImplPtr(); -} - -const QueueImplPtr &Command::getWorkerQueue() const { - return MWorkerQueue; + return MQueue->getContextImplPtr(); } bool Command::producesPiEvent() const { return true; } @@ -1054,7 +1046,7 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "ALLOCA ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; @@ -1140,7 +1132,7 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" + Stream << "ALLOCA SUB BUF ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; @@ -1254,7 +1246,7 @@ void ReleaseCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FF827A\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "RELEASE ON " << MQueue ? 
deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "RELEASE ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " Alloca : " << MAllocaCmd << "\\n"; Stream << " MemObj : " << MAllocaCmd->getSYCLMemObj() << "\\n"; Stream << "\"];" << std::endl; @@ -1309,7 +1301,7 @@ pi_int32 MapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); *MDstPtr = MemoryManager::map( @@ -1324,7 +1316,7 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "MAP ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1391,7 +1383,7 @@ pi_int32 UnMapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); MemoryManager::unmap(MDstAllocaCmd->getSYCLMemObj(), @@ -1405,7 +1397,7 @@ void UnMapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#EBC40F\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "UNMAP ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "UNMAP ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1452,11 +1444,10 @@ void MemCpyCommand::emitInstrumentationData() { xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", - MSrcQueue ? reinterpret_cast( - getSyclObjImpl(MSrcQueue->get_device()).get()) : nullptr); + MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); xpti::addMetadata( CmdTraceEvent, "copy_to", - MQueue ? reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get()): nullptr); + MQueue ? 
deviceToID(MQueue->get_device()): 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, @@ -1466,8 +1457,9 @@ void MemCpyCommand::emitInstrumentationData() { #endif } -const ContextImplPtr &MemCpyCommand::getWorkerContext() const { - return getWorkerQueue()->getContextImplPtr(); +ContextImplPtr MemCpyCommand::getWorkerContext() const { + assert(MWorkerQueue && "Worker queue for mem cpy command must be not nullptr"); + return MWorkerQueue->getContextImplPtr(); } bool MemCpyCommand::producesPiEvent() const { @@ -1499,7 +1491,7 @@ pi_int32 MemCpyCommand::enqueueImp() { sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); auto RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); MemoryManager::copy( MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), @@ -1515,7 +1507,7 @@ void MemCpyCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#C7EB15\" label=\""; Stream << "ID = " << this << " ; "; - Stream << "MEMCPY ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "MEMCPY ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue << "\\n"; Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue @@ -1573,7 +1565,7 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#f1337f\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "UPDATE REQ ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "UPDATE REQ ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; bool IsReqOnBuffer = MDstReq.MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer; Stream << "TYPE: " << (IsReqOnBuffer ? "Buffer" : "Image") << "\\n"; @@ -1625,11 +1617,10 @@ void MemCpyCommandHost::emitInstrumentationData() { xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", - reinterpret_cast( - MSrcQueue ? getSyclObjImpl(MSrcQueue->get_device()).get()) : "nullptr"); + MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); xpti::addMetadata( CmdTraceEvent, "copy_to", - MQueue ? reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get()) : "nullptr"); + MQueue ? 
deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, @@ -1639,12 +1630,13 @@ void MemCpyCommandHost::emitInstrumentationData() { #endif } -const ContextImplPtr &MemCpyCommandHost::getWorkerContext() const { - return getWorkerQueue()->getContextImplPtr(); +ContextImplPtr MemCpyCommandHost::getWorkerContext() const { + assert(MWorkerQueue && "Worker queue for mem cpy host command must be not nullptr"); + return MWorkerQueue->getContextImplPtr(); } pi_int32 MemCpyCommandHost::enqueueImp() { - const QueueImplPtr &Queue = getWorkerQueue(); + const QueueImplPtr &Queue = MWorkerQueue; waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); @@ -1660,7 +1652,7 @@ pi_int32 MemCpyCommandHost::enqueueImp() { return PI_SUCCESS; } - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); MemoryManager::copy( MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, @@ -1671,8 +1663,8 @@ pi_int32 MemCpyCommandHost::enqueueImp() { return PI_SUCCESS; } -EmptyCommand::EmptyCommand(QueueImplPtr Queue) - : Command(CommandType::EMPTY_TASK, std::move(Queue)) { +EmptyCommand::EmptyCommand() + : Command(CommandType::EMPTY_TASK, nullptr) { emitInstrumentationDataProxy(); } @@ -1750,7 +1742,7 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "MEMCPY HOST ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "MEMCPY HOST ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -2055,7 +2047,7 @@ std::pair emitKernelInstrumentationData( if (CmdTraceEvent) { // Stash the queue_id mutable metadata in TLS xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + Queue ? Queue->getQueueID() : 0); instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, @@ -2124,7 +2116,7 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EXEC CG ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "EXEC CG ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; switch (MCommandGroup->getType()) { case detail::CG::Kernel: { @@ -2670,7 +2662,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { // submissions of the command buffer itself will not receive dependencies on // them, e.g. 
initial copies from host to device std::vector EventImpls = MPreparedDepsEvents; - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); std::vector RawEvents = getPiEvents(EventImpls); if (!RawEvents.empty()) { const PluginPtr &Plugin = MQueue->getPlugin(); @@ -2826,7 +2818,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; auto RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); bool DiscardPiEvent = MQueue && MQueue->supportsDiscardingPiEvents() && (MCommandGroup->getRequirements().size() == 0); @@ -3050,7 +3042,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { for (AllocaCommandBase *AllocaCmd : AllocaCmds) if (getContext(HostTask->MQueue) == - getContext(AllocaCmd->getQueue()) { + getContext(AllocaCmd->getQueue())) { auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); ReqToMem.emplace_back(std::make_pair(Req, MemArg)); @@ -3294,7 +3286,7 @@ void KernelFusionCommand::emitInstrumentationData() { // the kernel fusion list is still empty, so we don't have a terrible lot of // information we could attach to this node here. if (MFirstInstance && CmdTraceEvent) - addDeviceMetadata(CmdTraceEVent, MQueue); + addDeviceMetadata(CmdTraceEvent, MQueue); if (MFirstInstance) { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS @@ -3314,7 +3306,7 @@ void KernelFusionCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "KERNEL FUSION on " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n" + Stream << "KERNEL FUSION on " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n" << "FUSION LIST: {"; bool Initial = true; for (auto *Cmd : MFusionList) { @@ -3354,7 +3346,7 @@ pi_int32 UpdateCommandBufferCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; auto RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); for (auto &Node : MNodes) { auto CG = static_cast(Node->MCommandGroup.get()); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 89cabd134a7e1..ea2ba3ea72118 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -223,11 +223,7 @@ class Command { /// Get the context of the queue this command will be submitted to. Could /// differ from the context of MQueue for memory copy commands. - virtual const ContextImplPtr &getWorkerContext() const; - - /// Get the queue this command will be submitted to. Could differ from MQueue - /// for memory copy commands. - const QueueImplPtr &getWorkerQueue() const; + virtual ContextImplPtr getWorkerContext() const; /// Returns true iff the command produces a PI event on non-host devices. virtual bool producesPiEvent() const; @@ -414,7 +410,7 @@ class Command { /// implement lock in the graph, or to merge several nodes into one. 
class EmptyCommand : public Command { public: - EmptyCommand(QueueImplPtr Queue); + EmptyCommand(); void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MRequirements[0]; } @@ -586,7 +582,7 @@ class MemCpyCommand : public Command { void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData() final; - const ContextImplPtr &getWorkerContext() const final; + ContextImplPtr getWorkerContext() const final; bool producesPiEvent() const final; private: @@ -610,7 +606,7 @@ class MemCpyCommandHost : public Command { void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData() final; - const ContextImplPtr &getWorkerContext() const final; + ContextImplPtr getWorkerContext() const final; private: pi_int32 enqueueImp() final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index d9614e9ca9d51..8778ad6927c3e 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -54,15 +54,10 @@ static bool IsSuitableSubReq(const Requirement *Req) { return Req->MIsSubBuffer; } -static ContextImplPtr GetContext(const QueueImplPtr& Queue) -{ - return Queue ? Queue->getContextImplPtr() : nullptr; -} - -bool MemObjRecord::isSameContext(const QueueImplPtr& Queue) const +static bool isOnSameContext(const ContextImplPtr Context, const QueueImplPtr& Queue) { // Covers case for host usage (nullptr == nullptr) and existing device contexts comparison. - return MCurContext == (Queue ? Queue->getContextImplPtr() : nullptr); + return Context == queue_impl::getContext(Queue); } /// Checks if the required access mode is allowed under the current one. 
@@ -250,7 +245,7 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord( getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr, ToEnqueue); } else - MemObject->MRecord.reset(new MemObjRecord{GetContext(Queue), + MemObject->MRecord.reset(new MemObjRecord{queue_impl::getContext(Queue), LeafLimit, AllocateDependency}); MMemObjs.push_back(MemObject); @@ -289,7 +284,7 @@ void Scheduler::GraphBuilder::addNodeToLeaves( UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { - auto Context = GetContext(Queue); + auto Context = queue_impl::getContext(Queue); AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, Context); assert(AllocaCmd && "There must be alloca for requirement!"); @@ -353,7 +348,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( if (!AllocaCmdDst) throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); - auto Context = GetContext(Queue); + auto Context = queue_impl::getContext(Queue); std::set Deps = findDepsForReq(Record, Req, Context); Deps.insert(AllocaCmdDst); @@ -371,7 +366,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( // current context, need to find a parent alloca command for it (it must be // there) auto IsSuitableAlloca = [Record](AllocaCommandBase *AllocaCmd) { - bool Res = Record->isSameContext(AllocaCmd->getQueue()) && + bool Res = isOnSameContext(Record->MCurContext, AllocaCmd->getQueue()) && // Looking for a parent buffer alloca command AllocaCmd->getType() == Command::CommandType::ALLOCA; return Res; @@ -406,7 +401,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( if ((Req->MAccessMode == access::mode::discard_write) || (Req->MAccessMode == access::mode::discard_read_write)) { - Record->updateUsage(Context); + Record->MCurContext = Context; return nullptr; } else { // Full copy of buffer is needed to avoid loss of data that may be caused @@ -428,7 +423,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( addNodeToLeaves(Record, NewCmd, access::mode::read_write, ToEnqueue); for (Command *Cmd : ToCleanUp) cleanupCommand(Cmd); - Record->updateUsage(Context); + Record->MCurContext = Context; return NewCmd; } @@ -541,7 +536,7 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, AllocaCommandBase *HostAllocaCmd = getOrCreateAllocaForReq(Record, Req, nullptr, ToEnqueue); - if (Record->isSameContext(HostAllocaCmd->getQueue())) { + if (isOnSameContext(Record->MCurContext, HostAllocaCmd->getQueue())) { if (!isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) { remapMemoryObject(Record, Req, Req->MIsSubBuffer ? 
(static_cast( @@ -625,7 +620,7 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, if (Dep.MDepCommand) { auto DepQueue = Dep.MDepCommand->getQueue(); - CanBypassDep &= IsOnSameContext(Context, DepQueue); + CanBypassDep &= isOnSameContext(Context, DepQueue); } if (!CanBypassDep) { @@ -665,7 +660,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( bool AllowConst) { auto IsSuitableAlloca = [&Context, Req, AllowConst](AllocaCommandBase *AllocaCmd) { - bool Res = IsOnSameContext(Context, AllocaCmd->getQueue()); + bool Res = isOnSameContext(Context, AllocaCmd->getQueue()); if (IsSuitableSubReq(Req)) { const Requirement *TmpReq = AllocaCmd->getRequirement(); Res &= AllocaCmd->getType() == Command::CommandType::ALLOCA_SUB_BUF; @@ -705,7 +700,7 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { - auto Context = GetContext(Queue); + auto Context = queue_impl::getContext(Queue); AllocaCommandBase *AllocaCmd = findAllocaForReq( Record, Req, Context, /*AllowConst=*/false); @@ -761,7 +756,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( Record->MAllocaCommands.push_back(HostAllocaCmd); Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue); ++(HostAllocaCmd->MLeafCounter); - Record->updateUsage(nullptr); + Record->MCurContext = nullptr; } } } else { @@ -773,7 +768,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // new one. There could be situations when we could setup link with // "not" current allocation, but it will require memory copy. // Can setup link between cl and host allocations only - if ((Context != nullptr) + (Record->usedOnDevice()) == 1) { + if ((Context != nullptr) != (Record->MCurContext != nullptr)) { // Linked commands assume that the host allocation is reused by the // plugin runtime and that can lead to unnecessary copy overhead on // devices that do not support host unified memory. Do not link the @@ -829,7 +824,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( AllocaCmd->MIsActive = false; } else { LinkedAllocaCmd->MIsActive = false; - Record->updateUsage(Context); + Record->MCurContext =Context; std::set Deps = findDepsForReq(Record, Req, Context); @@ -1068,7 +1063,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( AllocaCmd = getOrCreateAllocaForReq(Record, Req, QueueForAlloca, ToEnqueue); - isSameCtx = Record->isSameContext(QueueForAlloca); + isSameCtx = isOnSameContext(Record->MCurContext, QueueForAlloca); } // If there is alloca command we need to check if the latest memory is in @@ -1076,7 +1071,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( if (isSameCtx) { // If the memory is already in the required host context, check if the // required access mode is valid, remap if not. 
- if (!Record->usedOnDevice() && + if (!Record->MCurContext && !isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) { remapMemoryObject(Record, Req, Req->MIsSubBuffer @@ -1094,11 +1089,11 @@ void Scheduler::GraphBuilder::createGraphForCommand( if (isInteropTask) { const detail::CGHostTask &HT = static_cast(CG); - if (!(Record->isSameContext(HT.MQueue)) { + if (!isOnSameContext(Record->MCurContext, HT.MQueue)) { NeedMemMoveToHost = true; MemMoveTargetQueue = HT.MQueue; } - } else if (Queue && Record->usedOnDevice()) + } else if (Queue && Record->MCurContext) NeedMemMoveToHost = true; if (NeedMemMoveToHost) @@ -1107,7 +1102,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( } std::set Deps = - findDepsForReq(Record, Req, GetContext(Queue)); + findDepsForReq(Record, Req, queue_impl::getContext(Queue)); for (Command *Dep : Deps) { if (Dep != NewCmd) { @@ -1709,7 +1704,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); - isSameCtx = Record->isSameContext(Queue); + isSameCtx = isOnSameContext(Record->MCurContext, Queue); } if (!isSameCtx) { @@ -1718,7 +1713,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( bool NeedMemMoveToHost = false; auto MemMoveTargetQueue = Queue; - if (Queue && Record->usedOnDevice()) + if (Queue && Record->MCurContext) NeedMemMoveToHost = true; if (NeedMemMoveToHost) @@ -1728,7 +1723,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue); } std::set Deps = - findDepsForReq(Record, Req, GetContext(Queue)); + findDepsForReq(Record, Req, queue_impl::getContext(Queue)); for (Command *Dep : Deps) { if (Dep != NewCmd.get()) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 61f01863c477b..d3462872c9ddf 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -199,12 +199,11 @@ using FusionMap = std::unordered_map; /// There must be a single MemObjRecord for each SYCL memory object. /// /// \ingroup sycl_graph -class MemObjRecord { +struct MemObjRecord { MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit, LeavesCollection::AllocateDependencyF AllocateDependency) : MReadLeaves{this, LeafLimit, AllocateDependency}, MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx} {} -public: // Contains all allocation commands for the memory object. std::vector MAllocaCommands; @@ -224,15 +223,6 @@ class MemObjRecord { // The mode this object can be accessed with from the host (host_accessor). // Valid only if the current usage is on host. access::mode MHostAccess = access::mode::read_write; - - void updateUsage(ContextImplPtr& NewContext) - { - MCurContext = NewContext; - } - - bool isSameContext(const QueueImplPtr& Queue) const; - - bool usedOnDevice() { return MCurContext != nullptr; } }; /// DPC++ graph scheduler class. 
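Patches 13 through 17 above all implement the same convention: a null queue_impl pointer now denotes host-side work, so the former Queue->is_host() / MQueue->is_host() checks become plain null checks, and anything derived from a queue (context, XPTI device metadata, queue ID) goes through small null-tolerant helpers. The following condensed sketch restates that convention using the helper names these patches introduce (getContext, isOnSameContext, addDeviceMetadata); it is an illustration assembled from the hunks above, not a verbatim copy of the final sources, and it assumes the surrounding SYCL runtime types (QueueImplPtr, ContextImplPtr, xpti_td) and utilities (deviceToID, deviceToString, getSyclObjImpl) are in scope.

    // "nullptr queue means host": derive a context only when a device queue exists.
    static ContextImplPtr getContext(const QueueImplPtr &Queue) {
      return Queue ? Queue->getContextImplPtr() : nullptr;
    }

    // Host-to-host comparison degenerates to nullptr == nullptr; device queues
    // are compared through their underlying contexts.
    static bool isOnSameContext(const ContextImplPtr &Context, const QueueImplPtr &Queue) {
      return Context == getContext(Queue);
    }

    // XPTI device metadata falls back to neutral values for host work.
    static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) {
      xpti::addMetadata(TraceEvent, "sycl_device",
                        Queue ? deviceToID(Queue->get_device()) : 0);
      xpti::addMetadata(TraceEvent, "sycl_device_type",
                        Queue ? deviceToString(Queue->get_device()) : "host");
      if (Queue)
        xpti::addMetadata(TraceEvent, "sycl_device_name",
                          getSyclObjImpl(Queue->get_device())->getDeviceName());
    }

Call sites follow the same shape, e.g. (MQueue ? deviceToString(MQueue->get_device()) : "host") in the printDot methods and MQueue ? MQueue->getQueueID() : 0 for the stashed queue ID, while device-only paths (kernel launch, barriers, semaphores, host pipes, command buffers) assert a non-null queue instead of asserting "not host".
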
From df27615254aff2efd52952930673920c521fd3fb Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 08:49:20 -0700 Subject: [PATCH 18/52] almost buildable: build enabling 3 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.hpp | 6 +++--- sycl/source/detail/scheduler/commands.cpp | 6 +++--- sycl/source/detail/scheduler/commands.hpp | 3 +-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 61f34c35c7baf..3bd7b6ea7ec0a 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -670,9 +670,9 @@ class queue_impl { MExceptions.PushBack(ExceptionPtr); } - // ThreadPool &getThreadPool() { - // return GlobalHandler::instance().getHostTaskThreadPool(); - // } + static ThreadPool &getThreadPool() { + return GlobalHandler::instance().getHostTaskThreadPool(); + } /// Gets the native handle of the SYCL queue. /// diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 05873f23f45a9..d0a790ed97059 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -65,7 +65,7 @@ static bool CurrentCodeLocationValid() { (FunctionName && FunctionName[0] != '\0'); } -static void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, +void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, xpti_td *TraceEvent, uint16_t Type, const void *Addr) { if (!(xptiCheckTraceEnabled(StreamID, Type) && TraceEvent)) @@ -2424,7 +2424,7 @@ pi_int32 enqueueImpCommandBufferKernel( &getMemAllocationFunc](sycl::detail::ArgDesc &Arg, size_t NextTrueIndex) { sycl::detail::SetArgBasedOnType(Plugin, PiKernel, DeviceImageImpl, - getMemAllocationFunc, Ctx, false, Arg, + getMemAllocationFunc, Ctx, Arg, NextTrueIndex); }; // Copy args for modification @@ -3066,7 +3066,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { // submitted to report exception origin properly. 
copySubmissionCodeLocation(); - getThreadPool().submit( + queue_impl::getThreadPool().submit( DispatchHostTask(this, std::move(ReqToMem))); MShouldCompleteEventIfPossible = false; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index ea2ba3ea72118..628ccdf2593da 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -33,7 +33,6 @@ class node_impl; namespace detail { #ifdef XPTI_ENABLE_INSTRUMENTATION -bool CurrentCodeLocationValid(); void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, xpti_td *TraceEvent, uint16_t Type, const void *Addr); @@ -793,7 +792,7 @@ void SetArgBasedOnType( const detail::plugin &Plugin, sycl::detail::pi::PiKernel Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, - const sycl::context &Context, bool IsHost, detail::ArgDesc &Arg, + const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex); void applyFuncOnFilteredArgs( From eebc51933df59666baad0bb50100cb02dce4e485 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 09:34:20 -0700 Subject: [PATCH 19/52] almost almost buildable: enable build 4 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.hpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 3 ++- sycl/source/handler.cpp | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 3bd7b6ea7ec0a..1315d32ecaa4f 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -844,7 +844,7 @@ class queue_impl { "function objects should use the sycl::handler API instead."); } - handler Handler(Self, PrimaryQueue, SecondaryQueue); + handler Handler(Self, PrimaryQueue, SecondaryQueue, false); Handler.saveCodeLoc(Loc); PreventSubmit = true; try { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d0a790ed97059..1683b874fba5d 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -402,8 +402,9 @@ class DispatchHostTask { try { // we're ready to call the user-defined lambda now if (HostTask.MHostTask->isInteropTask()) { + assert(HostTask.MQueue && "Submitted queue for host task must be device queue"); interop_handle IH{MReqToMem, HostTask.MQueue, - // HostTask.MQueue->getDeviceImplPtr(), + HostTask.MQueue->getDeviceImplPtr(), HostTask.MQueue->getContextImplPtr()}; HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo(), IH); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index c0e0438d9cd2f..015d690d67e7d 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -80,12 +80,12 @@ void *getValueFromDynamicParameter( } // namespace detail -handler::handler(std::shared_ptr Queue) - : handler(Queue, Queue, nullptr) {} +handler::handler(std::shared_ptr Queue, bool) + : handler(Queue, Queue, nullptr, false) {} handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue) + std::shared_ptr SecondaryQueue, bool) : MImpl(std::make_shared(std::move(PrimaryQueue), std::move(SecondaryQueue))), MQueue(std::move(Queue)) {} From c6fe5c8098daadcde4dd19241be937e146bf9a17 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 6 Jun 2024 10:12:13 -0700 Subject: [PATCH 20/52] buildable Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/device_impl.cpp | 7 ------- 
sycl/source/detail/device_impl.hpp | 5 ----- sycl/source/detail/stream_impl.cpp | 14 +++++--------- sycl/source/detail/stream_impl.hpp | 4 ---- 4 files changed, 5 insertions(+), 25 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index c677b9165d71f..ae3b04486d1ea 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -716,13 +716,6 @@ bool device_impl::has(aspect Aspect) const { PI_ERROR_INVALID_DEVICE); } -std::shared_ptr device_impl::getHostDeviceImpl() { - static std::shared_ptr HostImpl = - std::make_shared(); - - return HostImpl; -} - bool device_impl::isAssertFailSupported() const { return MIsAssertFailSupported; } diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index efec017d372f5..9249bbba59fe8 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -217,11 +217,6 @@ class device_impl { /// \return true if the SYCL device has the given feature. bool has(aspect Aspect) const; - /// Gets the single instance of the Host Device - /// - /// \return the host device_impl singleton - static std::shared_ptr getHostDeviceImpl(); - bool isAssertFailSupported() const; bool isRootDevice() const { return MRootDevice == nullptr; } diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 4550b5cc26629..7268293433e82 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -94,12 +94,12 @@ void stream_impl::initStreamHost(QueueImplPtr Queue) { } void stream_impl::flush(const EventImplPtr &LeadEvent) { + assert(LeadEvent && "LeadEvent is expected to be not nullptr"); // We don't want stream flushing to be blocking operation that is why submit a // host task to print stream buffer. It will fire up as soon as the kernel // finishes execution. - auto Q = detail::createSyclObjFromImpl( - sycl::detail::Scheduler::getInstance().getDefaultHostQueue()); - event Event = Q.submit([&](handler &cgh) { + auto Q = LeadEvent->getSubmittedQueue(); + event Event = detail::createSyclObjFromImpl(Q).submit([&](handler &cgh) { auto BufHostAcc = Buf_.get_access( cgh, range<1>(BufferSize_), id<1>(OffsetSize)); @@ -131,14 +131,10 @@ void stream_impl::flush(const EventImplPtr &LeadEvent) { fflush(stdout); }); }); - if (LeadEvent) { - LeadEvent->attachEventToComplete(detail::getSyclObjImpl(Event)); - LeadEvent->getSubmittedQueue()->registerStreamServiceEvent( - detail::getSyclObjImpl(Event)); - } + LeadEvent->attachEventToComplete(detail::getSyclObjImpl(Event)); + Q->registerStreamServiceEvent(detail::getSyclObjImpl(Event)); } -void stream_impl::flush() { flush(nullptr); } } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index 823653016c162..cd3d503b4b894 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -49,10 +49,6 @@ class __SYCL_EXPORT stream_impl { // LeadEvent as well as in queue LeadEvent associated with. 
void flush(const EventImplPtr &LeadEvent); - // Enqueue task to copy stream buffer to the host and print the contents - // Remove during next ABI breaking window - void flush(); - size_t size() const noexcept; size_t get_work_item_buffer_size() const; From 24669e2a82d3765cc08800d4e8691e0c2bc5b28b Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 6 Jun 2024 10:52:53 -0700 Subject: [PATCH 21/52] RT-buildable: enabling UT build Signed-off-by: Tikhomirova, Kseniya --- sycl/unittests/scheduler/AllocaLinking.cpp | 13 +++---------- .../scheduler/CommandsWaitForEvents.cpp | 10 ++-------- .../scheduler/EnqueueWithDependsOnDeps.cpp | 3 +-- sycl/unittests/scheduler/GraphCleanup.cpp | 11 +++-------- sycl/unittests/scheduler/InOrderQueueDeps.cpp | 11 +++-------- sycl/unittests/scheduler/LeafLimit.cpp | 2 -- .../scheduler/LeafLimitDiffContexts.cpp | 2 +- sycl/unittests/scheduler/LeavesCollection.cpp | 9 ++++----- .../scheduler/LinkedAllocaDependencies.cpp | 14 ++++---------- .../scheduler/NoHostUnifiedMemory.cpp | 19 +++++++------------ sycl/unittests/scheduler/QueueFlushing.cpp | 10 +++------- .../scheduler/SchedulerTestUtils.hpp | 3 +-- .../scheduler/StreamInitDependencyOnHost.cpp | 9 +++------ 13 files changed, 35 insertions(+), 81 deletions(-) diff --git a/sycl/unittests/scheduler/AllocaLinking.cpp b/sycl/unittests/scheduler/AllocaLinking.cpp index a77995a203da3..e15cf24761ee1 100644 --- a/sycl/unittests/scheduler/AllocaLinking.cpp +++ b/sycl/unittests/scheduler/AllocaLinking.cpp @@ -47,13 +47,6 @@ static pi_result redefinedDeviceGetInfoAfter(pi_device Device, TEST_F(SchedulerTest, AllocaLinking) { HostUnifiedMemory = false; - // This host device constructor should be placed before Mock.redefine - // because it overrides the real implementation of get_device_info - // which is needed when creating a host device. 
- device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - std::shared_ptr DefaultHostQueue{ - new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; sycl::unittest::PiMock Mock; sycl::queue Q{Mock.getPlatform().get_devices()[0]}; @@ -73,7 +66,7 @@ TEST_F(SchedulerTest, AllocaLinking) { detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); EXPECT_FALSE(HostAllocaCmd->MLinkedAllocaCmd); EXPECT_FALSE(NonHostAllocaCmd->MLinkedAllocaCmd); @@ -90,7 +83,7 @@ TEST_F(SchedulerTest, AllocaLinking) { detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); EXPECT_EQ(HostAllocaCmd->MLinkedAllocaCmd, NonHostAllocaCmd); EXPECT_EQ(NonHostAllocaCmd->MLinkedAllocaCmd, HostAllocaCmd); @@ -107,7 +100,7 @@ TEST_F(SchedulerTest, AllocaLinking) { detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); EXPECT_EQ(HostAllocaCmd->MLinkedAllocaCmd, NonHostAllocaCmd); EXPECT_EQ(NonHostAllocaCmd->MLinkedAllocaCmd, HostAllocaCmd); diff --git a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp index d893c33f5cc26..499a45d0fe70f 100644 --- a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp +++ b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp @@ -219,13 +219,7 @@ TEST_F(SchedulerTest, CommandsWaitForEvents) { std::shared_ptr E2( new detail::event_impl(TestContext->EventCtx2, Q2.get_context())); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - std::shared_ptr DefaultHostQueue(new detail::queue_impl( - detail::getSyclObjImpl(HostDevice), /*AsyncHandler=*/{}, - /*PropList=*/{})); - - MockCommand Cmd(DefaultHostQueue); + MockCommand Cmd(nullptr); std::vector> Events; Events.push_back(E1); @@ -233,7 +227,7 @@ TEST_F(SchedulerTest, CommandsWaitForEvents) { pi_event EventResult = nullptr; - Cmd.waitForEventsCall(DefaultHostQueue, Events, EventResult); + Cmd.waitForEventsCall(nullptr, Events, EventResult); ASSERT_TRUE(TestContext->EventCtx1WasWaited && TestContext->EventCtx2WasWaited) diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index fc816d1a4f3af..bd7531c964716 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -83,7 +83,7 @@ class DependsOnTests : public ::testing::Test { detail::Command *NewCmd = MS.addCG( std::move(CmdGroup), - Type == TestCGType::HOST_TASK ? MS.getDefaultHostQueue() : QueueDevImpl, + Type == TestCGType::HOST_TASK ? 
nullptr : QueueDevImpl, ToEnqueue); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; @@ -167,7 +167,6 @@ class DependsOnTests : public ::testing::Test { TEST_F(DependsOnTests, EnqueueNoMemObjTwoHostTasks) { // Checks enqueue of two dependent host tasks - detail::QueueImplPtr QueueHostImpl = MS.getDefaultHostQueue(); std::vector Events; detail::Command *Cmd1 = diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp index 3389769569e5e..e0ec582db065c 100644 --- a/sycl/unittests/scheduler/GraphCleanup.cpp +++ b/sycl/unittests/scheduler/GraphCleanup.cpp @@ -172,7 +172,7 @@ static void checkCleanupOnEnqueue(MockScheduler &MS, } static void checkCleanupOnLeafUpdate( - MockScheduler &MS, detail::QueueImplPtr &QueueImpl, buffer &Buf, + MockScheduler &MS, detail::QueueImplPtr QueueImpl, buffer &Buf, detail::Requirement &MockReq, std::function SchedulerCall) { bool CommandDeleted = false; @@ -247,15 +247,10 @@ TEST_F(SchedulerTest, PostEnqueueCleanup) { checkCleanupOnLeafUpdate( MS, QueueImpl, Buf, MockReq, [&](detail::MemObjRecord *Record) { detail::Command *Leaf = *Record->MWriteLeaves.begin(); - MS.addEmptyCmd(Leaf, {&MockReq}, QueueImpl, - detail::Command::BlockReason::HostTask, ToEnqueue); + MS.addEmptyCmd(Leaf, {&MockReq}, detail::Command::BlockReason::HostTask, ToEnqueue); }); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - detail::QueueImplPtr DefaultHostQueue{ - new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; checkCleanupOnLeafUpdate( - MS, DefaultHostQueue, Buf, MockReq, [&](detail::MemObjRecord *Record) { + MS, nullptr, Buf, MockReq, [&](detail::MemObjRecord *Record) { MS.getOrCreateAllocaForReq(Record, &MockReq, QueueImpl, ToEnqueue); }); // Check cleanup on exceeding leaf limit. diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index 337ef2ef3d403..c19b494f9c484 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -77,11 +77,6 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { sycl::detail::QueueImplPtr InOrderQueueImpl = detail::getSyclObjImpl(InOrderQueue); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - std::shared_ptr DefaultHostQueue{ - new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; - MockScheduler MS; int val; @@ -92,18 +87,18 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(InOrderQueueImpl, &Req, AuxCmds); MS.getOrCreateAllocaForReq(Record, &Req, InOrderQueueImpl, AuxCmds); - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); // Check that sequential memory movements submitted to the same in-order // queue do not depend on each other. 
detail::Command *Cmd = - MS.insertMemoryMove(Record, &Req, DefaultHostQueue, AuxCmds); + MS.insertMemoryMove(Record, &Req, nullptr, AuxCmds); detail::EnqueueResultT Res; auto ReadLock = MS.acquireGraphReadLock(); MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); Cmd = MS.insertMemoryMove(Record, &Req, InOrderQueueImpl, AuxCmds); MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); - Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue, AuxCmds); + Cmd = MS.insertMemoryMove(Record, &Req, nullptr, AuxCmds); MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); } diff --git a/sycl/unittests/scheduler/LeafLimit.cpp b/sycl/unittests/scheduler/LeafLimit.cpp index 36d8f459a324a..f3417b297bc31 100644 --- a/sycl/unittests/scheduler/LeafLimit.cpp +++ b/sycl/unittests/scheduler/LeafLimit.cpp @@ -36,8 +36,6 @@ TEST_F(SchedulerTest, LeafLimit) { unittest::ScopedEnvVar DisabledCleanup{ DisableCleanupName, "1", detail::SYCLConfig::reset}; - sycl::queue HQueue(detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl())); MockScheduler MS; std::vector> LeavesToAdd; std::unique_ptr MockDepCmd; diff --git a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp index 38d9ac784c09f..1af882a423af8 100644 --- a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp +++ b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp @@ -61,7 +61,7 @@ TEST_F(SchedulerTest, LeafLimitDiffContexts) { AllocaCmd = MS.getOrCreateAllocaForReq( Rec, &MockReq, detail::getSyclObjImpl(Queue), ToEnqueue); std::ignore = MS.getOrCreateAllocaForReq( - Rec, &MockReq, MS.getDefaultHostQueue(), ToEnqueue); + Rec, &MockReq, nullptr, ToEnqueue); DepCmd = std::make_unique(detail::getSyclObjImpl(Queue), MockReq); } diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp index ea883041add66..39146ffaa95e8 100644 --- a/sycl/unittests/scheduler/LeavesCollection.cpp +++ b/sycl/unittests/scheduler/LeavesCollection.cpp @@ -37,9 +37,8 @@ createGenericCommand(const std::shared_ptr &Q) { } std::shared_ptr -createEmptyCommand(const std::shared_ptr &Q, - const Requirement &Req) { - EmptyCommand *Cmd = new EmptyCommand(Q); +createEmptyCommand(const Requirement &Req) { + EmptyCommand *Cmd = new EmptyCommand(); Cmd->addRequirement(/* DepCmd = */ nullptr, /* AllocaCmd = */ nullptr, &Req); Cmd->MBlockReason = Command::BlockReason::HostAccessor; return std::shared_ptr{Cmd}; @@ -97,7 +96,7 @@ TEST_F(LeavesCollectionTest, PushBack) { for (size_t Idx = 0; Idx < GenericCmdsCapacity * 4; ++Idx) { auto Cmd = Idx % 2 ? createGenericCommand(getSyclObjImpl(Q)) - : createEmptyCommand(getSyclObjImpl(Q), MockReq); + : createEmptyCommand(MockReq); Cmds.push_back(Cmd); LE.push_back(Cmds.back().get(), ToEnqueue); @@ -137,7 +136,7 @@ TEST_F(LeavesCollectionTest, Remove) { for (size_t Idx = 0; Idx < GenericCmdsCapacity * 4; ++Idx) { auto Cmd = Idx % 2 ? 
createGenericCommand(getSyclObjImpl(Q)) - : createEmptyCommand(getSyclObjImpl(Q), MockReq); + : createEmptyCommand(MockReq); Cmds.push_back(Cmd); if (LE.push_back(Cmds.back().get(), ToEnqueue)) diff --git a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp index 5ab9cfbb43f5a..6ae6b9bfc2344 100644 --- a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp +++ b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp @@ -64,28 +64,22 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) { sycl::queue Queue1{Dev}; sycl::detail::QueueImplPtr Q1 = sycl::detail::getSyclObjImpl(Queue1); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - std::shared_ptr DefaultHostQueue(new detail::queue_impl( - detail::getSyclObjImpl(HostDevice), /*AsyncHandler=*/{}, - /*PropList=*/{})); - auto AllocaDep = [](sycl::detail::Command *, sycl::detail::Command *, sycl::detail::MemObjRecord *, std::vector &) {}; std::shared_ptr Record{ - new sycl::detail::MemObjRecord(DefaultHostQueue->getContextImplPtr(), 10, + new sycl::detail::MemObjRecord(nullptr, 10, AllocaDep)}; MemObjMock MemObj(Record); Req.MSYCLMemObj = &MemObj; - sycl::detail::AllocaCommand AllocaCmd1(DefaultHostQueue, Req, false); + sycl::detail::AllocaCommand AllocaCmd1(nullptr, Req, false); Record->MAllocaCommands.push_back(&AllocaCmd1); - MockCommand DepCmd(DefaultHostQueue, Req); - MockCommand DepDepCmd(DefaultHostQueue, Req); + MockCommand DepCmd(nullptr, Req); + MockCommand DepDepCmd(nullptr, Req); DepCmd.MDeps.push_back({&DepDepCmd, DepDepCmd.getRequirement(), &AllocaCmd1}); DepDepCmd.MUsers.insert(&DepCmd); std::vector ToEnqueue; diff --git a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp index 635a8e9c3389c..20cf879d53daf 100644 --- a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp +++ b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp @@ -91,11 +91,6 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { redefinedMemCreateWithNativeHandle); sycl::detail::QueueImplPtr QImpl = detail::getSyclObjImpl(Q); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - std::shared_ptr DefaultHostQueue{ - new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; - MockScheduler MS; // Check non-host alloca with non-discard access mode { @@ -113,10 +108,10 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // order to perform a memory move. EXPECT_EQ(Record->MAllocaCommands.size(), 2U); detail::AllocaCommandBase *HostAllocaCmd = Record->MAllocaCommands[0]; - EXPECT_TRUE(HostAllocaCmd->getQueue()->is_host()); + EXPECT_TRUE(HostAllocaCmd->getQueue() == nullptr); EXPECT_TRUE(!HostAllocaCmd->MLinkedAllocaCmd); EXPECT_TRUE(!NonHostAllocaCmd->MLinkedAllocaCmd); - EXPECT_TRUE(Record->MCurContext->is_host()); + EXPECT_TRUE(Record->MCurContext == nullptr); detail::Command *MemoryMove = MS.insertMemoryMove(Record, &Req, QImpl, AuxCmds); @@ -162,9 +157,9 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // another and the transfer is done via a write operation. 
std::vector AuxCmds; detail::MemObjRecord *Record = - MS.getOrInsertMemObjRecord(DefaultHostQueue, &Req, AuxCmds); + MS.getOrInsertMemObjRecord(nullptr, &Req, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); EXPECT_EQ(Record->MAllocaCommands.size(), 1U); detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); @@ -190,14 +185,14 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds); MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); // Memory movement operations should be omitted for discard access modes. detail::Command *MemoryMove = - MS.insertMemoryMove(Record, &DiscardReq, DefaultHostQueue, AuxCmds); + MS.insertMemoryMove(Record, &DiscardReq, nullptr, AuxCmds); EXPECT_TRUE(MemoryMove == nullptr); // The current context for the record should still be modified. - EXPECT_EQ(Record->MCurContext, DefaultHostQueue->getContextImplPtr()); + EXPECT_EQ(Record->MCurContext, nullptr); } // Check that interoperability memory objects are initialized. { diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp b/sycl/unittests/scheduler/QueueFlushing.cpp index c97428b9d55c6..330ff7e0f02d2 100644 --- a/sycl/unittests/scheduler/QueueFlushing.cpp +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -122,21 +122,17 @@ TEST_F(SchedulerTest, QueueFlushing) { QueueImplA}; testCommandEnqueue(&UnmapCmd, QueueImplB, MockReq); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - detail::QueueImplPtr DefaultHostQueue{ - new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; detail::AllocaCommand HostAllocaCmd = - detail::AllocaCommand(DefaultHostQueue, MockReq); + detail::AllocaCommand(nullptr, MockReq); detail::MemCpyCommand MemCpyCmd{MockReq, &AllocaCmd, MockReq, &HostAllocaCmd, - QueueImplA, DefaultHostQueue}; + QueueImplA, nullptr}; testCommandEnqueue(&MemCpyCmd, QueueImplB, MockReq); detail::MemCpyCommandHost MemCpyCmdHost{MockReq, &AllocaCmd, MockReq, &MockHostPtr, - QueueImplA, DefaultHostQueue}; + QueueImplA, nullptr}; testCommandEnqueue(&MemCpyCmdHost, QueueImplB, MockReq); std::unique_ptr CG{ diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 88ced1f25904a..20f82f9165c01 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -189,10 +189,9 @@ class MockScheduler : public sycl::detail::Scheduler { sycl::detail::EmptyCommand * addEmptyCmd(sycl::detail::Command *Cmd, const std::vector &Reqs, - const sycl::detail::QueueImplPtr &Queue, sycl::detail::Command::BlockReason Reason, std::vector &ToEnqueue) { - return MGraphBuilder.addEmptyCmd(Cmd, Reqs, Queue, Reason, ToEnqueue); + return MGraphBuilder.addEmptyCmd(Cmd, Reqs, Reason, ToEnqueue); } sycl::detail::Command * diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 18c0b3e1a8070..838b60809472c 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -80,12 +80,9 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { 
unittest::ScopedEnvVar DisabledCleanup{ DisableCleanupName, "1", detail::SYCLConfig::reset}; - std::shared_ptr HQueueImpl(new detail::queue_impl( - detail::device_impl::getHostDeviceImpl(), /*AsyncHandler=*/{}, - /*PropList=*/{})); // Emulating processing of command group function - MockHandlerStreamInit MockCGH(HQueueImpl, true); + MockHandlerStreamInit MockCGH(nullptr, true); MockCGH.setType(detail::CG::Kernel); auto EmptyKernel = [](sycl::nd_item<1>) {}; @@ -114,11 +111,11 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { static_cast(MainCG.get())->getStreams(); ASSERT_EQ(Streams.size(), 1u) << "Invalid number of stream objects"; - Streams[0]->initStreamHost(HQueueImpl); + Streams[0]->initStreamHost(nullptr); MockScheduler MS; std::vector AuxCmds; - detail::Command *NewCmd = MS.addCG(std::move(MainCG), HQueueImpl, AuxCmds); + detail::Command *NewCmd = MS.addCG(std::move(MainCG), nullptr, AuxCmds); ASSERT_TRUE(!!NewCmd) << "Failed to add command group into scheduler"; ASSERT_GT(NewCmd->MDeps.size(), 0u) << "No deps appeared in the new exec kernel command"; From fcc7748699821b8a53db059de50b94dff5f96232 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Jun 2024 03:42:25 -0700 Subject: [PATCH 22/52] RT-buildable: restore incorrectly deleted code Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/memory_manager.cpp | 28 ++++++++++++++-- sycl/source/detail/memory_manager.hpp | 3 ++ sycl/source/detail/scheduler/commands.cpp | 41 +++++++++++++++++++---- 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index e2c22f794f587..461cf8b85915c 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -266,6 +266,11 @@ void MemoryManager::releaseMemObj(ContextImplPtr TargetContext, return; } + if (!TargetContext) { + MemObj->releaseHostMem(MemAllocation); + return; + } + const PluginPtr &Plugin = TargetContext->getPlugin(); memReleaseHelper(Plugin, pi::cast(MemAllocation)); } @@ -283,6 +288,19 @@ void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, OutEvent); } +void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, + bool HostPtrReadOnly, size_t Size, + const sycl::property_list &) { + std::ignore = HostPtrReadOnly; + std::ignore = Size; + + // Can return user pointer directly if it is not a nullptr. 
+ if (UserPtr) + return UserPtr; + + return MemObj->allocateHostMem(); +} + void *MemoryManager::allocateInteropMemObject( ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, @@ -379,9 +397,10 @@ void *MemoryManager::allocateMemBuffer( const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait) { void *MemPtr; - if (UserPtr && InteropContext) - MemPtr = - allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, + if (!TargetContext) + MemPtr = allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList); + else if (UserPtr && InteropContext) + MemPtr = allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, InteropContext, PropsList, OutEventToWait); else MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly, Size, @@ -398,6 +417,9 @@ void *MemoryManager::allocateMemImage( const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait) { + if (!TargetContext) + return allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, + PropsList); if (UserPtr && InteropContext) return allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, InteropContext, PropsList, OutEventToWait); diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 7be17898bc0d9..deefda9ccd8ff 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -85,6 +85,9 @@ class __SYCL_EXPORT MemoryManager { static void releaseMemObj(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, void *UserPtr); + static void *allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, + bool HostPtrReadOnly, size_t Size, + const sycl::property_list &PropsList); static void * allocateInteropMemObject(ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 1683b874fba5d..b1713473f2de3 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -459,8 +459,38 @@ void Command::waitForPreparedHostEvents() const { void Command::waitForEvents(QueueImplPtr Queue, std::vector &EventImpls, sycl::detail::pi::PiEvent &Event) { - assert(Queue && "Device queue is expected here"); if (!EventImpls.empty()) { + if (!Queue) { + // Host queue can wait for events from different contexts, i.e. it may + // contain events with different contexts in its MPreparedDepsEvents. + // OpenCL 2.1 spec says that clWaitForEvents will return + // CL_INVALID_CONTEXT if events specified in the list do not belong to + // the same context. Thus we split all the events into per-context map. + // An example. We have two queues for the same CPU device: Q1, Q2. Thus + // we will have two different contexts for the same CPU device: C1, C2. + // Also we have default host queue. This queue is accessible via + // Scheduler. Now, let's assume we have three different events: E1(C1), + // E2(C1), E3(C2). The command's MPreparedDepsEvents will contain all + // three events (E1, E2, E3). Now, if piEventsWait is called for all + // three events we'll experience failure with CL_INVALID_CONTEXT 'cause + // these events refer to different contexts. 
+ std::map> + RequiredEventsPerContext; + + for (const EventImplPtr &Event : EventImpls) { + ContextImplPtr Context = Event->getContextImpl(); + assert(Context.get() && + "Only non-host events are expected to be waited for here"); + RequiredEventsPerContext[Context.get()].push_back(Event); + } + + for (auto &CtxWithEvents : RequiredEventsPerContext) { + std::vector RawEvents = + getPiEvents(CtxWithEvents.second); + CtxWithEvents.first->getPlugin()->call( + RawEvents.size(), RawEvents.data()); + } + } else { #ifndef NDEBUG for (const EventImplPtr &Event : EventImpls) assert(!Event->isHost() && @@ -477,6 +507,7 @@ void Command::waitForEvents(QueueImplPtr Queue, Plugin->call( Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event); } + } } /// It is safe to bind MPreparedDepsEvents and MPreparedHostDepsEvents @@ -700,13 +731,11 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match we'll connect them using host task - if (DepEventContext == WorkerContext) - MPreparedDepsEvents.push_back(std::move(DepEvent)); - else - { + if (DepEventContext != WorkerContext && WorkerContext){ Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; ConnectionCmd = GB.connectDepEvent(this, DepEvent, Dep, ToCleanUp); - } + } else + MPreparedDepsEvents.push_back(std::move(DepEvent)); return ConnectionCmd; } From 7aa76d9f1e51eb430909125e9c4acc54518c7e81 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Jun 2024 05:59:28 -0700 Subject: [PATCH 23/52] RT buildable: check-sycl-AccessorTests passed Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 2 +- sycl/source/detail/event_impl.hpp | 4 ++-- sycl/source/detail/scheduler/commands.cpp | 17 +++++++++-------- sycl/source/detail/sycl_mem_obj_t.cpp | 2 +- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index e34597aa008d1..e38c15e04879a 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -566,7 +566,7 @@ void event_impl::setCommand(void *Cmd) { MCommand = Cmd; auto TypedCommand = static_cast(Cmd); if (TypedCommand) - MIsHostTask = TypedCommand->isHostTask(); + MIsHostEvent = TypedCommand->getWorkerContext() == nullptr; } } // namespace detail diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 7c1eb99e3b286..237939ea37bd8 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -337,7 +337,7 @@ class event_impl { void setEnqueued() { MIsEnqueued = true; } - bool isHost() { return MIsHostTask; } + bool isHost() { return MIsHostEvent; } protected: // When instrumentation is enabled emits trace event for event wait begin and @@ -406,7 +406,7 @@ class event_impl { std::shared_ptr Context); std::atomic_bool MIsEnqueued{false}; - bool MIsHostTask{false}; + bool MIsHostEvent{false}; }; } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index b1713473f2de3..f7b9805ff17ec 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -459,6 +459,11 @@ void Command::waitForPreparedHostEvents() const { void Command::waitForEvents(QueueImplPtr Queue, std::vector &EventImpls, sycl::detail::pi::PiEvent &Event) { + #ifndef NDEBUG + for (const EventImplPtr &Event : EventImpls) + assert(!Event->isHost() && + 
"Only non-host events are expected to be waited for here"); +#endif if (!EventImpls.empty()) { if (!Queue) { // Host queue can wait for events from different contexts, i.e. it may @@ -491,12 +496,6 @@ void Command::waitForEvents(QueueImplPtr Queue, RawEvents.size(), RawEvents.data()); } } else { -#ifndef NDEBUG - for (const EventImplPtr &Event : EventImpls) - assert(!Event->isHost() && - "Only non-host events are expected to be waited for here"); -#endif - std::vector RawEvents = getPiEvents(EventImpls); flushCrossQueueDeps(EventImpls, MWorkerQueue); @@ -1488,7 +1487,8 @@ void MemCpyCommand::emitInstrumentationData() { } ContextImplPtr MemCpyCommand::getWorkerContext() const { - assert(MWorkerQueue && "Worker queue for mem cpy command must be not nullptr"); + if (!MWorkerQueue) + return nullptr; return MWorkerQueue->getContextImplPtr(); } @@ -1661,7 +1661,8 @@ void MemCpyCommandHost::emitInstrumentationData() { } ContextImplPtr MemCpyCommandHost::getWorkerContext() const { - assert(MWorkerQueue && "Worker queue for mem cpy host command must be not nullptr"); + if (!MWorkerQueue) + return nullptr; return MWorkerQueue->getContextImplPtr(); } diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 87f005fe8ca78..a95b9b43d7f5c 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -209,7 +209,7 @@ void SYCLMemObjT::detachMemoryObject( !MOwnNativeHandle || (MInteropContext && !MInteropContext->isOwnedByRuntime()); - if (MRecord && MRecord->MCurContext->isOwnedByRuntime() && + if (MRecord && MRecord->MCurContext && MRecord->MCurContext->isOwnedByRuntime() && !InteropObjectsUsed && (!MHostPtrProvided || MIsInternal)) Scheduler::getInstance().deferMemObjRelease(Self); } From dc4a94ea111456a188ec60eaeef7ff9a053bf3bd Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Jun 2024 06:28:04 -0700 Subject: [PATCH 24/52] RT-buildable: enable unittests 2 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 3 ++- sycl/source/detail/scheduler/scheduler.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index e38c15e04879a..8f676a97f187d 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -81,7 +81,7 @@ void event_impl::waitInternal(bool *Success) { } void event_impl::setComplete() { - if (!MEvent) { + if (MIsHostEvent || !MEvent) { { std::unique_lock lock(MMutex); #ifndef NDEBUG @@ -126,6 +126,7 @@ const PluginPtr &event_impl::getPlugin() { void event_impl::setStateIncomplete() { MState = HES_NotComplete; } void event_impl::setContextImpl(const ContextImplPtr &Context) { + MIsHostEvent = Context == nullptr; MContext = Context; MIsContextInitialized = true; } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 7e5db05daf01a..d3fe7b523e689 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -459,7 +459,8 @@ void Scheduler::NotifyHostTaskCompletion(Command *Cmd) { std::vector ToCleanUp; auto CmdEvent = Cmd->getEvent(); - auto QueueImpl = Cmd->getQueue(); + auto QueueImpl = CmdEvent->getSubmittedQueue(); + assert(QueueImpl && "Submitted queue for host task must not be null"); { ReadLockT Lock = acquireReadLock(); From 8c57888b2a5a733d248322287e599d0f08855444 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Jun 2024 08:52:24 -0700 Subject: 
[PATCH 25/52] RT-buildable: unittests enabling 3 Signed-off-by: Tikhomirova, Kseniya --- .../source/detail/scheduler/graph_builder.cpp | 2 +- sycl/source/detail/stream_impl.cpp | 70 +++++++++---------- .../scheduler/StreamInitDependencyOnHost.cpp | 10 ++- 3 files changed, 44 insertions(+), 38 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 8778ad6927c3e..6d3fbdd157618 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1342,7 +1342,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( CG::CodeplayHostTask, /* Payload */ {})); ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), Cmd->getQueue()); + std::move(ConnectCG), nullptr); } catch (const std::bad_alloc &) { throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); } diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 7268293433e82..cb46510551a30 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -98,41 +98,41 @@ void stream_impl::flush(const EventImplPtr &LeadEvent) { // We don't want stream flushing to be blocking operation that is why submit a // host task to print stream buffer. It will fire up as soon as the kernel // finishes execution. - auto Q = LeadEvent->getSubmittedQueue(); - event Event = detail::createSyclObjFromImpl(Q).submit([&](handler &cgh) { - auto BufHostAcc = - Buf_.get_access( - cgh, range<1>(BufferSize_), id<1>(OffsetSize)); - // Create accessor to the flush buffer even if not using it yet. Otherwise - // kernel will be a leaf for the flush buffer and scheduler will not be able - // to cleanup the kernel. TODO: get rid of finalize method by using host - // accessor to the flush buffer. - auto FlushBufHostAcc = - FlushBuf_ - .get_access( - cgh); - cgh.host_task([=] { - if (!BufHostAcc.empty()) { - // SYCL 2020, 4.16: - // > If the totalBufferSize or workItemBufferSize limits are exceeded, - // > it is implementation-defined whether the streamed characters - // > exceeding the limit are output, or silently ignored/discarded, and - // > if output it is implementation-defined whether those extra - // > characters exceeding the workItemBufferSize limit count toward the - // > totalBufferSize limit. Regardless of this implementation defined - // > behavior of output exceeding the limits, no undefined or erroneous - // > behavior is permitted of an implementation when the limits are - // > exceeded. - // - // Defend against zero-sized buffers (although they'd have no practical - // use). - printf("%s", &(BufHostAcc[0])); - } - fflush(stdout); - }); - }); - LeadEvent->attachEventToComplete(detail::getSyclObjImpl(Event)); - Q->registerStreamServiceEvent(detail::getSyclObjImpl(Event)); + // auto Q = LeadEvent->getSubmittedQueue(); + // event Event = detail::createSyclObjFromImpl(Q).submit([&](handler &cgh) { + // auto BufHostAcc = + // Buf_.get_access( + // cgh, range<1>(BufferSize_), id<1>(OffsetSize)); + // // Create accessor to the flush buffer even if not using it yet. Otherwise + // // kernel will be a leaf for the flush buffer and scheduler will not be able + // // to cleanup the kernel. TODO: get rid of finalize method by using host + // // accessor to the flush buffer. 
+ // auto FlushBufHostAcc = + // FlushBuf_ + // .get_access( + // cgh); + // cgh.host_task([=] { + // if (!BufHostAcc.empty()) { + // // SYCL 2020, 4.16: + // // > If the totalBufferSize or workItemBufferSize limits are exceeded, + // // > it is implementation-defined whether the streamed characters + // // > exceeding the limit are output, or silently ignored/discarded, and + // // > if output it is implementation-defined whether those extra + // // > characters exceeding the workItemBufferSize limit count toward the + // // > totalBufferSize limit. Regardless of this implementation defined + // // > behavior of output exceeding the limits, no undefined or erroneous + // // > behavior is permitted of an implementation when the limits are + // // > exceeded. + // // + // // Defend against zero-sized buffers (although they'd have no practical + // // use). + // printf("%s", &(BufHostAcc[0])); + // } + // fflush(stdout); + // }); + // }); + // LeadEvent->attachEventToComplete(detail::getSyclObjImpl(Event)); + // Q->registerStreamServiceEvent(detail::getSyclObjImpl(Event)); } } // namespace detail diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 838b60809472c..4b34a1f4d6828 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -12,6 +12,7 @@ #include #include #include +#include using namespace sycl; @@ -81,8 +82,13 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { DisableCleanupName, "1", detail::SYCLConfig::reset}; + sycl::unittest::PiMock Mock; + sycl::platform Plt = Mock.getPlatform(); + sycl::queue Q(Plt.get_devices()[0]); + std::shared_ptr QImpl = detail::getSyclObjImpl(Q); + // Emulating processing of command group function - MockHandlerStreamInit MockCGH(nullptr, true); + MockHandlerStreamInit MockCGH(QImpl, true); MockCGH.setType(detail::CG::Kernel); auto EmptyKernel = [](sycl::nd_item<1>) {}; @@ -111,7 +117,7 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { static_cast(MainCG.get())->getStreams(); ASSERT_EQ(Streams.size(), 1u) << "Invalid number of stream objects"; - Streams[0]->initStreamHost(nullptr); + Streams[0]->initStreamHost(QImpl); MockScheduler MS; std::vector AuxCmds; From abfc5bfbdf48b8bfe48cfb17e68d9a91bb64ba9e Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 17 Jun 2024 07:49:32 -0700 Subject: [PATCH 26/52] tiny cleanup Signed-off-by: Tikhomirova, Kseniya --- .../source/detail/scheduler/graph_builder.cpp | 22 +++++++++---------- sycl/source/detail/scheduler/scheduler.hpp | 3 +-- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 6d3fbdd157618..1932f18d697ac 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -868,7 +868,7 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( Command *Cmd, const std::vector &Reqs, Command::BlockReason Reason, - std::vector &ToEnqueue, const bool AddDepsToLeaves) { + std::vector &ToEnqueue) { EmptyCommand *EmptyCmd = new EmptyCommand(); if (!EmptyCmd) @@ -889,19 +889,17 @@ EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( if (!Reqs.size()) Cmd->addUser(EmptyCmd); - if (AddDepsToLeaves) { - const std::vector &Deps = Cmd->MDeps; - std::vector ToCleanUp; - for (const DepDesc &Dep : Deps) { - const 
Requirement *Req = Dep.MDepRequirement; - MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); + const std::vector &Deps = Cmd->MDeps; + std::vector ToCleanUp; + for (const DepDesc &Dep : Deps) { + const Requirement *Req = Dep.MDepRequirement; + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); - updateLeaves({Cmd}, Record, Req->MAccessMode, ToCleanUp); - addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); - } - for (Command *Cmd : ToCleanUp) - cleanupCommand(Cmd); + updateLeaves({Cmd}, Record, Req->MAccessMode, ToCleanUp); + addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); } + for (Command *Cmd : ToCleanUp) + cleanupCommand(Cmd); return EmptyCmd; } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index d3462872c9ddf..4e0bf465d59fd 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -742,8 +742,7 @@ class Scheduler { EmptyCommand *addEmptyCmd(Command *Cmd, const std::vector &Req, Command::BlockReason Reason, - std::vector &ToEnqueue, - const bool AddDepsToLeaves = true); + std::vector &ToEnqueue); void createGraphForCommand(Command *NewCmd, CG &CG, bool isInteropTask, std::vector &Reqs, From 75f6eab8dd7a8f5b008d1b955bad3c3fc36914ba Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 17 Jun 2024 07:21:30 -0700 Subject: [PATCH 27/52] move stream_impl flush Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.cpp | 19 ++++- sycl/source/detail/queue_impl.hpp | 3 +- sycl/source/detail/scheduler/scheduler.cpp | 11 --- sycl/source/detail/stream_impl.cpp | 83 ++++++------------- sycl/source/detail/stream_impl.hpp | 10 +-- .../scheduler/CommandsWaitForEvents.cpp | 2 +- .../scheduler/StreamInitDependencyOnHost.cpp | 62 -------------- 7 files changed, 49 insertions(+), 141 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 298d4078cc922..af7af19ede120 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -361,8 +361,10 @@ event queue_impl::submit_impl(const std::function &CGF, // Host and interop tasks, however, are not submitted to low-level runtimes // and require separate dependency management. const CG::CGTYPE Type = Handler.getType(); - event Event = detail::createSyclObjFromImpl( - std::make_shared()); + event Event = detail::createSyclObjFromImpl(std::make_shared()); + std::vector Streams; + if (Type == CG::Kernel) + Streams = std::move(Handler.MStreamStorage); if (PostProcess) { bool IsKernel = Type == CG::Kernel; @@ -380,6 +382,19 @@ event queue_impl::submit_impl(const std::function &CGF, finalizeHandler(Handler, Event); addEvent(Event); + + auto EventImpl = detail::getSyclObjImpl(Event); + for (auto &Stream : Streams) { + // We don't want stream flushing to be blocking operation that is why submit a + // host task to print stream buffer. It will fire up as soon as the kernel + // finishes execution. 
+ event FlushEvent = submit_impl([&](handler &ServiceCGH) { + Stream->generateFlushCommand(ServiceCGH); + }, Self, PrimaryQueue, SecondaryQueue, Loc, {}); + EventImpl->attachEventToComplete(detail::getSyclObjImpl(FlushEvent)); + registerStreamServiceEvent(detail::getSyclObjImpl(FlushEvent)); + } + return Event; } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c3d0c4c5752f8..e72ded829a798 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -13,10 +13,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -26,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index d3fe7b523e689..52eb59b225004 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -99,13 +99,6 @@ EventImplPtr Scheduler::addCG( EventImplPtr NewEvent = nullptr; const CG::CGTYPE Type = CommandGroup->getType(); std::vector AuxiliaryCmds; - std::vector Streams; - - if (Type == CG::Kernel) { - auto *CGExecKernelPtr = static_cast(CommandGroup.get()); - Streams = CGExecKernelPtr->getStreams(); - CGExecKernelPtr->clearStreams(); - } std::vector> AuxiliaryResources; AuxiliaryResources = CommandGroup->getAuxiliaryResources(); CommandGroup->clearAuxiliaryResources(); @@ -143,10 +136,6 @@ EventImplPtr Scheduler::addCG( if (ShouldEnqueue) { enqueueCommandForCG(NewEvent, AuxiliaryCmds); - - for (const auto &StreamImplPtr : Streams) { - StreamImplPtr->flush(NewEvent); - } } if (!AuxiliaryResources.empty()) diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index cb46510551a30..7d926fbdb83dd 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -76,65 +76,36 @@ size_t stream_impl::get_size() const { return BufferSize_; } size_t stream_impl::get_max_statement_size() const { return MaxStatementSize_; } -void stream_impl::initStreamHost(QueueImplPtr Queue) { - // Real size of full flush buffer is saved only in buffer_impl field of - // FlushBuf object. - size_t FlushBufSize = getSyclObjImpl(FlushBuf_)->size(); - - auto Q = createSyclObjFromImpl(Queue); - Q.submit([&](handler &cgh) { - auto FlushBufAcc = FlushBuf_.get_access( - cgh, range<1>(1), id<1>(0)); - cgh.host_task([=] { - char *FlushBufPtr = FlushBufAcc.get_pointer(); - std::memset(FlushBufPtr, 0, FlushBufSize); - }); +void stream_impl::generateFlushCommand(handler& cgh) +{ + // Create accessor to the flush buffer even if not using it yet. Otherwise + // kernel will be a leaf for the flush buffer and scheduler will not be able + // to cleanup the kernel. TODO: get rid of finalize method by using host + // accessor to the flush buffer. + host_accessor FlushBuffHostAcc(FlushBuf_, cgh); + host_accessor BufHostAcc (Buf_, cgh, range<1>(BufferSize_), id<1>(OffsetSize)); + + cgh.host_task([=] { + if (!BufHostAcc.empty()) { + // SYCL 2020, 4.16: + // > If the totalBufferSize or workItemBufferSize limits are exceeded, + // > it is implementation-defined whether the streamed characters + // > exceeding the limit are output, or silently ignored/discarded, and + // > if output it is implementation-defined whether those extra + // > characters exceeding the workItemBufferSize limit count toward the + // > totalBufferSize limit. 
Regardless of this implementation defined + // > behavior of output exceeding the limits, no undefined or erroneous + // > behavior is permitted of an implementation when the limits are + // > exceeded. + // + // Defend against zero-sized buffers (although they'd have no practical + // use). + printf("%s", &(BufHostAcc[0])); + } + fflush(stdout); }); } -void stream_impl::flush(const EventImplPtr &LeadEvent) { - assert(LeadEvent && "LeadEvent is expected to be not nullptr"); - // We don't want stream flushing to be blocking operation that is why submit a - // host task to print stream buffer. It will fire up as soon as the kernel - // finishes execution. - // auto Q = LeadEvent->getSubmittedQueue(); - // event Event = detail::createSyclObjFromImpl(Q).submit([&](handler &cgh) { - // auto BufHostAcc = - // Buf_.get_access( - // cgh, range<1>(BufferSize_), id<1>(OffsetSize)); - // // Create accessor to the flush buffer even if not using it yet. Otherwise - // // kernel will be a leaf for the flush buffer and scheduler will not be able - // // to cleanup the kernel. TODO: get rid of finalize method by using host - // // accessor to the flush buffer. - // auto FlushBufHostAcc = - // FlushBuf_ - // .get_access( - // cgh); - // cgh.host_task([=] { - // if (!BufHostAcc.empty()) { - // // SYCL 2020, 4.16: - // // > If the totalBufferSize or workItemBufferSize limits are exceeded, - // // > it is implementation-defined whether the streamed characters - // // > exceeding the limit are output, or silently ignored/discarded, and - // // > if output it is implementation-defined whether those extra - // // > characters exceeding the workItemBufferSize limit count toward the - // // > totalBufferSize limit. Regardless of this implementation defined - // // > behavior of output exceeding the limits, no undefined or erroneous - // // > behavior is permitted of an implementation when the limits are - // // > exceeded. - // // - // // Defend against zero-sized buffers (although they'd have no practical - // // use). - // printf("%s", &(BufHostAcc[0])); - // } - // fflush(stdout); - // }); - // }); - // LeadEvent->attachEventToComplete(detail::getSyclObjImpl(Event)); - // Q->registerStreamServiceEvent(detail::getSyclObjImpl(Event)); -} - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index cd3d503b4b894..aacb495537943 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -41,14 +41,6 @@ class __SYCL_EXPORT stream_impl { // buffer and offset in the flush buffer GlobalOffsetAccessorT accessGlobalOffset(handler &CGH); - // Initialize flush buffers on host. - void initStreamHost(QueueImplPtr Queue); - - // Enqueue task to copy stream buffer to the host and print the contents - // The host task event is then registered for post processing in the - // LeadEvent as well as in queue LeadEvent associated with. 
- void flush(const EventImplPtr &LeadEvent); - size_t size() const noexcept; size_t get_work_item_buffer_size() const; @@ -67,6 +59,8 @@ class __SYCL_EXPORT stream_impl { return PropList_.get_property(); } + void generateFlushCommand(handler& cgh); + private: // Size of the stream buffer size_t BufferSize_; diff --git a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp index 499a45d0fe70f..43aa7a88775d7 100644 --- a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp +++ b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp @@ -163,7 +163,7 @@ TEST_F(SchedulerTest, StreamAUXCmdsWait) { auto EventImplProxy = std::static_pointer_cast(EventImpl); - ASSERT_TRUE(EventImplProxy->MPostCompleteEvents.size() == 1) + ASSERT_EQ(EventImplProxy->MPostCompleteEvents.size(), 1) << "Expected 1 post complete event"; Q.wait(); diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 4b34a1f4d6828..d1e7f22aa9485 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -74,65 +74,3 @@ static bool ValidateDepCommandsTree(const detail::Command *Cmd, return false; } - -TEST_F(SchedulerTest, StreamInitDependencyOnHost) { - // Disable post enqueue cleanup so that it doesn't interfere with dependency - // checks. - unittest::ScopedEnvVar DisabledCleanup{ - DisableCleanupName, "1", - detail::SYCLConfig::reset}; - - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - sycl::queue Q(Plt.get_devices()[0]); - std::shared_ptr QImpl = detail::getSyclObjImpl(Q); - - // Emulating processing of command group function - MockHandlerStreamInit MockCGH(QImpl, true); - MockCGH.setType(detail::CG::Kernel); - - auto EmptyKernel = [](sycl::nd_item<1>) {}; - MockCGH - .setHostKernel, 1, class Empty>( - EmptyKernel); - MockCGH.setNDRangeDesc( - sycl::nd_range<1>{sycl::range<1>{1}, sycl::range<1>{1}}); - - // Emulating construction of stream object inside command group - detail::StreamImplPtr StreamImpl = - std::make_shared(1024, 200, MockCGH); - detail::GlobalBufAccessorT FlushBufAcc = - StreamImpl->accessGlobalFlushBuf(MockCGH); - MockCGH.addStream(StreamImpl); - - detail::SYCLMemObjI *FlushBufMemObjPtr = - detail::getSyclObjImpl(FlushBufAcc)->MSYCLMemObj; - ASSERT_TRUE(!!FlushBufMemObjPtr) - << "Memory object for stream flush buffer not initialized"; - - std::unique_ptr MainCG = MockCGH.finalize(); - - // Emulate call of Scheduler::addCG - std::vector Streams = - static_cast(MainCG.get())->getStreams(); - ASSERT_EQ(Streams.size(), 1u) << "Invalid number of stream objects"; - - Streams[0]->initStreamHost(QImpl); - - MockScheduler MS; - std::vector AuxCmds; - detail::Command *NewCmd = MS.addCG(std::move(MainCG), nullptr, AuxCmds); - ASSERT_TRUE(!!NewCmd) << "Failed to add command group into scheduler"; - ASSERT_GT(NewCmd->MDeps.size(), 0u) - << "No deps appeared in the new exec kernel command"; - - // Searching in dependencies for CG execution command that initializes flush - // buffer of a stream that is supposed to be used inside NewCmd's CG. 
- // Tree of dependencies should look like: - // [MAIN_CG] -> [EMPTY_NODE {FlushBufMemObj}] -> [FILL_CG {FlushBufMemObj}] -> - // [[ALLOC_TASK {FlushBufMemObj}] - std::vector DepCmdsTypes({CmdTypeTy::RUN_CG, // FILL_CG - CmdTypeTy::ALLOCA}); - ASSERT_TRUE(ValidateDepCommandsTree(NewCmd, DepCmdsTypes, FlushBufMemObjPtr)) - << "Dependency on stream flush buffer initialization not found"; -} From be12c01ecc837de0ff5f7f3c2f17ca34b03d921d Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 19 Jun 2024 04:44:06 -0700 Subject: [PATCH 28/52] test fix Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/device_impl.cpp | 3 ++ sycl/source/detail/event_impl.cpp | 30 +++++++++---------- sycl/source/detail/image_impl.cpp | 2 ++ .../scheduler/CommandsWaitForEvents.cpp | 2 +- 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 846972254f7d9..e24b6f6f2510e 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -327,6 +327,9 @@ bool device_impl::has(aspect Aspect) const { size_t return_size = 0; switch (Aspect) { + case aspect::host: + //Deprecated + return false; case aspect::cpu: return is_cpu(); case aspect::gpu: diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 0d2976e7ec271..93dc4b7fca1b1 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -155,15 +155,13 @@ event_impl::event_impl(const QueueImplPtr &Queue) MFallbackProfiling{MIsProfilingEnabled && Queue && Queue->isProfilingFallback()} { if (Queue) this->setContextImpl(Queue->getContextImplPtr()); - if (!Queue) { + else { MState.store(HES_NotComplete); - if (Queue->has_property()) { - MHostProfilingInfo.reset(new HostProfilingInfo()); - if (!MHostProfilingInfo) - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Out of host memory " + - codeToString(PI_ERROR_OUT_OF_HOST_MEMORY)); - } + MHostProfilingInfo.reset(new HostProfilingInfo()); + if (!MHostProfilingInfo) + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Out of host memory " + + codeToString(PI_ERROR_OUT_OF_HOST_MEMORY)); return; } MState.store(HES_Complete); @@ -381,13 +379,15 @@ event_impl::get_info() { if (MState == HES_Discarded) return info::event_command_status::ext_oneapi_unknown; - // Command is enqueued and PiEvent is ready - if (MEvent) - return get_event_info( - this->getHandleRef(), this->getPlugin()); - // Command is blocked and not enqueued, PiEvent is not assigned yet - else if (MCommand) - return sycl::info::event_command_status::submitted; + if (!MIsHostEvent) { + // Command is enqueued and PiEvent is ready + if (MEvent) + return get_event_info( + this->getHandleRef(), this->getPlugin()); + // Command is blocked and not enqueued, PiEvent is not assigned yet + else if (MCommand) + return sycl::info::event_command_status::submitted; + } return MState.load() != HES_Complete ? 
sycl::info::event_command_status::submitted diff --git a/sycl/source/detail/image_impl.cpp b/sycl/source/detail/image_impl.cpp index 0b512ae1aedbe..e5bacd33fc70d 100644 --- a/sycl/source/detail/image_impl.cpp +++ b/sycl/source/detail/image_impl.cpp @@ -471,6 +471,8 @@ bool image_impl::checkImageFormat( } std::vector image_impl::getDevices(const ContextImplPtr Context) { + if (!Context) + return {}; return Context->get_info(); } diff --git a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp index 43aa7a88775d7..daf8599947ad2 100644 --- a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp +++ b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp @@ -163,7 +163,7 @@ TEST_F(SchedulerTest, StreamAUXCmdsWait) { auto EventImplProxy = std::static_pointer_cast(EventImpl); - ASSERT_EQ(EventImplProxy->MPostCompleteEvents.size(), 1) + ASSERT_EQ(EventImplProxy->MPostCompleteEvents.size(), 1u) << "Expected 1 post complete event"; Q.wait(); From e043ee01f185cecac5c0cbd2648853ac0ff4c6db Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 19 Jun 2024 05:35:10 -0700 Subject: [PATCH 29/52] restore & update ABI - not breaking Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/stream_impl.cpp | 9 +++++++++ sycl/source/detail/stream_impl.hpp | 9 +++++++++ sycl/test/abi/sycl_symbols_linux.dump | 17 +++++++++-------- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 7d926fbdb83dd..75c80745ec71c 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -106,6 +106,15 @@ void stream_impl::generateFlushCommand(handler& cgh) }); } + // ABI break: remove + void stream_impl::initStreamHost(QueueImplPtr ){}; + + // ABI break: remove + void stream_impl::flush(const EventImplPtr &) {}; + + // ABI break: remove + void stream_impl::flush() {}; + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index aacb495537943..4fc1f4b1d5a8a 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -41,6 +41,15 @@ class __SYCL_EXPORT stream_impl { // buffer and offset in the flush buffer GlobalOffsetAccessorT accessGlobalOffset(handler &CGH); + // ABI break: remove + void initStreamHost(QueueImplPtr); + + // ABI break: remove + void flush(const EventImplPtr &); + + // ABI break: remove + void flush(); + size_t size() const noexcept; size_t get_work_item_buffer_size() const; diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 0edaaa25b4ba1..c60fdb1318905 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3119,6 +3119,7 @@ _ZN4sycl3_V15queue10mem_adviseEPKvmiRKSt6vectorINS0_5eventESaIS5_EERKNS0_6detail _ZN4sycl3_V15queue10wait_proxyERKNS0_6detail13code_locationE _ZN4sycl3_V15queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE _ZN4sycl3_V15queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationE +_ZN4sycl3_V15queue15ext_oneapi_prodEv _ZN4sycl3_V15queue17discard_or_returnERKNS0_5eventE _ZN4sycl3_V15queue18throw_asynchronousEv _ZN4sycl3_V15queue20memcpyToDeviceGlobalEPvPKvbmmRKSt6vectorINS0_5eventESaIS6_EE @@ -3230,6 +3231,7 @@ _ZN4sycl3_V16detail11stream_impl14initStreamHostESt10shared_ptrINS1_10queue_impl _ZN4sycl3_V16detail11stream_impl15accessGlobalBufERNS0_7handlerE 
_ZN4sycl3_V16detail11stream_impl18accessGlobalOffsetERNS0_7handlerE _ZN4sycl3_V16detail11stream_impl20accessGlobalFlushBufERNS0_7handlerE +_ZN4sycl3_V16detail11stream_impl20generateFlushCommandERNS0_7handlerE _ZN4sycl3_V16detail11stream_impl5flushERKSt10shared_ptrINS1_10event_implEE _ZN4sycl3_V16detail11stream_impl5flushEv _ZN4sycl3_V16detail11stream_implC1EmmRKNS0_13property_listE @@ -3621,6 +3623,7 @@ _ZN4sycl3_V17handler28memcpyToHostOnlyDeviceGlobalEPKvS3_mbmm _ZN4sycl3_V17handler28setStateExplicitKernelBundleEv _ZN4sycl3_V17handler30memcpyFromHostOnlyDeviceGlobalEPvPKvbmm _ZN4sycl3_V17handler30verifyUsedKernelBundleInternalENS0_6detail11string_viewE +_ZN4sycl3_V17handler32verifyDeviceHasProgressGuaranteeENS0_3ext6oneapi12experimental26forward_progress_guaranteeENS4_15execution_scopeES6_ _ZN4sycl3_V17handler34ext_oneapi_wait_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleE _ZN4sycl3_V17handler36ext_oneapi_signal_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleE _ZN4sycl3_V17handler6memcpyEPvPKvm @@ -3633,7 +3636,6 @@ _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEb -_ZN4sycl3_V17handler32verifyDeviceHasProgressGuaranteeENS0_3ext6oneapi12experimental26forward_progress_guaranteeENS4_15execution_scopeES6_ _ZN4sycl3_V17samplerC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE _ZN4sycl3_V17samplerC1EP11_cl_samplerRKNS0_7contextE _ZN4sycl3_V17samplerC2ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE @@ -3748,7 +3750,6 @@ _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue16enable_profilingEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue4cuda18use_default_streamEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue8in_orderEEEbv _ZNK4sycl3_V15queue16ext_oneapi_emptyEv -_ZN4sycl3_V15queue15ext_oneapi_prodEv _ZNK4sycl3_V15queue16get_backend_infoINS0_4info6device15backend_versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V15queue16get_backend_infoINS0_4info6device7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V15queue16get_backend_infoINS0_4info8platform7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv @@ -3973,6 +3974,12 @@ _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device22m _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device22max_image_linear_widthEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device23max_image_linear_heightEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device26max_image_linear_row_pitchEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv 
+_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE1EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device32work_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext8codeplay12experimental4info6device15supports_fusionEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext8codeplay12experimental4info6device28max_registers_per_work_groupEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device10extensionsEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv @@ -4084,12 +4091,6 @@ _ZNK4sycl3_V16device13get_info_implINS0_4info6device7versionEEENS0_6detail11ABIN _ZNK4sycl3_V16device13get_info_implINS0_4info6device8atomic64EEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device8platformEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device9vendor_idEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device32work_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE1EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13has_extensionERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE 
_ZNK4sycl3_V16device14is_acceleratorEv _ZNK4sycl3_V16device16get_backend_infoINS0_4info6device15backend_versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv From cea7c7271f0172ea8b45db2b3b221d4d5cb11937 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 19 Jun 2024 05:48:29 -0700 Subject: [PATCH 30/52] clang git-clang-format run on changed files Signed-off-by: Tikhomirova, Kseniya --- sycl/source/context.cpp | 4 +- sycl/source/detail/context_impl.cpp | 3 +- sycl/source/detail/device_impl.cpp | 5 +- sycl/source/detail/device_impl.hpp | 12 +- sycl/source/detail/event_impl.cpp | 18 +- sycl/source/detail/memory_manager.cpp | 28 +-- sycl/source/detail/platform_impl.hpp | 8 +- sycl/source/detail/program_impl.cpp | 22 +-- sycl/source/detail/program_impl.hpp | 4 +- sycl/source/detail/queue_impl.cpp | 15 +- sycl/source/detail/queue_impl.hpp | 19 +- sycl/source/detail/scheduler/commands.cpp | 165 +++++++++--------- sycl/source/detail/scheduler/commands.hpp | 12 +- .../source/detail/scheduler/graph_builder.cpp | 59 +++---- sycl/source/detail/scheduler/scheduler.cpp | 8 +- sycl/source/detail/stream_impl.cpp | 21 +-- sycl/source/detail/stream_impl.hpp | 2 +- sycl/source/detail/sycl_mem_obj_t.cpp | 5 +- sycl/source/detail/usm/usm_impl.cpp | 48 ++--- .../scheduler/EnqueueWithDependsOnDeps.cpp | 3 +- sycl/unittests/scheduler/GraphCleanup.cpp | 3 +- sycl/unittests/scheduler/InOrderQueueDeps.cpp | 3 +- .../scheduler/LeafLimitDiffContexts.cpp | 4 +- sycl/unittests/scheduler/LeavesCollection.cpp | 3 +- .../scheduler/LinkedAllocaDependencies.cpp | 3 +- .../scheduler/NoHostUnifiedMemory.cpp | 3 +- sycl/unittests/scheduler/QueueFlushing.cpp | 10 +- .../scheduler/StreamInitDependencyOnHost.cpp | 2 +- 28 files changed, 239 insertions(+), 253 deletions(-) diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index 70b12836fc297..1261096b82047 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -56,13 +56,13 @@ context::context(const std::vector &DeviceList, throw invalid_parameter_error("DeviceList is empty.", PI_ERROR_INVALID_VALUE); } - + const auto &RefPlatform = detail::getSyclObjImpl(DeviceList[0].get_platform())->getHandleRef(); if (std::any_of(DeviceList.begin(), DeviceList.end(), [&](const device &CurrentDevice) { return (detail::getSyclObjImpl(CurrentDevice.get_platform()) - ->getHandleRef() != RefPlatform); + ->getHandleRef() != RefPlatform); })) throw invalid_parameter_error( "Can't add devices across platforms to a single context.", diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 0c79ed2f70462..8ae13b345b250 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -33,8 +33,7 @@ context_impl::context_impl(const device &Device, async_handler AsyncHandler, : MOwnedByRuntime(true), MAsyncHandler(AsyncHandler), MDevices(1, Device), MContext(nullptr), MPlatform(detail::getSyclObjImpl(Device.get_platform())), - MPropList(PropList), - MSupportBufferLocationByDevices(NotChecked) { + MPropList(PropList), MSupportBufferLocationByDevices(NotChecked) { MKernelProgramCache.setContextPtr(this); } diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index e24b6f6f2510e..ebad36158cfc6 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -34,8 +34,7 @@ device_impl::device_impl(sycl::detail::pi::PiDevice Device, device_impl::device_impl(pi_native_handle InteropDeviceHandle, sycl::detail::pi::PiDevice Device, 
PlatformImplPtr Platform, const PluginPtr &Plugin) - : MDevice(Device), - MDeviceHostBaseTime(std::make_pair(0, 0)) { + : MDevice(Device), MDeviceHostBaseTime(std::make_pair(0, 0)) { bool InteroperabilityConstructor = false; if (Device == nullptr) { @@ -328,7 +327,7 @@ bool device_impl::has(aspect Aspect) const { switch (Aspect) { case aspect::host: - //Deprecated + // Deprecated return false; case aspect::cpu: return is_cpu(); diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 9249bbba59fe8..a3344ecdd3870 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -64,18 +64,14 @@ class device_impl { /// For host device an exception is thrown /// /// \return non-constant reference to PI device - sycl::detail::pi::PiDevice &getHandleRef() { - return MDevice; - } + sycl::detail::pi::PiDevice &getHandleRef() { return MDevice; } /// Get constant reference to PI device /// /// For host device an exception is thrown /// /// \return constant reference to PI device - const sycl::detail::pi::PiDevice &getHandleRef() const { - return MDevice; - } + const sycl::detail::pi::PiDevice &getHandleRef() const { return MDevice; } /// Check if device is a CPU device /// @@ -90,9 +86,7 @@ class device_impl { /// Check if device is an accelerator device /// /// \return true if SYCL device is an accelerator device - bool is_accelerator() const { - return MType == PI_DEVICE_TYPE_ACC; - } + bool is_accelerator() const { return MType == PI_DEVICE_TYPE_ACC; } /// Return device type /// diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 93dc4b7fca1b1..7d91129f25b51 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -38,8 +38,8 @@ void event_impl::ensureContextInitialized() { return; const device SyclDevice; - this->setContextImpl(detail::queue_impl::getDefaultOrNew( - detail::getSyclObjImpl(SyclDevice))); + this->setContextImpl( + detail::queue_impl::getDefaultOrNew(detail::getSyclObjImpl(SyclDevice))); } event_impl::~event_impl() { @@ -134,8 +134,8 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { event_impl::event_impl(sycl::detail::pi::PiEvent Event, const context &SyclContext) : MIsContextInitialized(true), MEvent(Event), - MContext(detail::getSyclObjImpl(SyclContext)), - MIsFlushed(true), MState(HES_Complete) { + MContext(detail::getSyclObjImpl(SyclContext)), MIsFlushed(true), + MState(HES_Complete) { sycl::detail::pi::PiContext TempContext; getPlugin()->call( @@ -150,9 +150,9 @@ event_impl::event_impl(sycl::detail::pi::PiEvent Event, } event_impl::event_impl(const QueueImplPtr &Queue) - : MQueue{Queue}, - MIsProfilingEnabled{!Queue || Queue->MIsProfilingEnabled}, - MFallbackProfiling{MIsProfilingEnabled && Queue && Queue->isProfilingFallback()} { + : MQueue{Queue}, MIsProfilingEnabled{!Queue || Queue->MIsProfilingEnabled}, + MFallbackProfiling{MIsProfilingEnabled && Queue && + Queue->isProfilingFallback()} { if (Queue) this->setContextImpl(Queue->getContextImplPtr()); else { @@ -412,7 +412,7 @@ event_impl::get_backend_info() const { } // If the queue has been released, no platform will be associated // so return empty string. 
- return ""; + return ""; } template <> @@ -571,7 +571,7 @@ bool event_impl::isCompleted() { void event_impl::setCommand(void *Cmd) { MCommand = Cmd; - auto TypedCommand = static_cast(Cmd); + auto TypedCommand = static_cast(Cmd); if (TypedCommand) MIsHostEvent = TypedCommand->getWorkerContext() == nullptr; } diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 461cf8b85915c..6f30ceef8eb51 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -398,9 +398,11 @@ void *MemoryManager::allocateMemBuffer( sycl::detail::pi::PiEvent &OutEventToWait) { void *MemPtr; if (!TargetContext) - MemPtr = allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList); + MemPtr = + allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList); else if (UserPtr && InteropContext) - MemPtr = allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, + MemPtr = + allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, InteropContext, PropsList, OutEventToWait); else MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly, Size, @@ -665,7 +667,8 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); - assert(SrcQueue && "Source mem object and target mem object queues are expected to be not nullptr"); + assert(SrcQueue && "Source mem object and target mem object queues are " + "expected to be not nullptr"); const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef(); const PluginPtr &Plugin = SrcQueue->getPlugin(); @@ -778,9 +781,9 @@ void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, if (!SrcQueue) { if (!TgtQueue) copyH2H(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize, - SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem, - nullptr, DimDst, DstSize, DstAccessRange, DstOffset, - DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl); + SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem, nullptr, + DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, + std::move(DepEvents), OutEvent, OutEventImpl); else copyH2D(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize, @@ -1235,7 +1238,8 @@ memcpyToDeviceGlobalUSM(QueueImplPtr Queue, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(Queue && "Copy to device global USM must be called with a valid device queue"); + assert(Queue && + "Copy to device global USM must be called with a valid device queue"); // Get or allocate USM memory for the device_global. 
DeviceGlobalUSMMem &DeviceGlobalUSM = DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(Queue); @@ -1337,7 +1341,9 @@ static void memcpyToDeviceGlobalDirect( size_t NumBytes, size_t Offset, const void *Src, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - assert(Queue && "Direct copy to device global must be called with a valid device queue"); + assert( + Queue && + "Direct copy to device global must be called with a valid device queue"); sycl::detail::pi::PiProgram Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); const PluginPtr &Plugin = Queue->getPlugin(); @@ -1352,7 +1358,8 @@ static void memcpyFromDeviceGlobalDirect( size_t NumBytes, size_t Offset, void *Dest, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - assert(Queue && "Direct copy from device global must be called with a valid device queue"); + assert(Queue && "Direct copy from device global must be called with a valid " + "device queue"); sycl::detail::pi::PiProgram Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); const PluginPtr &Plugin = Queue->getPlugin(); @@ -1762,7 +1769,8 @@ void MemoryManager::copy_image_bindless( sycl::detail::pi::PiImageRegion CopyExtent, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - assert(Queue && "Copy image bindless must be called with a valid device queue"); + assert(Queue && + "Copy image bindless must be called with a valid device queue"); assert((Flags == (sycl::detail::pi::PiImageCopyFlags) ext::oneapi::experimental::image_copy_flags::HtoD || Flags == (sycl::detail::pi::PiImageCopyFlags) diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index e13bd0a3a1b31..bc6278d54f32c 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -103,9 +103,7 @@ class platform_impl { } /// \return an instance of OpenCL cl_platform_id. - cl_platform_id get() const { - return pi::cast(MPlatform); - } + cl_platform_id get() const { return pi::cast(MPlatform); } /// Returns raw underlying plug-in platform handle. /// @@ -114,9 +112,7 @@ class platform_impl { /// is in use. /// /// \return a raw plug-in platform handle. - const sycl::detail::pi::PiPlatform &getHandleRef() const { - return MPlatform; - } + const sycl::detail::pi::PiPlatform &getHandleRef() const { return MPlatform; } /// Returns all available SYCL platforms in the system. 
/// diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index 584b2487f5dee..df95614d872c3 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -220,22 +220,22 @@ void program_impl::compile_with_kernel_name(std::string KernelName, std::string CompileOptions) { std::lock_guard Lock(MMutex); throw_if_state_is_not(program_state::none); - create_pi_program_with_kernel_name( - KernelName, - /*JITCompilationIsRequired=*/(!CompileOptions.empty())); - compile(CompileOptions); + create_pi_program_with_kernel_name( + KernelName, + /*JITCompilationIsRequired=*/(!CompileOptions.empty())); + compile(CompileOptions); MState = program_state::compiled; } void program_impl::link(std::string LinkOptions) { std::lock_guard Lock(MMutex); throw_if_state_is_not(program_state::compiled); - check_device_feature_support(MDevices); - std::vector Devices(get_pi_devices()); - const PluginPtr &Plugin = getPlugin(); - const char *LinkOpts = SYCLConfig::get(); - if (!LinkOpts) { - LinkOpts = LinkOptions.c_str(); + check_device_feature_support(MDevices); + std::vector Devices(get_pi_devices()); + const PluginPtr &Plugin = getPlugin(); + const char *LinkOpts = SYCLConfig::get(); + if (!LinkOpts) { + LinkOpts = LinkOptions.c_str(); } // Plugin resets MProgram with a new pi_program as a result of the call to @@ -251,7 +251,7 @@ void program_impl::link(std::string LinkOptions) { Plugin->checkPiResult(Err); MLinkOptions = LinkOptions; MBuildOptions = LinkOptions; - MState = program_state::linked; + MState = program_state::linked; } bool program_impl::has_kernel(std::string KernelName, diff --git a/sycl/source/detail/program_impl.hpp b/sycl/source/detail/program_impl.hpp index 1fa8767774961..67c02e95734ab 100644 --- a/sycl/source/detail/program_impl.hpp +++ b/sycl/source/detail/program_impl.hpp @@ -216,9 +216,7 @@ class program_impl { } /// \return the Plugin associated with the context of this program. - const PluginPtr &getPlugin() const { - return MContext->getPlugin(); - } + const PluginPtr &getPlugin() const { return MContext->getPlugin(); } ContextImplPtr getContextImplPtr() const { return MContext; } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index af7af19ede120..83f33688ed0b1 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -361,7 +361,8 @@ event queue_impl::submit_impl(const std::function &CGF, // Host and interop tasks, however, are not submitted to low-level runtimes // and require separate dependency management. const CG::CGTYPE Type = Handler.getType(); - event Event = detail::createSyclObjFromImpl(std::make_shared()); + event Event = detail::createSyclObjFromImpl( + std::make_shared()); std::vector Streams; if (Type == CG::Kernel) Streams = std::move(Handler.MStreamStorage); @@ -385,12 +386,12 @@ event queue_impl::submit_impl(const std::function &CGF, auto EventImpl = detail::getSyclObjImpl(Event); for (auto &Stream : Streams) { - // We don't want stream flushing to be blocking operation that is why submit a - // host task to print stream buffer. It will fire up as soon as the kernel + // We don't want stream flushing to be blocking operation that is why submit + // a host task to print stream buffer. It will fire up as soon as the kernel // finishes execution. 
- event FlushEvent = submit_impl([&](handler &ServiceCGH) { - Stream->generateFlushCommand(ServiceCGH); - }, Self, PrimaryQueue, SecondaryQueue, Loc, {}); + event FlushEvent = submit_impl( + [&](handler &ServiceCGH) { Stream->generateFlushCommand(ServiceCGH); }, + Self, PrimaryQueue, SecondaryQueue, Loc, {}); EventImpl->attachEventToComplete(detail::getSyclObjImpl(FlushEvent)); registerStreamServiceEvent(detail::getSyclObjImpl(FlushEvent)); } @@ -707,7 +708,7 @@ void queue_impl::revisitUnenqueuedCommandsState( Deps.UnenqueuedCmdEvents.begin(), Deps.UnenqueuedCmdEvents.end(), [](const EventImplPtr &CommandEvent) { return (CommandEvent->isHost() ? CommandEvent->isCompleted() - : CommandEvent->isEnqueued()); + : CommandEvent->isEnqueued()); }), Deps.UnenqueuedCmdEvents.end()); } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index e72ded829a798..d0a74cc80c793 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -13,8 +13,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -194,14 +194,13 @@ class queue_impl { if (MDevice) { xpti::addMetadata(TEvent, "sycl_device_name", MDevice->getDeviceName()); - xpti::addMetadata( - TEvent, "sycl_device", - reinterpret_cast(MDevice->getHandleRef())); + xpti::addMetadata(TEvent, "sycl_device", + reinterpret_cast(MDevice->getHandleRef())); } xpti::addMetadata(TEvent, "is_inorder", MIsInorder); xpti::addMetadata(TEvent, "queue_id", MQueueID); xpti::addMetadata(TEvent, "queue_handle", - reinterpret_cast(getHandleRef())); + reinterpret_cast(getHandleRef())); }); // Also publish to TLS xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); @@ -257,9 +256,8 @@ class queue_impl { if (MDevice) { xpti::addMetadata(TEvent, "sycl_device_name", MDevice->getDeviceName()); - xpti::addMetadata( - TEvent, "sycl_device", - reinterpret_cast(MDevice->getHandleRef())); + xpti::addMetadata(TEvent, "sycl_device", + reinterpret_cast(MDevice->getHandleRef())); } xpti::addMetadata(TEvent, "is_inorder", MIsInorder); xpti::addMetadata(TEvent, "queue_id", MQueueID); @@ -751,9 +749,8 @@ class queue_impl { // tasks and host tasks is applicable for out of order queues only. Not neede // for in order ones. void revisitUnenqueuedCommandsState(const EventImplPtr &CompletedHostTask); - - static ContextImplPtr getContext(const QueueImplPtr& Queue) - { + + static ContextImplPtr getContext(const QueueImplPtr &Queue) { return Queue ? Queue->getContextImplPtr() : nullptr; } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index c751cf7438ae7..3d51fe7a1c12f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -90,21 +90,19 @@ static std::string deviceToString(device Device) { return "UNKNOWN"; } -static void addDeviceMetadata(xpti_td* TraceEvent, const QueueImplPtr& Queue) -{ - xpti::addMetadata(TraceEvent, "sycl_device", - Queue ? deviceToID(Queue->get_device()) : 0); - xpti::addMetadata(TraceEvent, "sycl_device_type", - Queue ? deviceToString(Queue->get_device()) : "host"); - if (Queue) - xpti::addMetadata(TraceEvent, "sycl_device_name", +static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) { + xpti::addMetadata(TraceEvent, "sycl_device", + Queue ? deviceToID(Queue->get_device()) : 0); + xpti::addMetadata(TraceEvent, "sycl_device_type", + Queue ? 
deviceToString(Queue->get_device()) : "host"); + if (Queue) + xpti::addMetadata(TraceEvent, "sycl_device_name", getSyclObjImpl(Queue->get_device())->getDeviceName()); } #endif -static ContextImplPtr getContext(const QueueImplPtr& Queue) -{ +static ContextImplPtr getContext(const QueueImplPtr &Queue) { if (Queue) return Queue->getContextImplPtr(); return nullptr; @@ -350,10 +348,12 @@ class DispatchHostTask { PluginWithEvents.first->call(RawEvents.size(), RawEvents.data()); } catch (const sycl::exception &E) { - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(std::current_exception()); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( + std::current_exception()); return (pi_result)E.get_cl_code(); } catch (...) { - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(std::current_exception()); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( + std::current_exception()); return PI_ERROR_UNKNOWN; } } @@ -404,7 +404,8 @@ class DispatchHostTask { try { // we're ready to call the user-defined lambda now if (HostTask.MHostTask->isInteropTask()) { - assert(HostTask.MQueue && "Submitted queue for host task must be device queue"); + assert(HostTask.MQueue && + "Submitted queue for host task must be device queue"); interop_handle IH{MReqToMem, HostTask.MQueue, HostTask.MQueue->getDeviceImplPtr(), HostTask.MQueue->getContextImplPtr()}; @@ -431,7 +432,8 @@ class DispatchHostTask { } } #endif - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( + CurrentException); } HostTask.MHostTask.reset(); @@ -448,7 +450,8 @@ class DispatchHostTask { Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); } catch (...) { auto CurrentException = std::current_exception(); - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( + CurrentException); } } }; @@ -461,13 +464,13 @@ void Command::waitForPreparedHostEvents() const { void Command::waitForEvents(QueueImplPtr Queue, std::vector &EventImpls, sycl::detail::pi::PiEvent &Event) { - #ifndef NDEBUG - for (const EventImplPtr &Event : EventImpls) - assert(!Event->isHost() && - "Only non-host events are expected to be waited for here"); +#ifndef NDEBUG + for (const EventImplPtr &Event : EventImpls) + assert(!Event->isHost() && + "Only non-host events are expected to be waited for here"); #endif if (!EventImpls.empty()) { - if (!Queue) { + if (!Queue) { // Host queue can wait for events from different contexts, i.e. it may // contain events with different contexts in its MPreparedDepsEvents. // OpenCL 2.1 spec says that clWaitForEvents will return @@ -507,7 +510,7 @@ void Command::waitForEvents(QueueImplPtr Queue, MEvent->setHostEnqueueTime(); Plugin->call( Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event); - } + } } } @@ -716,7 +719,8 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, // 1. Non-host events can be ignored if they are not fully initialized. // 2. Some types of commands do not produce PI events after they are - // enqueued (e.g. alloca). Note that we can't check the pi event to make that distinction since the command might still be unenqueued at this point. + // enqueued (e.g. alloca). Note that we can't check the pi event to make that + // distinction since the command might still be unenqueued at this point. 
bool PiEventExpected = (!DepEvent->isHost() && DepEvent->isInitialized()); if (auto *DepCmd = static_cast(DepEvent->getCommand())) PiEventExpected &= DepCmd->producesPiEvent(); @@ -732,7 +736,7 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match we'll connect them using host task - if (DepEventContext != WorkerContext && WorkerContext){ + if (DepEventContext != WorkerContext && WorkerContext) { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; ConnectionCmd = GB.connectDepEvent(this, DepEvent, Dep, ToCleanUp); } else @@ -1006,7 +1010,7 @@ void AllocaCommandBase::emitInstrumentationData() { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); } #endif } @@ -1066,9 +1070,9 @@ pi_int32 AllocaCommand::enqueueImp() { } // TODO: Check if it is correct to use std::move on stack variable and // delete it RawEvents below. - MMemAllocation = MemoryManager::allocate( - getContext(MQueue), getSYCLMemObj(), MInitFromUserData, HostPtr, - std::move(EventImpls), Event); + MMemAllocation = MemoryManager::allocate(getContext(MQueue), getSYCLMemObj(), + MInitFromUserData, HostPtr, + std::move(EventImpls), Event); return PI_SUCCESS; } @@ -1077,7 +1081,8 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "ALLOCA ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; @@ -1163,8 +1168,8 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") - << "\\n"; + Stream << "ALLOCA SUB BUF ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; Stream << " Access range : " << this->MRequirement.MAccessRange[0] << "\\n"; @@ -1266,9 +1271,9 @@ pi_int32 ReleaseCommand::enqueueImp() { if (SkipRelease) Command::waitForEvents(MQueue, EventImpls, Event); else { - MemoryManager::release( - getContext(MQueue), MAllocaCmd->getSYCLMemObj(), - MAllocaCmd->getMemAllocation(), std::move(EventImpls), Event); + MemoryManager::release(getContext(MQueue), MAllocaCmd->getSYCLMemObj(), + MAllocaCmd->getMemAllocation(), + std::move(EventImpls), Event); } return PI_SUCCESS; } @@ -1277,7 +1282,8 @@ void ReleaseCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FF827A\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "RELEASE ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "RELEASE ON " + << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " Alloca : " << MAllocaCmd << "\\n"; Stream << " MemObj : " << MAllocaCmd->getSYCLMemObj() << "\\n"; Stream << "\"];" << std::endl; @@ -1347,7 +1353,8 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "MAP ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1406,8 +1413,8 @@ bool UnMapMemObject::producesPiEvent() const { // so the execution of kernel B starts only on step 4. This workaround // restores the old behavior in this case until this is resolved. return MQueue && (MQueue->getDeviceImplPtr()->getBackend() != - backend::ext_oneapi_level_zero || - MEvent->getHandleRef() != nullptr); + backend::ext_oneapi_level_zero || + MEvent->getHandleRef() != nullptr); } pi_int32 UnMapMemObject::enqueueImp() { @@ -1428,7 +1435,8 @@ void UnMapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#EBC40F\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "UNMAP ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "UNMAP ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1476,13 +1484,12 @@ void MemCpyCommand::emitInstrumentationData() { reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); - xpti::addMetadata( - CmdTraceEvent, "copy_to", - MQueue ? deviceToID(MQueue->get_device()): 0); + xpti::addMetadata(CmdTraceEvent, "copy_to", + MQueue ? deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1539,11 +1546,10 @@ void MemCpyCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#C7EB15\" label=\""; Stream << "ID = " << this << " ; "; - Stream << "MEMCPY ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; - Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue - << "\\n"; - Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue - << "\\n"; + Stream << "MEMCPY ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue << "\\n"; + Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue << "\\n"; Stream << "\"];" << std::endl; @@ -1597,7 +1603,8 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#f1337f\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "UPDATE REQ ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "UPDATE REQ ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; bool IsReqOnBuffer = MDstReq.MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer; Stream << "TYPE: " << (IsReqOnBuffer ? 
"Buffer" : "Image") << "\\n"; @@ -1649,14 +1656,13 @@ void MemCpyCommandHost::emitInstrumentationData() { xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", - MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); - xpti::addMetadata( - CmdTraceEvent, "copy_to", - MQueue ? deviceToID(MQueue->get_device()) : 0); + MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); + xpti::addMetadata(CmdTraceEvent, "copy_to", + MQueue ? deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1696,8 +1702,7 @@ pi_int32 MemCpyCommandHost::enqueueImp() { return PI_SUCCESS; } -EmptyCommand::EmptyCommand() - : Command(CommandType::EMPTY_TASK, nullptr) { +EmptyCommand::EmptyCommand() : Command(CommandType::EMPTY_TASK, nullptr) { emitInstrumentationDataProxy(); } @@ -1746,7 +1751,7 @@ void EmptyCommand::emitInstrumentationData() { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1775,7 +1780,8 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "MEMCPY HOST ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "MEMCPY HOST ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1814,7 +1820,7 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -2082,7 +2088,7 @@ std::pair emitKernelInstrumentationData( if (CmdTraceEvent) { // Stash the queue_id mutable metadata in TLS xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - Queue ? Queue->getQueueID() : 0); + Queue ? Queue->getQueueID() : 0); instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, @@ -2128,7 +2134,7 @@ void ExecCGCommand::emitInstrumentationData() { if (CmdTraceEvent) { xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CG::Kernel) { auto KernelCG = @@ -2151,7 +2157,8 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EXEC CG ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "EXEC CG ON " + << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; switch (MCommandGroup->getType()) { case detail::CG::Kernel: { @@ -2247,8 +2254,7 @@ void SetArgBasedOnType( const PluginPtr &Plugin, sycl::detail::pi::PiKernel Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, - const sycl::context &Context, detail::ArgDesc &Arg, - size_t NextTrueIndex) { + const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex) { switch (Arg.MType) { case kernel_param_kind_t::kind_stream: break; @@ -2338,8 +2344,7 @@ static pi_result SetKernelParamsAndLaunch( auto setFunc = [&Plugin, Kernel, &DeviceImageImpl, &getMemAllocationFunc, &Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) { SetArgBasedOnType(Plugin, Kernel, DeviceImageImpl, getMemAllocationFunc, - Queue->get_context(), Arg, - NextTrueIndex); + Queue->get_context(), Arg, NextTrueIndex); }; applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc); @@ -2639,7 +2644,8 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, bool blocking, void *ptr, size_t size, std::vector &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read) { - assert(Queue && "Queue with submitted read write host pipe could not be on host"); + assert(Queue && + "Queue with submitted read write host pipe could not be on host"); detail::HostPipeMapEntry *hostPipeEntry = ProgramManager::getInstance().getHostPipeEntry(PipeName); @@ -2856,7 +2862,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { flushCrossQueueDeps(EventImpls, MWorkerQueue); bool DiscardPiEvent = MQueue && MQueue->supportsDiscardingPiEvents() && - (MCommandGroup->getRequirements().size() == 0); + (MCommandGroup->getRequirements().size() == 0); sycl::detail::pi::PiEvent *Event = DiscardPiEvent ? nullptr : &MEvent->getHandleRef(); detail::EventImplPtr EventImpl = DiscardPiEvent ? 
nullptr : MEvent; @@ -2876,10 +2882,9 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { MemoryManager::copy( AllocaCmd->getSYCLMemObj(), AllocaCmd->getMemAllocation(), MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, - Req->MElemSize, Copy->getDst(), - nullptr, Req->MDims, - Req->MAccessRange, Req->MAccessRange, /*DstOffset=*/{0, 0, 0}, - Req->MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); + Req->MElemSize, Copy->getDst(), nullptr, Req->MDims, Req->MAccessRange, + Req->MAccessRange, /*DstOffset=*/{0, 0, 0}, Req->MElemSize, + std::move(RawEvents), MEvent->getHandleRef(), MEvent); return PI_SUCCESS; } @@ -2889,8 +2894,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); MemoryManager::copy( - AllocaCmd->getSYCLMemObj(), Copy->getSrc(), - nullptr, Req->MDims, + AllocaCmd->getSYCLMemObj(), Copy->getSrc(), nullptr, Req->MDims, Req->MAccessRange, Req->MAccessRange, /*SrcOffset*/ {0, 0, 0}, Req->MElemSize, AllocaCmd->getMemAllocation(), MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, @@ -2937,7 +2941,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { std::vector &Args = ExecKernel->MArgs; if (MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator) { + backend::ext_intel_esimd_emulator) { for (ArgDesc &Arg : Args) if (kernel_param_kind_t::kind_accessor == Arg.MType) { Requirement *Req = (Requirement *)(Arg.MPtr); @@ -2959,7 +2963,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { reinterpret_cast(ExecKernel->MHostKernel->getPtr()), NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], &NDRDesc.LocalSize[0], 0, nullptr, nullptr); - return PI_SUCCESS; + return PI_SUCCESS; } auto getMemAllocationFunc = [this](Requirement *Req) { @@ -3119,7 +3123,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::BarrierWaitlist: { - assert(MQueue && "Device queue must be present for barrier with wait list command"); + assert(MQueue && + "Device queue must be present for barrier with wait list command"); CGBarrier *Barrier = static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; std::vector PiEvents = @@ -3224,7 +3229,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::SemaphoreSignal: { - assert(MQueue && "Device queue must be present for semaphore signal command"); + assert(MQueue && + "Device queue must be present for semaphore signal command"); CGSemaphoreSignal *SemSignal = (CGSemaphoreSignal *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); @@ -3348,7 +3354,7 @@ void KernelFusionCommand::emitInstrumentationData() { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, static_cast(MTraceEvent), MInstanceID, @@ -3362,7 +3368,8 @@ void KernelFusionCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "KERNEL FUSION on " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n" + Stream << "KERNEL FUSION on " + << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n" << "FUSION LIST: {"; bool Initial = true; for (auto *Cmd : MFusionList) { diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 628ccdf2593da..63fb4853d88e4 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -373,10 +373,11 @@ class Command { std::string MSubmissionFunctionName; // This flag allows to control whether event should be set complete - // after successfull enqueue of command. Event is considered as "host" event if - // there is no backend representation of event (i.e. getHandleRef() return reference to nullptr value). - // By default the flag is set to true due to most of host operations are - // synchronous. The only asynchronous operation currently is host-task. + // after successfull enqueue of command. Event is considered as "host" event + // if there is no backend representation of event (i.e. getHandleRef() return + // reference to nullptr value). By default the flag is set to true due to most + // of host operations are synchronous. The only asynchronous operation + // currently is host-task. bool MShouldCompleteEventIfPossible = true; /// Indicates that the node will be freed by graph cleanup. Such nodes should @@ -792,8 +793,7 @@ void SetArgBasedOnType( const detail::plugin &Plugin, sycl::detail::pi::PiKernel Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, - const sycl::context &Context, detail::ArgDesc &Arg, - size_t NextTrueIndex); + const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex); void applyFuncOnFilteredArgs( const KernelArgMask *EliminatedArgMask, std::vector &Args, diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 2919932c4e788..2ac97baefb543 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -54,9 +54,10 @@ static bool IsSuitableSubReq(const Requirement *Req) { return Req->MIsSubBuffer; } -static bool isOnSameContext(const ContextImplPtr Context, const QueueImplPtr& Queue) -{ - // Covers case for host usage (nullptr == nullptr) and existing device contexts comparison. +static bool isOnSameContext(const ContextImplPtr Context, + const QueueImplPtr &Queue) { + // Covers case for host usage (nullptr == nullptr) and existing device + // contexts comparison. return Context == queue_impl::getContext(Queue); } @@ -289,8 +290,7 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { auto Context = queue_impl::getContext(Queue); - AllocaCommandBase *AllocaCmd = - findAllocaForReq(Record, Req, Context); + AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, Context); assert(AllocaCmd && "There must be alloca for requirement!"); UpdateHostRequirementCommand *UpdateCommand = new UpdateHostRequirementCommand(Queue, *Req, AllocaCmd, &Req->MData); @@ -298,8 +298,7 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( // dependencies become invalid if requirement is stored by pointer. 
const Requirement *StoredReq = UpdateCommand->getRequirement(); - std::set Deps = - findDepsForReq(Record, Req, Context); + std::set Deps = findDepsForReq(Record, Req, Context); std::vector ToCleanUp; for (Command *Dep : Deps) { Command *ConnCmd = @@ -353,8 +352,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); auto Context = queue_impl::getContext(Queue); - std::set Deps = - findDepsForReq(Record, Req, Context); + std::set Deps = findDepsForReq(Record, Req, Context); Deps.insert(AllocaCmdDst); // Get parent allocation of sub buffer to perform full copy of whole buffer if (IsSuitableSubReq(Req)) { @@ -434,8 +432,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( Command *Scheduler::GraphBuilder::remapMemoryObject( MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd, std::vector &ToEnqueue) { - assert(!HostAllocaCmd->getQueue() && - "Host alloca command expected"); + assert(!HostAllocaCmd->getQueue() && "Host alloca command expected"); assert(HostAllocaCmd->MIsActive && "Active alloca command expected"); AllocaCommandBase *LinkedAllocaCmd = HostAllocaCmd->MLinkedAllocaCmd; @@ -490,8 +487,7 @@ Scheduler::GraphBuilder::addCopyBack(Requirement *Req, if (nullptr == Record || !Record->MMemModified) return nullptr; - std::set Deps = - findDepsForReq(Record, Req, nullptr); + std::set Deps = findDepsForReq(Record, Req, nullptr); AllocaCommandBase *SrcAllocaCmd = findAllocaForReq(Record, Req, Record->MCurContext); @@ -531,7 +527,8 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, auto SYCLMemObj = static_cast(Req->MSYCLMemObj); SYCLMemObj->handleWriteAccessorCreation(); } - // Host accessor is not attached to any queue so no QueueImplPtr object to be sent to getOrInsertMemObjRecord. + // Host accessor is not attached to any queue so no QueueImplPtr object to be + // sent to getOrInsertMemObjRecord. MemObjRecord *Record = getOrInsertMemObjRecord(nullptr, Req); if (MPrintOptionsArray[BeforeAddHostAcc]) printGraphAsDot("before_addHostAccessor"); @@ -556,8 +553,8 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, insertUpdateHostReqCmd(Record, Req, nullptr, ToEnqueue); // Need empty command to be blocked until host accessor is destructed - EmptyCommand *EmptyCmd = - addEmptyCmd(UpdateHostAccCmd, {Req}, Command::BlockReason::HostAccessor, ToEnqueue); + EmptyCommand *EmptyCmd = addEmptyCmd( + UpdateHostAccCmd, {Req}, Command::BlockReason::HostAccessor, ToEnqueue); Req->MBlockedCmd = EmptyCmd; @@ -621,8 +618,7 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, CanBypassDep |= !doOverlap(Dep.MDepRequirement, Req); // Going through copying memory between contexts is not supported. - if (Dep.MDepCommand) - { + if (Dep.MDepCommand) { auto DepQueue = Dep.MDepCommand->getQueue(); CanBypassDep &= isOnSameContext(Context, DepQueue); } @@ -686,7 +682,8 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { if (std::strcmp(HUMConfig, "1") == 0) return true; } - // host task & host accessor is covered with no device context but provide required support. + // host task & host accessor is covered with no device context but provide + // required support. 
if (Ctx == nullptr) return true; @@ -705,8 +702,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { auto Context = queue_impl::getContext(Queue); - AllocaCommandBase *AllocaCmd = findAllocaForReq( - Record, Req, Context, /*AllowConst=*/false); + AllocaCommandBase *AllocaCmd = + findAllocaForReq(Record, Req, Context, /*AllowConst=*/false); if (!AllocaCmd) { std::vector ToCleanUp; @@ -736,8 +733,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // TODO the case where the first alloca is made with a discard mode and // the user pointer is read-only is still not handled: it leads to // unnecessary copy on devices with unified host memory support. - const bool HostUnifiedMemory = - checkHostUnifiedMemory(Context); + const bool HostUnifiedMemory = checkHostUnifiedMemory(Context); SYCLMemObjI *MemObj = Req->MSYCLMemObj; const bool InitFromUserData = Record->MAllocaCommands.empty() && (HostUnifiedMemory || MemObj->isInterop()); @@ -828,10 +824,9 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( AllocaCmd->MIsActive = false; } else { LinkedAllocaCmd->MIsActive = false; - Record->MCurContext =Context; + Record->MCurContext = Context; - std::set Deps = - findDepsForReq(Record, Req, Context); + std::set Deps = findDepsForReq(Record, Req, Context); for (Command *Dep : Deps) { Command *ConnCmd = AllocaCmd->addDep( DepDesc{Dep, Req, LinkedAllocaCmd}, ToCleanUp); @@ -871,8 +866,7 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( Command *Cmd, const std::vector &Reqs, - Command::BlockReason Reason, - std::vector &ToEnqueue) { + Command::BlockReason Reason, std::vector &ToEnqueue) { EmptyCommand *EmptyCmd = new EmptyCommand(); if (!EmptyCmd) @@ -1343,8 +1337,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( /* DepEvents = */ {DepEvent}), CG::CodeplayHostTask, /* Payload */ {})); - ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), nullptr); + ConnectCmd = new ExecCGCommand(std::move(ConnectCG), nullptr); } catch (const std::bad_alloc &) { throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); } @@ -1719,13 +1712,11 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( NeedMemMoveToHost = true; if (NeedMemMoveToHost) - insertMemoryMove(Record, Req, - nullptr, - ToEnqueue); + insertMemoryMove(Record, Req, nullptr, ToEnqueue); insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue); } std::set Deps = - findDepsForReq(Record, Req, queue_impl::getContext(Queue)); + findDepsForReq(Record, Req, queue_impl::getContext(Queue)); for (Command *Dep : Deps) { if (Dep != NewCmd.get()) { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 52eb59b225004..4d26c2a822457 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -110,13 +110,13 @@ EventImplPtr Scheduler::addCG( Command *NewCmd = nullptr; switch (Type) { case CG::UpdateHost: - NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), - AuxiliaryCmds); + NewCmd = + MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), AuxiliaryCmds); NewEvent = NewCmd->getEvent(); break; case CG::CodeplayHostTask: { - auto Result = MGraphBuilder.addCG(std::move(CommandGroup), - nullptr, AuxiliaryCmds); + auto Result = + MGraphBuilder.addCG(std::move(CommandGroup), nullptr, AuxiliaryCmds); 
NewCmd = Result.NewCmd; NewEvent = Result.NewEvent; ShouldEnqueue = Result.ShouldEnqueue; diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 75c80745ec71c..7e81e964bdc17 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -76,14 +76,15 @@ size_t stream_impl::get_size() const { return BufferSize_; } size_t stream_impl::get_max_statement_size() const { return MaxStatementSize_; } -void stream_impl::generateFlushCommand(handler& cgh) -{ +void stream_impl::generateFlushCommand(handler &cgh) { // Create accessor to the flush buffer even if not using it yet. Otherwise // kernel will be a leaf for the flush buffer and scheduler will not be able // to cleanup the kernel. TODO: get rid of finalize method by using host // accessor to the flush buffer. - host_accessor FlushBuffHostAcc(FlushBuf_, cgh); - host_accessor BufHostAcc (Buf_, cgh, range<1>(BufferSize_), id<1>(OffsetSize)); + host_accessor FlushBuffHostAcc(FlushBuf_, + cgh); + host_accessor BufHostAcc( + Buf_, cgh, range<1>(BufferSize_), id<1>(OffsetSize)); cgh.host_task([=] { if (!BufHostAcc.empty()) { @@ -106,14 +107,14 @@ void stream_impl::generateFlushCommand(handler& cgh) }); } - // ABI break: remove - void stream_impl::initStreamHost(QueueImplPtr ){}; +// ABI break: remove +void stream_impl::initStreamHost(QueueImplPtr){}; - // ABI break: remove - void stream_impl::flush(const EventImplPtr &) {}; +// ABI break: remove +void stream_impl::flush(const EventImplPtr &) {}; - // ABI break: remove - void stream_impl::flush() {}; +// ABI break: remove +void stream_impl::flush() {}; } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index 4fc1f4b1d5a8a..670931c815185 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -68,7 +68,7 @@ class __SYCL_EXPORT stream_impl { return PropList_.get_property(); } - void generateFlushCommand(handler& cgh); + void generateFlushCommand(handler &cgh); private: // Size of the stream buffer diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 7440a3b816ce2..68207bec67d53 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -209,8 +209,9 @@ void SYCLMemObjT::detachMemoryObject( !MOwnNativeHandle || (MInteropContext && !MInteropContext->isOwnedByRuntime()); - if (MRecord && MRecord->MCurContext && MRecord->MCurContext->isOwnedByRuntime() && - !InteropObjectsUsed && (!MHostPtrProvided || MIsInternal)) { + if (MRecord && MRecord->MCurContext && + MRecord->MCurContext->isOwnedByRuntime() && !InteropObjectsUsed && + (!MHostPtrProvided || MIsInternal)) { bool okToDefer = GlobalHandler::instance().isOkToDefer(); if (okToDefer) Scheduler::getInstance().deferMemObjRelease(Self); diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 753c27d5f678d..57c54275069e6 100755 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -73,33 +73,33 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, return nullptr; std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - pi_context C = CtxImpl->getHandleRef(); - const PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_result Error = PI_ERROR_INVALID_VALUE; - - switch (Kind) { - case alloc::host: { - std::array Props; - auto PropsIter = Props.begin(); - - if (PropList.has_property() && - 
Ctxt.get_platform().has_extension( - "cl_intel_mem_alloc_buffer_location")) { - *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; - *PropsIter++ = PropList - .get_property() - .get_buffer_location(); - } + pi_context C = CtxImpl->getHandleRef(); + const PluginPtr &Plugin = CtxImpl->getPlugin(); + pi_result Error = PI_ERROR_INVALID_VALUE; + + switch (Kind) { + case alloc::host: { + std::array Props; + auto PropsIter = Props.begin(); + + if (PropList.has_property< + sycl::ext::intel::experimental::property::usm::buffer_location>() && + Ctxt.get_platform().has_extension( + "cl_intel_mem_alloc_buffer_location")) { + *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; + *PropsIter++ = PropList + .get_property() + .get_buffer_location(); + } - assert(PropsIter >= Props.begin() && PropsIter < Props.end()); - *PropsIter++ = 0; // null-terminate property list + assert(PropsIter >= Props.begin() && PropsIter < Props.end()); + *PropsIter++ = 0; // null-terminate property list - Error = Plugin->call_nocheck( - &RetVal, C, Props.data(), Size, Alignment); + Error = Plugin->call_nocheck( + &RetVal, C, Props.data(), Size, Alignment); - break; + break; } case alloc::device: case alloc::shared: diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 1947e31b7daaa..e1bc8c894f311 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -83,8 +83,7 @@ class DependsOnTests : public ::testing::Test { detail::Command *NewCmd = MS.addCG( std::move(CmdGroup), - Type == TestCGType::HOST_TASK ? nullptr : QueueDevImpl, - ToEnqueue); + Type == TestCGType::HOST_TASK ? nullptr : QueueDevImpl, ToEnqueue); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; } diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp index 437f98b1579a6..c3681bfc07a3b 100644 --- a/sycl/unittests/scheduler/GraphCleanup.cpp +++ b/sycl/unittests/scheduler/GraphCleanup.cpp @@ -245,7 +245,8 @@ TEST_F(SchedulerTest, PostEnqueueCleanup) { checkCleanupOnLeafUpdate( MS, QueueImpl, Buf, MockReq, [&](detail::MemObjRecord *Record) { detail::Command *Leaf = *Record->MWriteLeaves.begin(); - MS.addEmptyCmd(Leaf, {&MockReq}, detail::Command::BlockReason::HostTask, ToEnqueue); + MS.addEmptyCmd(Leaf, {&MockReq}, detail::Command::BlockReason::HostTask, + ToEnqueue); }); checkCleanupOnLeafUpdate( MS, nullptr, Buf, MockReq, [&](detail::MemObjRecord *Record) { diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index bffdf6af4afe2..9ce9a1f944349 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -91,8 +91,7 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { // Check that sequential memory movements submitted to the same in-order // queue do not depend on each other. 
- detail::Command *Cmd = - MS.insertMemoryMove(Record, &Req, nullptr, AuxCmds); + detail::Command *Cmd = MS.insertMemoryMove(Record, &Req, nullptr, AuxCmds); detail::EnqueueResultT Res; auto ReadLock = MS.acquireGraphReadLock(); MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); diff --git a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp index 71f30f91117a0..565c3b2a2314c 100644 --- a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp +++ b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp @@ -60,8 +60,8 @@ TEST_F(SchedulerTest, LeafLimitDiffContexts) { std::vector ToEnqueue; AllocaCmd = MS.getOrCreateAllocaForReq( Rec, &MockReq, detail::getSyclObjImpl(Queue), ToEnqueue); - std::ignore = MS.getOrCreateAllocaForReq( - Rec, &MockReq, nullptr, ToEnqueue); + std::ignore = + MS.getOrCreateAllocaForReq(Rec, &MockReq, nullptr, ToEnqueue); DepCmd = std::make_unique(detail::getSyclObjImpl(Queue), MockReq); } diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp index 39146ffaa95e8..e0732926537b0 100644 --- a/sycl/unittests/scheduler/LeavesCollection.cpp +++ b/sycl/unittests/scheduler/LeavesCollection.cpp @@ -36,8 +36,7 @@ createGenericCommand(const std::shared_ptr &Q) { return std::shared_ptr{new MockCommand(Q, Command::RUN_CG)}; } -std::shared_ptr -createEmptyCommand(const Requirement &Req) { +std::shared_ptr createEmptyCommand(const Requirement &Req) { EmptyCommand *Cmd = new EmptyCommand(); Cmd->addRequirement(/* DepCmd = */ nullptr, /* AllocaCmd = */ nullptr, &Req); Cmd->MBlockReason = Command::BlockReason::HostAccessor; diff --git a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp index 6ae6b9bfc2344..b08b211d1e2dc 100644 --- a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp +++ b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp @@ -69,8 +69,7 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) { std::vector &) {}; std::shared_ptr Record{ - new sycl::detail::MemObjRecord(nullptr, 10, - AllocaDep)}; + new sycl::detail::MemObjRecord(nullptr, 10, AllocaDep)}; MemObjMock MemObj(Record); Req.MSYCLMemObj = &MemObj; diff --git a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp index 83a0702861141..24a19977844fb 100644 --- a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp +++ b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp @@ -152,8 +152,7 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // No special handling required: alloca commands are created one after // another and the transfer is done via a write operation. 
- detail::MemObjRecord *Record = - MS.getOrInsertMemObjRecord(nullptr, &Req); + detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(nullptr, &Req); std::vector AuxCmds; detail::AllocaCommandBase *HostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp b/sycl/unittests/scheduler/QueueFlushing.cpp index 330ff7e0f02d2..c90db25fc019a 100644 --- a/sycl/unittests/scheduler/QueueFlushing.cpp +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -125,14 +125,12 @@ TEST_F(SchedulerTest, QueueFlushing) { detail::AllocaCommand HostAllocaCmd = detail::AllocaCommand(nullptr, MockReq); - detail::MemCpyCommand MemCpyCmd{MockReq, &AllocaCmd, - MockReq, &HostAllocaCmd, - QueueImplA, nullptr}; + detail::MemCpyCommand MemCpyCmd{MockReq, &AllocaCmd, MockReq, + &HostAllocaCmd, QueueImplA, nullptr}; testCommandEnqueue(&MemCpyCmd, QueueImplB, MockReq); - detail::MemCpyCommandHost MemCpyCmdHost{MockReq, &AllocaCmd, - MockReq, &MockHostPtr, - QueueImplA, nullptr}; + detail::MemCpyCommandHost MemCpyCmdHost{MockReq, &AllocaCmd, MockReq, + &MockHostPtr, QueueImplA, nullptr}; testCommandEnqueue(&MemCpyCmdHost, QueueImplB, MockReq); std::unique_ptr CG{ diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index d1e7f22aa9485..789961b081da8 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -11,8 +11,8 @@ #include #include -#include #include +#include using namespace sycl; From c76484daf99edc74b77d6722fdbb4d62b707df56 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 19 Jun 2024 05:56:31 -0700 Subject: [PATCH 31/52] fix clang-format Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/program_impl.cpp | 93 ++++++++-------- sycl/source/detail/usm/usm_impl.cpp | 160 ++++++++++++++-------------- 2 files changed, 126 insertions(+), 127 deletions(-) mode change 100755 => 100644 sycl/source/detail/usm/usm_impl.cpp diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index df95614d872c3..f3ac2185627f9 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -94,22 +94,22 @@ program_impl::program_impl( } } - std::vector Devices(get_pi_devices()); - std::vector Programs; - bool NonInterOpToLink = false; - for (const auto &Prg : ProgramList) { - if (!Prg->MLinkable && NonInterOpToLink) - continue; - NonInterOpToLink |= !Prg->MLinkable; - Programs.push_back(Prg->MProgram); - } - const PluginPtr &Plugin = getPlugin(); - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck( - MContext->getHandleRef(), Devices.size(), Devices.data(), - LinkOptions.c_str(), Programs.size(), Programs.data(), nullptr, - nullptr, &MProgram); - Plugin->checkPiResult(Err); + std::vector Devices(get_pi_devices()); + std::vector Programs; + bool NonInterOpToLink = false; + for (const auto &Prg : ProgramList) { + if (!Prg->MLinkable && NonInterOpToLink) + continue; + NonInterOpToLink |= !Prg->MLinkable; + Programs.push_back(Prg->MProgram); + } + const PluginPtr &Plugin = getPlugin(); + sycl::detail::pi::PiResult Err = + Plugin->call_nocheck( + MContext->getHandleRef(), Devices.size(), Devices.data(), + LinkOptions.c_str(), Programs.size(), Programs.data(), nullptr, + nullptr, &MProgram); + Plugin->checkPiResult(Err); } program_impl::program_impl(ContextImplPtr Context, @@ -236,22 +236,22 @@ void program_impl::link(std::string 
LinkOptions) { const char *LinkOpts = SYCLConfig::get(); if (!LinkOpts) { LinkOpts = LinkOptions.c_str(); - } + } - // Plugin resets MProgram with a new pi_program as a result of the call to - // "piProgramLink". Thus, we need to release MProgram before the call to - // piProgramLink. - if (MProgram != nullptr) - Plugin->call(MProgram); - - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck( - MContext->getHandleRef(), Devices.size(), Devices.data(), LinkOpts, - /*num_input_programs*/ 1, &MProgram, nullptr, nullptr, &MProgram); - Plugin->checkPiResult(Err); - MLinkOptions = LinkOptions; - MBuildOptions = LinkOptions; - MState = program_state::linked; + // Plugin resets MProgram with a new pi_program as a result of the call to + // "piProgramLink". Thus, we need to release MProgram before the call to + // piProgramLink. + if (MProgram != nullptr) + Plugin->call(MProgram); + + sycl::detail::pi::PiResult Err = + Plugin->call_nocheck( + MContext->getHandleRef(), Devices.size(), Devices.data(), LinkOpts, + /*num_input_programs*/ 1, &MProgram, nullptr, nullptr, &MProgram); + Plugin->checkPiResult(Err); + MLinkOptions = LinkOptions; + MBuildOptions = LinkOptions; + MState = program_state::linked; } bool program_impl::has_kernel(std::string KernelName, @@ -363,24 +363,23 @@ std::pair program_impl::get_pi_kernel_arg_mask_pair(const std::string &KernelName) const { std::pair Result; - const PluginPtr &Plugin = getPlugin(); - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck( - MProgram, KernelName.c_str(), &Result.first); - if (Err == PI_ERROR_INVALID_KERNEL_NAME) { - throw invalid_object_error( - "This instance of program does not contain the kernel requested", - Err); - } - Plugin->checkPiResult(Err); + const PluginPtr &Plugin = getPlugin(); + sycl::detail::pi::PiResult Err = + Plugin->call_nocheck( + MProgram, KernelName.c_str(), &Result.first); + if (Err == PI_ERROR_INVALID_KERNEL_NAME) { + throw invalid_object_error( + "This instance of program does not contain the kernel requested", Err); + } + Plugin->checkPiResult(Err); - // Some PI Plugins (like OpenCL) require this call to enable USM - // For others, PI will turn this into a NOP. - if (getContextImplPtr()->getPlatformImpl()->supports_usm()) - Plugin->call( - Result.first, PI_USM_INDIRECT_ACCESS, sizeof(pi_bool), &PI_TRUE); + // Some PI Plugins (like OpenCL) require this call to enable USM + // For others, PI will turn this into a NOP. + if (getContextImplPtr()->getPlatformImpl()->supports_usm()) + Plugin->call( + Result.first, PI_USM_INDIRECT_ACCESS, sizeof(pi_bool), &PI_TRUE); - return Result; + return Result; } std::vector diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp old mode 100755 new mode 100644 index 57c54275069e6..7237e88be440f --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -100,20 +100,20 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, &RetVal, C, Props.data(), Size, Alignment); break; - } - case alloc::device: - case alloc::shared: - case alloc::unknown: { - RetVal = nullptr; - Error = PI_ERROR_INVALID_VALUE; - break; - } - } + } + case alloc::device: + case alloc::shared: + case alloc::unknown: { + RetVal = nullptr; + Error = PI_ERROR_INVALID_VALUE; + break; + } + } - // Error is for debugging purposes. - // The spec wants a nullptr returned, not an exception. - if (Error != PI_SUCCESS) - return nullptr; + // Error is for debugging purposes. + // The spec wants a nullptr returned, not an exception. 
+ if (Error != PI_SUCCESS) + return nullptr; #ifdef XPTI_ENABLE_INSTRUMENTATION xpti::addMetadata(PrepareNotify.traceEvent(), "memory_ptr", reinterpret_cast(RetVal)); @@ -139,79 +139,79 @@ void *alignedAllocInternal(size_t Alignment, size_t Size, if (Size == 0) return nullptr; - pi_context C = CtxImpl->getHandleRef(); - const PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_result Error = PI_ERROR_INVALID_VALUE; - pi_device Id; + pi_context C = CtxImpl->getHandleRef(); + const PluginPtr &Plugin = CtxImpl->getPlugin(); + pi_result Error = PI_ERROR_INVALID_VALUE; + pi_device Id; - switch (Kind) { - case alloc::device: { - Id = DevImpl->getHandleRef(); + switch (Kind) { + case alloc::device: { + Id = DevImpl->getHandleRef(); - std::array Props; - auto PropsIter = Props.begin(); + std::array Props; + auto PropsIter = Props.begin(); - // Buffer location is only supported on FPGA devices - if (PropList.has_property() && - DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) { - *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; - *PropsIter++ = PropList - .get_property() - .get_buffer_location(); - } + // Buffer location is only supported on FPGA devices + if (PropList.has_property< + sycl::ext::intel::experimental::property::usm::buffer_location>() && + DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) { + *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; + *PropsIter++ = PropList + .get_property() + .get_buffer_location(); + } - assert(PropsIter >= Props.begin() && PropsIter < Props.end()); - *PropsIter++ = 0; // null-terminate property list + assert(PropsIter >= Props.begin() && PropsIter < Props.end()); + *PropsIter++ = 0; // null-terminate property list - Error = Plugin->call_nocheck( - &RetVal, C, Id, Props.data(), Size, Alignment); + Error = Plugin->call_nocheck( + &RetVal, C, Id, Props.data(), Size, Alignment); - break; - } - case alloc::shared: { - Id = DevImpl->getHandleRef(); - - std::array Props; - auto PropsIter = Props.begin(); - - if (PropList.has_property< - sycl::ext::oneapi::property::usm::device_read_only>()) { - *PropsIter++ = PI_MEM_ALLOC_FLAGS; - *PropsIter++ = PI_MEM_ALLOC_DEVICE_READ_ONLY; - } - - if (PropList.has_property() && - DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) { - *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; - *PropsIter++ = PropList - .get_property() - .get_buffer_location(); - } - - assert(PropsIter >= Props.begin() && PropsIter < Props.end()); - *PropsIter++ = 0; // null-terminate property list - - Error = Plugin->call_nocheck( - &RetVal, C, Id, Props.data(), Size, Alignment); - - break; - } - case alloc::host: - case alloc::unknown: { - RetVal = nullptr; - Error = PI_ERROR_INVALID_VALUE; - break; + break; + } + case alloc::shared: { + Id = DevImpl->getHandleRef(); + + std::array Props; + auto PropsIter = Props.begin(); + + if (PropList.has_property< + sycl::ext::oneapi::property::usm::device_read_only>()) { + *PropsIter++ = PI_MEM_ALLOC_FLAGS; + *PropsIter++ = PI_MEM_ALLOC_DEVICE_READ_ONLY; } + + if (PropList.has_property< + sycl::ext::intel::experimental::property::usm::buffer_location>() && + DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) { + *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; + *PropsIter++ = PropList + .get_property() + .get_buffer_location(); } - // Error is for debugging purposes. - // The spec wants a nullptr returned, not an exception. 
- if (Error != PI_SUCCESS) - return nullptr; + assert(PropsIter >= Props.begin() && PropsIter < Props.end()); + *PropsIter++ = 0; // null-terminate property list + + Error = Plugin->call_nocheck( + &RetVal, C, Id, Props.data(), Size, Alignment); + + break; + } + case alloc::host: + case alloc::unknown: { + RetVal = nullptr; + Error = PI_ERROR_INVALID_VALUE; + break; + } + } + + // Error is for debugging purposes. + // The spec wants a nullptr returned, not an exception. + if (Error != PI_SUCCESS) + return nullptr; return RetVal; } @@ -250,9 +250,9 @@ void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt, void freeInternal(void *Ptr, const context_impl *CtxImpl) { if (Ptr == nullptr) return; - pi_context C = CtxImpl->getHandleRef(); - const PluginPtr &Plugin = CtxImpl->getPlugin(); - Plugin->call(C, Ptr); + pi_context C = CtxImpl->getHandleRef(); + const PluginPtr &Plugin = CtxImpl->getPlugin(); + Plugin->call(C, Ptr); } void free(void *Ptr, const context &Ctxt, From 61d1c6208e4ef52c3b72908b9f904ba9869ffdb5 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 19 Jun 2024 08:52:31 -0700 Subject: [PATCH 32/52] fix connect task queue Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/graph_builder.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 2ac97baefb543..7cfc0446fdd69 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1330,7 +1330,8 @@ Command *Scheduler::GraphBuilder::connectDepEvent( try { std::unique_ptr HT(new detail::HostTask); std::unique_ptr ConnectCG(new detail::CGHostTask( - std::move(HT), /* Queue = */ {}, /* Context = */ {}, /* Args = */ {}, + std::move(HT), /* Queue = */ Cmd->getQueue(), /* Context = */ {}, + /* Args = */ {}, detail::CG::StorageInitHelper( /* ArgsStorage = */ {}, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, /* Requirements = */ {}, From 5814e466577f0b99d6d6095d3e0d68a25452203c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 20 Jun 2024 06:30:09 -0700 Subject: [PATCH 33/52] fix bugs Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 11 +++++++++-- sycl/source/detail/queue_impl.cpp | 4 ++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 7d91129f25b51..a270867f6b637 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -262,7 +262,8 @@ void event_impl::wait_and_throw( void event_impl::checkProfilingPreconditions() const { std::weak_ptr EmptyPtr; - if (!EmptyPtr.owner_before(MQueue) && !MQueue.owner_before(EmptyPtr)) { + if (!MIsHostEvent && !EmptyPtr.owner_before(MQueue) && + !MQueue.owner_before(EmptyPtr)) { throw sycl::exception(make_error_code(sycl::errc::invalid), "Profiling information is unavailable as the event " "has no associated queue."); @@ -300,7 +301,7 @@ event_impl::get_profiling_info() { // made by forcing the re-sync of submit time to start time is less than // 0.5ms. These timing values were obtained empirically using an integrated // Intel GPU). 
- if (MEventFromSubmittedExecCommandBuffer && MEvent) { + if (MEventFromSubmittedExecCommandBuffer && !MIsHostEvent && MEvent) { uint64_t StartTime = get_event_profiling_info( this->getHandleRef(), this->getPlugin()); @@ -546,6 +547,12 @@ void event_impl::setSubmissionTime() { e.what()); std::rethrow_exception(std::current_exception()); } + } else { + // Returning host time + using namespace std::chrono; + MSubmitTime = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); } } else { // Capture the host timestamp for a return value of function call diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 83f33688ed0b1..572b0b8cf568a 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -299,12 +299,12 @@ void queue_impl::addEvent(const event &Event) { // if there is no command on the event, we cannot track it with MEventsWeak // as that will leave it with no owner. Track in MEventsShared only if we're // unable to call piQueueFinish during wait. - if (EImpl->isHost() || MEmulateOOO) + if (MEmulateOOO) addSharedEvent(Event); } // As long as the queue supports piQueueFinish we only need to store events // for unenqueued commands and host tasks. - else if (EImpl->isHost() || MEmulateOOO || EImpl->getHandleRef() == nullptr) { + else if (MEmulateOOO || EImpl->getHandleRef() == nullptr) { std::weak_ptr EventWeakPtr{EImpl}; std::lock_guard Lock{MMutex}; MEventsWeak.push_back(std::move(EventWeakPtr)); From a03468173acf6f9c58593685069d030955a4782c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 20 Jun 2024 09:43:06 -0700 Subject: [PATCH 34/52] fix work with graph Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 4 ++-- sycl/source/detail/queue_impl.cpp | 21 ++++++++++++++++----- sycl/source/detail/queue_impl.hpp | 16 +++++++++++++--- sycl/source/detail/scheduler/commands.cpp | 20 ++++++++++---------- 4 files changed, 41 insertions(+), 20 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index a270867f6b637..e203924d2d612 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -48,7 +48,7 @@ event_impl::~event_impl() { } void event_impl::waitInternal(bool *Success) { - if (MEvent) { + if (!MIsHostEvent && MEvent) { // Wait for the native event sycl::detail::pi::PiResult Err = getPlugin()->call_nocheck(1, &MEvent); @@ -390,7 +390,7 @@ event_impl::get_info() { return sycl::info::event_command_status::submitted; } - return MState.load() != HES_Complete + return MIsHostEvent && MState.load() != HES_Complete ? 
sycl::info::event_command_status::submitted : info::event_command_status::complete; } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 572b0b8cf568a..a5f9ae9964ac6 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -696,6 +696,19 @@ void queue_impl::revisitUnenqueuedCommandsState( const EventImplPtr &CompletedHostTask) { if (MIsInorder) return; + + std::unique_lock Lock{MMutex, std::try_to_lock}; + if (Lock.owns_lock()) + doUnenqueuedCommandCleanup(CompletedHostTask->getCommandGraph()); + else { + std::lock_guard RequestLock(MMissedCleanupRequestsMtx); + MMissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); + } +} + +void queue_impl::doUnenqueuedCommandCleanup( + const std::shared_ptr + &Graph) { auto tryToCleanup = [](DependencyTrackingItems &Deps) { if (Deps.LastBarrier && Deps.LastBarrier->isEnqueued()) { Deps.LastBarrier = nullptr; @@ -713,14 +726,12 @@ void queue_impl::revisitUnenqueuedCommandsState( Deps.UnenqueuedCmdEvents.end()); } }; - std::lock_guard Lock{MMutex}; // Barrier enqueue could be significantly postponed due to host task // dependency if any. No guarantee that it will happen while same graph deps // are still recording. - if (auto Graph = CompletedHostTask->getCommandGraph()) { - if (Graph == getCommandGraph()) - tryToCleanup(MExtGraphDeps); - } else + if (Graph && Graph == getCommandGraph()) + tryToCleanup(MExtGraphDeps); + else tryToCleanup(MDefaultGraphDeps); } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index d0a74cc80c793..aa3dd9fc780bf 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -93,7 +93,7 @@ class queue_impl { /// \param PropList is a list of properties to use for queue construction. queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList) - : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){}; + : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {}; /// Constructs a SYCL queue with an async_handler and property_list provided /// form a device and a context. @@ -749,6 +749,9 @@ class queue_impl { // tasks and host tasks is applicable for out of order queues only. Not neede // for in order ones. void revisitUnenqueuedCommandsState(const EventImplPtr &CompletedHostTask); + void doUnenqueuedCommandCleanup( + const std::shared_ptr + &Graph); static ContextImplPtr getContext(const QueueImplPtr &Queue) { return Queue ? Queue->getContextImplPtr() : nullptr; @@ -790,13 +793,12 @@ class queue_impl { EventToBuildDeps = getSyclObjImpl(EventRet); } else { const CG::CGTYPE Type = Handler.getType(); - + std::lock_guard Lock{MMutex}; // The following code supports barrier synchronization if host task is // involved in the scenario. Native barriers cannot handle host task // dependency so in the case where some commands were not enqueued // (blocked), we track them to prevent barrier from being enqueued // earlier. - std::lock_guard Lock{MMutex}; auto &Deps = MGraph.expired() ? 
MDefaultGraphDeps : MExtGraphDeps; if (Type == CG::Barrier && !Deps.UnenqueuedCmdEvents.empty()) { Handler.depends_on(Deps.UnenqueuedCmdEvents); @@ -814,6 +816,10 @@ class queue_impl { } else Deps.UnenqueuedCmdEvents.push_back(EventRetImpl); } + std::lock_guard RequestLock(MMissedCleanupRequestsMtx); + for (auto &UpdatedGraph : MMissedCleanupRequests) + doUnenqueuedCommandCleanup(UpdatedGraph); + MMissedCleanupRequests.clear(); } } @@ -966,6 +972,10 @@ class queue_impl { unsigned long long MQueueID; static std::atomic MNextAvailableQueueID; + std::deque> + MMissedCleanupRequests; + std::mutex MMissedCleanupRequestsMtx; + friend class sycl::ext::oneapi::experimental::detail::node_impl; }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 3d51fe7a1c12f..6322b904fd6bc 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2954,16 +2954,16 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Plugin->call(RawEvents.size(), &RawEvents[0]); } - assert(MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator); - if (MEvent != nullptr) - MEvent->setHostEnqueueTime(); - MQueue->getPlugin()->call( - nullptr, - reinterpret_cast(ExecKernel->MHostKernel->getPtr()), - NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], - &NDRDesc.LocalSize[0], 0, nullptr, nullptr); - return PI_SUCCESS; + assert(MQueue->getDeviceImplPtr()->getBackend() == + backend::ext_intel_esimd_emulator); + if (MEvent != nullptr) + MEvent->setHostEnqueueTime(); + MQueue->getPlugin()->call( + nullptr, + reinterpret_cast(ExecKernel->MHostKernel->getPtr()), + NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], + &NDRDesc.LocalSize[0], 0, nullptr, nullptr); + return PI_SUCCESS; } auto getMemAllocationFunc = [this](Requirement *Req) { From c274c5ec74a0e92306824194a7f5ef9509c83df2 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 20 Jun 2024 10:14:54 -0700 Subject: [PATCH 35/52] fix tracing tests Signed-off-by: Tikhomirova, Kseniya --- .../Tracing/code_location_queue_submit.cpp | 13 +++---------- sycl/test-e2e/Tracing/task_execution.cpp | 18 ++++++------------ .../Tracing/task_execution_handler.cpp | 4 ++-- 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/sycl/test-e2e/Tracing/code_location_queue_submit.cpp b/sycl/test-e2e/Tracing/code_location_queue_submit.cpp index 6ebfe43e936e5..ce780f5e81725 100644 --- a/sycl/test-e2e/Tracing/code_location_queue_submit.cpp +++ b/sycl/test-e2e/Tracing/code_location_queue_submit.cpp @@ -5,8 +5,7 @@ // Test tracing of the code location data for queue.submit in case of failure // (exception generation) -// First queue creation (id = 0) is queue created on line 15. -// The second queue is a host queue created on first scheduler usage. +// First queue creation (id = 0) is queue created on line 17. 
#include #include @@ -19,16 +18,10 @@ int main() { unsigned char *HostAllocDst = NULL; // CHECK: [SYCL] Queue create: // CHECK-DAG: queue_handle : {{.*}} - // CHECK-DAG: queue_id : 0 - // CHECK-DAG: is_inorder : false - // CHECK-DAG: sycl_device : {{.*}} - // CHECK-DAG: sycl_device_name : {{.*}} - // CHECK-DAG: sycl_context : {{.*}} - // CHECK-NEXT: [SYCL] Queue create: // CHECK-DAG: queue_id : 1 // CHECK-DAG: is_inorder : false // CHECK-DAG: sycl_device : {{.*}} - // CHECK-DAG: sycl_device_name : SYCL host device + // CHECK-DAG: sycl_device_name : {{.*}} // CHECK-DAG: sycl_context : {{.*}} // CHECK: [SYCL] Runtime reports: // CHECK-NEXT: what: NULL pointer argument in memory copy operation. -30 (PI_ERROR_INVALID_VALUE) @@ -44,6 +37,6 @@ int main() { sycl::free(HostAllocSrc, Q); } // CHECK-NEXT: [SYCL] Queue destroy: - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 return !ExceptionCaught; } diff --git a/sycl/test-e2e/Tracing/task_execution.cpp b/sycl/test-e2e/Tracing/task_execution.cpp index d591c20b8f6c0..b4932df0eda55 100644 --- a/sycl/test-e2e/Tracing/task_execution.cpp +++ b/sycl/test-e2e/Tracing/task_execution.cpp @@ -15,38 +15,32 @@ int main() { Q.copy(AllocDst, AllocSrc, 1).wait(); // CHECK: [SYCL] Queue create: // CHECK-DAG: queue_handle : {{.*}} - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 // CHECK-DAG: is_inorder : false // CHECK-DAG: sycl_device : {{.*}} // CHECK-DAG: sycl_device_name : {{.*}} // CHECK-DAG: sycl_context : {{.*}} // CHECK-NEXT: [SYCL] Task begin (event={{.*}},instanceID={{.*}}) - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 // CHECK-DAG: memory_size : 1 // CHECK-DAG: value_set : 0 // CHECK-DAG: memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: [SYCL] Task end (event={{.*}},instanceID={{.*}}) // CHECK-NEXT: [SYCL] Task begin (event={{.*}},instanceID={{.*}}) - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 // CHECK-DAG: memory_size : 1 // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: [SYCL] Task end (event={{.*}},instanceID={{.*}}) - // CHECK-NEXT: [SYCL] Queue create: - // CHECK-DAG: queue_id : 1 - // CHECK-DAG: is_inorder : false - // CHECK-DAG: sycl_device : {{.*}} - // CHECK-DAG: sycl_device_name : SYCL host device - // CHECK-DAG: sycl_context : {{.*}} Q.single_task([]() {}).wait(); // CHECK-NEXT: [SYCL] Task begin (event={{.*}},instanceID={{.*}}) // CHECK-DAG: enqueue_kernel_data : {{.*}} // CHECK-DAG: sym_column_no : {{.*}} - // CHECK-DAG: sym_line_no : 43 + // CHECK-DAG: sym_line_no : 37 // CHECK-DAG: sym_source_file_name : {{.*}}task_execution.cpp - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 // CHECK-DAG: sym_function_name : typeinfo name for main::E2ETestKernel // CHECK-DAG: from_source : {{.*}} // CHECK-DAG: sycl_device_name : {{.*}} @@ -55,7 +49,7 @@ int main() { // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: [SYCL] Task end (event={{.*}},instanceID={{.*}}) // CHECK-NEXT: [SYCL] Queue destroy: - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 sycl::free(AllocSrc, Q); sycl::free(AllocDst, Q); } diff --git a/sycl/test-e2e/Tracing/task_execution_handler.cpp b/sycl/test-e2e/Tracing/task_execution_handler.cpp index 0563275f81312..a208fe6655bda 100644 --- a/sycl/test-e2e/Tracing/task_execution_handler.cpp +++ b/sycl/test-e2e/Tracing/task_execution_handler.cpp @@ -16,7 +16,7 @@ int main() { { cgh.memset(AllocSrc, 0, 1); }) .wait(); // CHECK: [SYCL] Task begin (event={{.*}},instanceID={{.*}}) - // 
CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 // CHECK-DAG: sym_column_no : {{.*}} // CHECK-DAG: sym_function_name : {{.*}} // CHECK-DAG: kernel_name : {{.*}} @@ -27,7 +27,7 @@ int main() { // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: [SYCL] Task end (event={{.*}},instanceID={{.*}}) // CHECK-NEXT: [SYCL] Task begin (event={{.*}},instanceID={{.*}}) - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 // CHECK-DAG: sym_column_no : {{.*}} // CHECK-DAG: sym_function_name : {{.*}} // CHECK-DAG: kernel_name : {{.*}} From f50526bf29351cbc0d897ae6a59c699aca910522 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 21 Jun 2024 04:23:03 -0700 Subject: [PATCH 36/52] fix test Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 4d26c2a822457..905ca889aaf0d 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -207,7 +207,7 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) { { WriteLockT Lock = acquireWriteLock(); NewCmd = MGraphBuilder.addCopyBack(Req, AuxiliaryCmds); - // Command was not creted because there were no operations with + // Command was not created because there were no operations with // buffer. if (!NewCmd) return nullptr; @@ -232,7 +232,9 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) { throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); } catch (...) { - NewCmd->getQueue()->reportAsyncException(std::current_exception()); + auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue(); + assert(WorkerQueue && "WorkerQueue for CopyBack command must be not null"); + WorkerQueue->reportAsyncException(std::current_exception()); } EventImplPtr NewEvent = NewCmd->getEvent(); cleanupCommands(ToCleanUp); From 2bd06e3a3ab0170ce0dfef9ace4ae16573ce7c69 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 24 Jun 2024 04:17:25 -0700 Subject: [PATCH 37/52] update win symbols Signed-off-by: Tikhomirova, Kseniya --- sycl/test/abi/sycl_symbols_windows.dump | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index e8610211e8572..c091a7751a0cc 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -41,18 +41,12 @@ ??$get_info@U?$max_work_groups@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$id@$00@23@XZ ??$get_info@U?$max_work_groups@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$id@$01@23@XZ ??$get_info@U?$max_work_groups@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$id@$02@23@XZ +??$get_info@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@U?$sub_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ 
-??$get_info@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@U?$work_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$work_item_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@U?$work_item_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$work_item_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@U?$work_item_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$sub_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$work_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ 
??$get_info@Uarchitecture@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AW4architecture@experimental@oneapi@ext@23@XZ ??$get_info@Uatomic_fence_order_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic_fence_scope_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_scope@_V1@sycl@@V?$allocator@W4memory_scope@_V1@sycl@@@std@@@std@@XZ @@ -108,6 +102,12 @@ ??$get_info_impl@U?$max_work_item_sizes@$00@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$range@$00@12@XZ ??$get_info_impl@U?$max_work_item_sizes@$01@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$range@$01@12@XZ ??$get_info_impl@U?$max_work_item_sizes@$02@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$range@$02@12@XZ +??$get_info_impl@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$sub_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_item_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_item_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info_impl@Uaddress_bits@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Uarchitecture@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AW4architecture@experimental@oneapi@ext@12@XZ ??$get_info_impl@Uaspects@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4aspect@_V1@sycl@@V?$allocator@W4aspect@_V1@sycl@@@std@@@std@@XZ @@ -4080,7 +4080,6 @@ ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z -?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z 
?ext_oneapi_advise_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEBX_KW4_pi_mem_advice@@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4arch_category@experimental@oneapi@ext@23@@Z ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4architecture@experimental@oneapi@ext@23@@Z @@ -4096,7 +4095,6 @@ ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@0AEBUimage_descriptor@56723@@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@PEAXAEBUimage_descriptor@56723@@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@56723@PEAX111@Z -?ext_oneapi_prod@queue@_V1@sycl@@QEAAXXZ ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEAX0AEBUimage_descriptor@experimental@oneapi@ext@23@_KAEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEAX0AEBUimage_descriptor@experimental@oneapi@ext@23@_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEAX0AEBUimage_descriptor@experimental@oneapi@ext@23@_KV423@AEBUcode_location@detail@23@@Z @@ -4158,6 +4156,7 @@ ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vstream@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVstream@34@@Z ?ext_oneapi_prefetch_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KV?$vector@IV?$allocator@I@std@@@6@PEAI@Z +?ext_oneapi_prod@queue@_V1@sycl@@QEAAXXZ ?ext_oneapi_set_external_event@queue@_V1@sycl@@QEAAXAEBVevent@23@@Z ?ext_oneapi_signal_external_semaphore@handler@_V1@sycl@@QEAAXUinterop_semaphore_handle@experimental@oneapi@ext@23@@Z ?ext_oneapi_signal_external_semaphore@handler@_V1@sycl@@QEAAXUinterop_semaphore_handle@experimental@oneapi@ext@23@_K@Z @@ -4205,6 +4204,7 @@ ?frexp_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAH@Z ?frexp_impl@detail@_V1@sycl@@YAMMPEAH@Z ?frexp_impl@detail@_V1@sycl@@YANNPEAH@Z +?generateFlushCommand@stream_impl@detail@_V1@sycl@@QEAAXAEAVhandler@34@@Z ?get@context@_V1@sycl@@QEBAPEAU_cl_context@@XZ ?get@device@_V1@sycl@@QEBAPEAU_cl_device_id@@XZ ?get@kernel@_V1@sycl@@QEBAPEAU_cl_kernel@@XZ @@ -4655,6 +4655,7 @@ ?useHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NXZ ?use_kernel_bundle@handler@_V1@sycl@@QEAAXAEBV?$kernel_bundle@$01@23@@Z ?usesPinnedHostMemory@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ +?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?verifyKernelInvoc@handler@_V1@sycl@@AEAAXAEBVkernel@23@@Z ?verifyUsedKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?verifyUsedKernelBundleInternal@handler@_V1@sycl@@AEAAXVstring_view@detail@23@@Z From 5fbcb1ead2551a055366f906a093c9267ccaf978 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 24 Jun 2024 05:17:33 -0700 Subject: [PATCH 38/52] fix format Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/stream_impl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 7e81e964bdc17..1ba09ed36369c 100644 --- 
a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -108,13 +108,13 @@ void stream_impl::generateFlushCommand(handler &cgh) { } // ABI break: remove -void stream_impl::initStreamHost(QueueImplPtr){}; +void stream_impl::initStreamHost(QueueImplPtr){} // ABI break: remove -void stream_impl::flush(const EventImplPtr &) {}; +void stream_impl::flush(const EventImplPtr &) {} // ABI break: remove -void stream_impl::flush() {}; +void stream_impl::flush() {} } // namespace detail } // namespace _V1 From d5d15bf8f4b4317e3a9f43ce179a65f7a195f849 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 24 Jun 2024 08:28:19 -0700 Subject: [PATCH 39/52] fix formatting Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/stream_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 1ba09ed36369c..b9f70581ac7a8 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -108,7 +108,7 @@ void stream_impl::generateFlushCommand(handler &cgh) { } // ABI break: remove -void stream_impl::initStreamHost(QueueImplPtr){} +void stream_impl::initStreamHost(QueueImplPtr) {} // ABI break: remove void stream_impl::flush(const EventImplPtr &) {} From e185cbcca90a9d76827c95fe211aace1c7284f95 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 25 Jun 2024 08:25:30 -0700 Subject: [PATCH 40/52] self review comments fix Signed-off-by: Tikhomirova, Kseniya --- sycl/source/context.cpp | 2 +- sycl/source/detail/buffer_impl.cpp | 4 +- sycl/source/detail/event_impl.cpp | 2 +- sycl/source/detail/event_impl.hpp | 2 +- sycl/source/detail/memory_manager.cpp | 2 +- sycl/source/detail/platform_impl.hpp | 6 -- sycl/source/detail/queue_impl.cpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 84 ++++++++----------- sycl/source/detail/scheduler/scheduler.hpp | 10 +-- sycl/source/device.cpp | 2 +- sycl/source/event.cpp | 2 +- sycl/source/kernel.cpp | 2 +- sycl/source/platform.cpp | 2 +- sycl/source/queue.cpp | 2 +- .../test-e2e/SubGroup/sub_groups_sycl2020.cpp | 4 - 15 files changed, 52 insertions(+), 76 deletions(-) diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index 1261096b82047..e4c7404c7b078 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -127,7 +127,7 @@ context::get_backend_info() const { cl_context context::get() const { return impl->get(); } bool context::is_host() const { - assert(true && "context::is_host should not be called in implementation."); + assert(false && "context::is_host should not be called in implementation."); return false; } diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index f13444107e9eb..1795992594078 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -24,7 +24,9 @@ void *buffer_impl::allocateMem(ContextImplPtr Context, bool InitFromUserData, sycl::detail::pi::PiEvent &OutEventToWait) { bool HostPtrReadOnly = false; BaseT::determineHostPtr(Context, InitFromUserData, HostPtr, HostPtrReadOnly); - + assert(!(nullptr == HostPtr && BaseT::useHostPtr() && !Context) && + "Internal error. 
Allocating memory on the host " + "while having use_host_ptr property"); return MemoryManager::allocateMemBuffer( std::move(Context), this, HostPtr, HostPtrReadOnly, BaseT::getSizeInBytes(), BaseT::MInteropEvent, BaseT::MInteropContext, diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index e203924d2d612..f4ad52221ed37 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -367,7 +367,7 @@ uint64_t event_impl::get_profiling_info() { } template <> uint32_t event_impl::get_info() { - if (MEvent) { + if (!MIsHostEvent && MEvent) { return get_event_info(this->getHandleRef(), this->getPlugin()); } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 8b46e715cd13e..12b58d25ab3cd 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -49,7 +49,7 @@ class event_impl { /// Normally constructs a host event, use std::nullopt to instead instantiate /// a device event. event_impl(std::optional State = HES_Complete) - : MIsInitialized(false), MIsFlushed(true), + : MIsInitialized(false), MIsHostEvent(State), MIsFlushed(true), MState(State.value_or(HES_Complete)) { // Need to fail in event() constructor if there are problems with the // ONEAPI_DEVICE_SELECTOR. Deferring may lead to conficts with noexcept diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 6f30ceef8eb51..97615960877ff 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -935,7 +935,7 @@ void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, std::vector DepEvents, sycl::detail::pi::PiEvent &OutEvent) { - // Host queue is not supported here. + // Execution on host is not supported here. if (!Queue) { throw runtime_error("Not supported configuration of unmap requested", PI_ERROR_INVALID_OPERATION); diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index bc6278d54f32c..0a926712eb806 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -32,9 +32,6 @@ class device_impl; // TODO: implement parameters treatment for host device class platform_impl { public: - /// Constructs platform_impl for a SYCL host platform. - platform_impl() : MHostPlatform(true) {} - /// Constructs platform_impl from a plug-in interoperability platform /// handle. /// @@ -125,7 +122,6 @@ class platform_impl { // \return the Plugin associated with this platform. 
const PluginPtr &getPlugin() const { - assert(!MHostPlatform && "Plugin is not available for Host."); return MPlugin; } @@ -134,7 +130,6 @@ class platform_impl { /// \param PluginPtr is a pointer to a plugin instance /// \param Backend is the backend that we want this platform to use void setPlugin(PluginPtr &PluginPtr, backend Backend) { - assert(!MHostPlatform && "Plugin is not available for Host"); MPlugin = PluginPtr; MBackend = Backend; } @@ -214,7 +209,6 @@ class platform_impl { filterDeviceFilter(std::vector &PiDevices, ListT *FilterList) const; - bool MHostPlatform = false; sycl::detail::pi::PiPlatform MPlatform = 0; backend MBackend; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index a5f9ae9964ac6..ae59239664327 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -655,7 +655,7 @@ bool queue_impl::ext_oneapi_empty() const { info::event_command_status::complete; } - // Check the status of the backend queue if this is not a host queue. + // Check the status of the backend queue. pi_bool IsReady = false; getPlugin()->call( MQueues[0], PI_EXT_ONEAPI_QUEUE_INFO_EMPTY, sizeof(pi_bool), &IsReady, diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 6322b904fd6bc..d52fb0da025f3 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -79,7 +79,10 @@ static size_t deviceToID(const device &Device) { return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); } -static std::string deviceToString(device Device) { +static std::string queueDeviceToString(const QueueImplPtr &Queue) { + if (!Queue) + return "host"; + auto Device = Queue->get_device(); if (Device.is_cpu()) return "CPU"; else if (Device.is_gpu()) @@ -91,15 +94,19 @@ static std::string deviceToString(device Device) { } static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) { - xpti::addMetadata(TraceEvent, "sycl_device", - Queue ? deviceToID(Queue->get_device()) : 0); - xpti::addMetadata(TraceEvent, "sycl_device_type", - Queue ? deviceToString(Queue->get_device()) : "host"); + xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue)); if (Queue) + { + xpti::addMetadata(TraceEvent, "sycl_device", deviceToID(Queue->get_device())); xpti::addMetadata(TraceEvent, "sycl_device_name", getSyclObjImpl(Queue->get_device())->getDeviceName()); + } } +static unsigned long long getQueueID(const QueueImplPtr& Queue) +{ + return Queue ? Queue->getQueueID() : 0; +} #endif static ContextImplPtr getContext(const QueueImplPtr &Queue) { @@ -1009,8 +1016,7 @@ void AllocaCommandBase::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); } #endif } @@ -1081,8 +1087,7 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA ON " - << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "ALLOCA ON " << queueDeviceToString(MQueue) << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; @@ -1130,8 +1135,7 @@ void AllocaSubBufCommand::emitInstrumentationData() { this->MRequirement.MAccessRange[0]); xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1168,8 +1172,7 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "ALLOCA SUB BUF ON " << queueDeviceToString(MQueue)<< "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; Stream << " Access range : " << this->MRequirement.MAccessRange[0] << "\\n"; @@ -1207,8 +1210,7 @@ void ReleaseCommand::emitInstrumentationData() { commandToName(MAllocaCmd->getType())); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1282,8 +1284,7 @@ void ReleaseCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FF827A\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "RELEASE ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "RELEASE ON " << queueDeviceToString(MQueue) << "\\n"; Stream << " Alloca : " << MAllocaCmd << "\\n"; Stream << " MemObj : " << MAllocaCmd->getSYCLMemObj() << "\\n"; Stream << "\"];" << std::endl; @@ -1327,8 +1328,7 @@ void MapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1353,8 +1353,7 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "MAP ON " << queueDeviceToString(MQueue) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1389,8 +1388,7 @@ void UnMapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? 
MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1435,8 +1433,7 @@ void UnMapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#EBC40F\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "UNMAP ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "UNMAP ON " << queueDeviceToString(MQueue) << "\\n"; Stream << "\"];" << std::endl; @@ -1488,8 +1485,7 @@ void MemCpyCommand::emitInstrumentationData() { MQueue ? deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1546,8 +1542,7 @@ void MemCpyCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#C7EB15\" label=\""; Stream << "ID = " << this << " ; "; - Stream << "MEMCPY ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "MEMCPY ON " << queueDeviceToString(MQueue) << "\\n"; Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue << "\\n"; Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue << "\\n"; @@ -1603,8 +1598,7 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#f1337f\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "UPDATE REQ ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "UPDATE REQ ON " << queueDeviceToString(MQueue) << "\\n"; bool IsReqOnBuffer = MDstReq.MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer; Stream << "TYPE: " << (IsReqOnBuffer ? "Buffer" : "Image") << "\\n"; @@ -1661,8 +1655,7 @@ void MemCpyCommandHost::emitInstrumentationData() { MQueue ? deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1750,8 +1743,7 @@ void EmptyCommand::emitInstrumentationData() { reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1780,8 +1772,7 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "MEMCPY HOST ON " - << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "MEMCPY HOST ON " << queueDeviceToString(MQueue) << "\\n"; Stream << "\"];" << std::endl; @@ -1819,8 +1810,7 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -2087,9 +2077,7 @@ std::pair emitKernelInstrumentationData( if (CmdTraceEvent) { // Stash the queue_id mutable metadata in TLS - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - Queue ? Queue->getQueueID() : 0); - + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(Queue)); instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, SyclKernel, Queue, CGArgs); @@ -2133,8 +2121,7 @@ void ExecCGCommand::emitInstrumentationData() { CmdTraceEvent); if (CmdTraceEvent) { - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CG::Kernel) { auto KernelCG = @@ -2157,8 +2144,7 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EXEC CG ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "EXEC CG ON " << queueDeviceToString(MQueue) << "\\n"; switch (MCommandGroup->getType()) { case detail::CG::Kernel: { @@ -3353,8 +3339,7 @@ void KernelFusionCommand::emitInstrumentationData() { if (MFirstInstance) { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, static_cast(MTraceEvent), MInstanceID, @@ -3368,8 +3353,7 @@ void KernelFusionCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "KERNEL FUSION on " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n" + Stream << "KERNEL FUSION on " << queueDeviceToString(MQueue) << "\\n" << "FUSION LIST: {"; bool Initial = true; for (auto *Cmd : MFusionList) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 03372fc0b7a8f..cd5ae6bd0e0fe 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -213,16 +213,16 @@ struct MemObjRecord { // Contains latest write commands working with memory object. LeavesCollection MWriteLeaves; - // The flag indicates that the content of the memory object was/will be - // modified. Used while deciding if copy back needed. - bool MMemModified = false; - // The context which has the latest state of the memory object. 
ContextImplPtr MCurContext; - // The mode this object can be accessed with from the host (host_accessor). + // The mode this object can be accessed from the host (host_accessor). // Valid only if the current usage is on host. access::mode MHostAccess = access::mode::read_write; + + // The flag indicates that the content of the memory object was/will be + // modified. Used while deciding if copy back needed. + bool MMemModified = false; }; /// DPC++ graph scheduler class. diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index a3a88ebf6636a..18b9cf4036cda 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -71,7 +71,7 @@ std::vector device::get_devices(info::device_type deviceType) { cl_device_id device::get() const { return impl->get(); } bool device::is_host() const { - assert(true && "device::is_host should not be called in implementation."); + assert(false && "device::is_host should not be called in implementation."); return false; } diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 12b4a7e68164e..69d62f354ea4c 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -38,7 +38,7 @@ bool event::operator==(const event &rhs) const { return rhs.impl == impl; } bool event::operator!=(const event &rhs) const { return !(*this == rhs); } bool event::is_host() const { - assert(true && "event::is_host should not be called in implementation."); + assert(false && "event::is_host should not be called in implementation."); return false; } diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index bc842f6e596a5..625eb995c47d3 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -31,7 +31,7 @@ kernel::kernel(cl_kernel ClKernel, const context &SyclContext) cl_kernel kernel::get() const { return impl->get(); } bool kernel::is_host() const { - assert(true && "kernel::is_host should not be called in implementation."); + assert(false && "kernel::is_host should not be called in implementation."); return false; } diff --git a/sycl/source/platform.cpp b/sycl/source/platform.cpp index 9a15943213ec6..179c8c09d0825 100644 --- a/sycl/source/platform.cpp +++ b/sycl/source/platform.cpp @@ -41,7 +41,7 @@ bool platform::has_extension(const std::string &ExtensionName) const { } bool platform::is_host() const { - assert(true && "platform::is_host should not be called in implementation."); + assert(false && "platform::is_host should not be called in implementation."); return false; } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 174d1f9197af1..5cd0bd3449095 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -96,7 +96,7 @@ queue::ext_oneapi_get_graph() const { } bool queue::is_host() const { - assert(true && "queue::is_host should not be called in implementation."); + assert(false && "queue::is_host should not be called in implementation."); return false; } diff --git a/sycl/test-e2e/SubGroup/sub_groups_sycl2020.cpp b/sycl/test-e2e/SubGroup/sub_groups_sycl2020.cpp index 5b71a60a54051..a7d4c6493b8b5 100644 --- a/sycl/test-e2e/SubGroup/sub_groups_sycl2020.cpp +++ b/sycl/test-e2e/SubGroup/sub_groups_sycl2020.cpp @@ -1,9 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// -// Assertion `!MHostPlatform && "Plugin is not available for Host."' failed on -// Nvidia. 
-// XFAIL: hip_nvidia #include From a87b32817a46d1dfdba9205163106f2af565ea6c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 26 Jun 2024 04:35:59 -0700 Subject: [PATCH 41/52] fix Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.hpp | 4 ++-- sycl/source/detail/scheduler/commands.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 12b58d25ab3cd..f609bd96b7189 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -49,8 +49,8 @@ class event_impl { /// Normally constructs a host event, use std::nullopt to instead instantiate /// a device event. event_impl(std::optional State = HES_Complete) - : MIsInitialized(false), MIsHostEvent(State), MIsFlushed(true), - MState(State.value_or(HES_Complete)) { + : MIsInitialized(false), MIsFlushed(true), + MState(State.value_or(HES_Complete)), MIsHostEvent(State) { // Need to fail in event() constructor if there are problems with the // ONEAPI_DEVICE_SELECTOR. Deferring may lead to conficts with noexcept // event methods. This ::get() call uses static vars to read and parse the diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d52fb0da025f3..9d9315652ed55 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1353,7 +1353,7 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " << queueDeviceToString(MQueue) : "host") << "\\n"; + Stream << "MAP ON " << queueDeviceToString(MQueue) << "\\n"; Stream << "\"];" << std::endl; From 0a5a7583eef8f597c8b82c70a8671aeb1f45097c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 26 Jun 2024 07:18:55 -0700 Subject: [PATCH 42/52] Update isCOntextInitialized stuff Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 27 +++++----- sycl/source/detail/event_impl.hpp | 39 ++++++++------- sycl/source/detail/helpers.cpp | 4 +- sycl/source/detail/scheduler/commands.cpp | 49 ++++++++++++------- sycl/source/detail/scheduler/scheduler.cpp | 4 +- sycl/source/queue.cpp | 2 +- sycl/unittests/buffer/BufferReleaseBase.hpp | 4 -- sycl/unittests/pi/PiMock.cpp | 4 -- .../scheduler/EnqueueWithDependsOnDeps.cpp | 4 -- .../scheduler/InOrderQueueHostTaskDeps.cpp | 4 -- sycl/unittests/scheduler/KernelFusion.cpp | 4 -- 11 files changed, 66 insertions(+), 79 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index f4ad52221ed37..58a52230f1269 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -33,8 +33,8 @@ extern xpti::trace_event_data_t *GSYCLGraphEvent; #endif // If we do not yet have a context, use the default one. 
-void event_impl::ensureContextInitialized() { - if (MIsContextInitialized) +void event_impl::tryToInitContext() { + if (MContext || !MIsDefaultConstructed) return; const device SyclDevice; @@ -114,12 +114,12 @@ const sycl::detail::pi::PiEvent &event_impl::getHandleRef() const { sycl::detail::pi::PiEvent &event_impl::getHandleRef() { return MEvent; } const ContextImplPtr &event_impl::getContextImpl() { - ensureContextInitialized(); + tryToInitContext(); return MContext; } const PluginPtr &event_impl::getPlugin() { - ensureContextInitialized(); + tryToInitContext(); return MContext->getPlugin(); } @@ -128,14 +128,12 @@ void event_impl::setStateIncomplete() { MState = HES_NotComplete; } void event_impl::setContextImpl(const ContextImplPtr &Context) { MIsHostEvent = Context == nullptr; MContext = Context; - MIsContextInitialized = true; } event_impl::event_impl(sycl::detail::pi::PiEvent Event, const context &SyclContext) - : MIsContextInitialized(true), MEvent(Event), - MContext(detail::getSyclObjImpl(SyclContext)), MIsFlushed(true), - MState(HES_Complete) { + : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), + MIsFlushed(true), MState(HES_Complete) { sycl::detail::pi::PiContext TempContext; getPlugin()->call( @@ -398,7 +396,7 @@ event_impl::get_info() { template <> typename info::platform::version::return_type event_impl::get_backend_info() const { - if (!MIsContextInitialized) { + if (!MContext) { return "Context not initialized, no backend info available"; } if (MContext->getBackend() != backend::opencl) { @@ -419,7 +417,7 @@ event_impl::get_backend_info() const { template <> typename info::device::version::return_type event_impl::get_backend_info() const { - if (!MIsContextInitialized) { + if (!MContext) { return "Context not initialized, no backend info available"; } if (MContext->getBackend() != backend::opencl) { @@ -437,7 +435,7 @@ event_impl::get_backend_info() const { template <> typename info::device::backend_version::return_type event_impl::get_backend_info() const { - if (!MIsContextInitialized) { + if (!MContext) { return "Context not initialized, no backend info available"; } if (MContext->getBackend() != backend::ext_oneapi_level_zero) { @@ -456,11 +454,12 @@ void HostProfilingInfo::start() { StartTime = getTimestamp(); } void HostProfilingInfo::end() { EndTime = getTimestamp(); } pi_native_handle event_impl::getNative() { - ensureContextInitialized(); + if (isHost()) + return {}; + tryToInitContext(); auto Plugin = getPlugin(); - if (!MIsInitialized) { - MIsInitialized = true; + if (MIsDefaultConstructed && !MEvent) { auto TempContext = MContext.get()->getHandleRef(); Plugin->call(TempContext, &MEvent); } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index f609bd96b7189..f4c2ac2e90a86 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -49,8 +49,8 @@ class event_impl { /// Normally constructs a host event, use std::nullopt to instead instantiate /// a device event. event_impl(std::optional State = HES_Complete) - : MIsInitialized(false), MIsFlushed(true), - MState(State.value_or(HES_Complete)), MIsHostEvent(State) { + : MIsFlushed(true), MState(State.value_or(HES_Complete)), + MIsDefaultConstructed(!State), MIsHostEvent(State) { // Need to fail in event() constructor if there are problems with the // ONEAPI_DEVICE_SELECTOR. Deferring may lead to conficts with noexcept // event methods. 
This ::get() call uses static vars to read and parse the @@ -255,15 +255,6 @@ class event_impl { QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; - /// Checks if an event is in a fully intialized state. Default-constructed - /// events will return true only after having initialized its native event, - /// while other events will assume that they are fully initialized at - /// construction, relying on external sources to supply member data. - /// - /// \return true if the event is considered to be in a fully initialized - /// state. - bool isInitialized() const noexcept { return MIsInitialized; } - /// Checks if this event is complete. /// /// \return true if this event is complete. @@ -279,10 +270,11 @@ class event_impl { MPostCompleteEvents.push_back(Event); } - bool isContextInitialized() const noexcept { return MIsContextInitialized; } + bool isDefaultConstructed() const noexcept { return MIsDefaultConstructed; } ContextImplPtr getContextImplPtr() { - ensureContextInitialized(); + if (MIsDefaultConstructed) + tryToInitContext(); return MContext; } @@ -347,11 +339,7 @@ class event_impl { void instrumentationEpilog(void *TelementryEvent, const std::string &Name, int32_t StreamID, uint64_t IId) const; void checkProfilingPreconditions() const; - // Events constructed without a context will lazily use the default context - // when needed. - void ensureContextInitialized(); - bool MIsInitialized = true; - bool MIsContextInitialized = false; + sycl::detail::pi::PiEvent MEvent = nullptr; // Stores submission time of command associated with event uint64_t MSubmitTime = 0; @@ -409,7 +397,20 @@ class event_impl { std::shared_ptr Context); std::atomic_bool MIsEnqueued{false}; - bool MIsHostEvent{false}; + + // Events constructed without a context will lazily use the default context + // when needed. + void tryToInitContext(); + // Event class represents 3 different kinds of operations: + // | type | has PI event | MContext | MIsHostTask | MIsDefaultConstructed | + // | dev | true | !nullptr | false | false | + // | host | false | nullptr | true | false | + // |default| * | * | false | true | + // Default constructed event is created with empty ctor in host code, MContext + // is lazily initialized with default device context on first context query. + // MEvent is lazily created in first pi handle query. + bool MIsDefaultConstructed = false; + bool MIsHostEvent = false; }; } // namespace detail diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 75c6fd72b8fd0..901fd34b4cce8 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -31,9 +31,7 @@ getOrWaitEvents(std::vector DepEvents, ContextImplPtr Context) { // throwaway events created with empty constructor will not have a context // (which is set lazily) calling getContextImpl() would set that // context, which we wish to avoid as it is expensive. 
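// [Editor's note: illustrative sketch, not part of the patch.] The comment above
// describes default-constructed ("throwaway") events: they are complete on
// creation, own no backend event, and only pick up a context lazily, so skipping
// them here avoids a needless and relatively expensive context query. Using only
// the public SYCL API (the function name below is invented for the example):
#include <sycl/sycl.hpp>
void exampleSkipDefaultConstructedDependency() {
  sycl::event Empty;                 // default-constructed, already complete
  Empty.wait();                      // returns immediately, no backend event used
  sycl::queue Q;
  Q.submit([&](sycl::handler &CGH) {
    CGH.depends_on(Empty);           // this dependency is skipped by getOrWaitEvents()
    CGH.single_task([] {});          // trivial kernel just to form a command group
  });
  Q.wait();
}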
- if ((!SyclEventImplPtr->isContextInitialized() && - !SyclEventImplPtr->isHost()) || - SyclEventImplPtr->isNOP()) { + if (SyclEventImplPtr->isDefaultConstructed() || SyclEventImplPtr->isNOP()) { continue; } // The fusion command and its event are associated with a non-host context, diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 9d9315652ed55..1b9aea1c10f02 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -95,16 +95,15 @@ static std::string queueDeviceToString(const QueueImplPtr &Queue) { static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) { xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue)); - if (Queue) - { - xpti::addMetadata(TraceEvent, "sycl_device", deviceToID(Queue->get_device())); + if (Queue) { + xpti::addMetadata(TraceEvent, "sycl_device", + deviceToID(Queue->get_device())); xpti::addMetadata(TraceEvent, "sycl_device_name", getSyclObjImpl(Queue->get_device())->getDeviceName()); } } -static unsigned long long getQueueID(const QueueImplPtr& Queue) -{ +static unsigned long long getQueueID(const QueueImplPtr &Queue) { return Queue ? Queue->getQueueID() : 0; } #endif @@ -279,7 +278,7 @@ std::vector Command::getPiEventsBlocking( // (which is set lazily) calling getContextImpl() would set that // context, which we wish to avoid as it is expensive. // Skip host task and NOP events also. - if (!EventImpl->isContextInitialized() || EventImpl->isHost() || + if (EventImpl->isDefaultConstructed() || EventImpl->isHost() || EventImpl->isNOP()) continue; // In this path nullptr native event means that the command has not been @@ -728,7 +727,8 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, // 2. Some types of commands do not produce PI events after they are // enqueued (e.g. alloca). Note that we can't check the pi event to make that // distinction since the command might still be unenqueued at this point. 
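// [Editor's note: summary added for readability, not part of the patch.] Taken
// together with the event_impl kind table earlier in this series, the dependency
// cases handled here are:
//   - host-task events:            never carry a PI event;
//   - default-constructed events:  carry a PI event only once it is lazily created;
//   - events of commands whose producesPiEvent() returns false (e.g. alloca):
//     enqueued without producing a PI event;
//   - remaining device events:     a PI event is expected once the command is enqueued.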
- bool PiEventExpected = (!DepEvent->isHost() && DepEvent->isInitialized()); + bool PiEventExpected = + (!DepEvent->isHost() && !DepEvent->isDefaultConstructed()); if (auto *DepCmd = static_cast(DepEvent->getCommand())) PiEventExpected &= DepCmd->producesPiEvent(); @@ -1016,7 +1016,8 @@ void AllocaCommandBase::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); } #endif } @@ -1135,7 +1136,8 @@ void AllocaSubBufCommand::emitInstrumentationData() { this->MRequirement.MAccessRange[0]); xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1172,7 +1174,7 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " << queueDeviceToString(MQueue)<< "\\n"; + Stream << "ALLOCA SUB BUF ON " << queueDeviceToString(MQueue) << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; Stream << " Access range : " << this->MRequirement.MAccessRange[0] << "\\n"; @@ -1210,7 +1212,8 @@ void ReleaseCommand::emitInstrumentationData() { commandToName(MAllocaCmd->getType())); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1328,7 +1331,8 @@ void MapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1388,7 +1392,8 @@ void UnMapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1485,7 +1490,8 @@ void MemCpyCommand::emitInstrumentationData() { MQueue ? 
deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1655,7 +1661,8 @@ void MemCpyCommandHost::emitInstrumentationData() { MQueue ? deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1743,7 +1750,8 @@ void EmptyCommand::emitInstrumentationData() { reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1810,7 +1818,8 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -2121,7 +2130,8 @@ void ExecCGCommand::emitInstrumentationData() { CmdTraceEvent); if (CmdTraceEvent) { - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CG::Kernel) { auto KernelCG = @@ -3339,7 +3349,8 @@ void KernelFusionCommand::emitInstrumentationData() { if (MFirstInstance) { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, static_cast(MTraceEvent), MInstanceID, diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 905ca889aaf0d..4acc5b6c3a6a4 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -697,9 +697,7 @@ bool CheckEventReadiness(const ContextImplPtr &Context, // don't represent actual dependencies. Calling getContextImpl() would set // their context, which we wish to avoid as it is expensive. // NOP events also don't represent actual dependencies. 
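// [Editor's note, not part of the patch.] Same early-out reasoning as in
// getOrWaitEvents() earlier in this series: default-constructed and NOP events are
// reported ready without touching the backend, while host-task events are handled
// by the separate isHost() branch just below rather than via a PI event query.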
- if ((!SyclEventImplPtr->isContextInitialized() && - !SyclEventImplPtr->isHost()) || - SyclEventImplPtr->isNOP()) { + if ((SyclEventImplPtr->isDefaultConstructed()) || SyclEventImplPtr->isNOP()) { return true; } if (SyclEventImplPtr->isHost()) { diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 5cd0bd3449095..9c807f90061b5 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -244,7 +244,7 @@ event queue::ext_oneapi_submit_barrier(const std::vector &WaitList, bool AllEventsEmptyOrNop = std::all_of( begin(WaitList), end(WaitList), [&](const event &Event) -> bool { auto EventImpl = detail::getSyclObjImpl(Event); - return !EventImpl->isContextInitialized() || EventImpl->isNOP(); + return EventImpl->isDefaultConstructed() || EventImpl->isNOP(); }); if (is_in_order() && !impl->getCommandGraph() && !impl->MIsProfilingEnabled && AllEventsEmptyOrNop) diff --git a/sycl/unittests/buffer/BufferReleaseBase.hpp b/sycl/unittests/buffer/BufferReleaseBase.hpp index b35d73cb3909c..bfcc4fb8369ed 100644 --- a/sycl/unittests/buffer/BufferReleaseBase.hpp +++ b/sycl/unittests/buffer/BufferReleaseBase.hpp @@ -43,10 +43,6 @@ class BufferDestructionCheckCommon : public ::testing::Test { protected: void SetUp() override { - if (Plt.is_host()) { - std::cout << "Not run due to host-only environment\n"; - GTEST_SKIP(); - } MockSchedulerPtr = new MockScheduler(); sycl::detail::GlobalHandler::instance().attachScheduler( dynamic_cast(MockSchedulerPtr)); diff --git a/sycl/unittests/pi/PiMock.cpp b/sycl/unittests/pi/PiMock.cpp index c7014162f9cf8..02044d9631376 100644 --- a/sycl/unittests/pi/PiMock.cpp +++ b/sycl/unittests/pi/PiMock.cpp @@ -56,10 +56,6 @@ TEST(PiMockTest, ConstructFromQueue) { sycl::unittest::PiMock Mock; queue MockQ{Mock.getPlatform().get_devices()[0]}; queue NormalQ; - if (NormalQ.is_host()) { - std::cerr << "Not run due to host-only environment\n"; - return; - } const auto &NormalPiPlugin = detail::getSyclObjImpl(NormalQ)->getPlugin()->getPiPlugin(); diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index e1bc8c894f311..08f03420ac54e 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -26,10 +26,6 @@ constexpr auto DisableCleanupName = "SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP"; std::vector> PassedNumEvents; bool CheckTestExecutionRequirements(const platform &plt) { - if (plt.is_host()) { - std::cout << "Not run due to host-only environment\n"; - return false; - } // This test only contains device image for SPIR-V capable devices. 
if (plt.get_backend() != sycl::backend::opencl && plt.get_backend() != sycl::backend::ext_oneapi_level_zero) { diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index 8693ff5e4c52b..929f8735bc85f 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -130,10 +130,6 @@ TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) { customextUSMEnqueueMemset); sycl::platform Plt = Mock.getPlatform(); - if (Plt.is_host()) { - std::cout << "Not run due to host-only environment\n"; - GTEST_SKIP(); - } context Ctx{Plt}; queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()}; diff --git a/sycl/unittests/scheduler/KernelFusion.cpp b/sycl/unittests/scheduler/KernelFusion.cpp index 8b45c03e37f1f..5a86636b13c09 100644 --- a/sycl/unittests/scheduler/KernelFusion.cpp +++ b/sycl/unittests/scheduler/KernelFusion.cpp @@ -42,10 +42,6 @@ detail::Command *CreateTaskCommand(MockScheduler &MS, } bool CheckTestExecRequirements(const platform &plt) { - if (plt.is_host()) { - std::cout << "Not run due to host-only environment\n"; - return false; - } // This test only contains device image for SPIR-V capable devices. if (plt.get_backend() != sycl::backend::opencl && plt.get_backend() != sycl::backend::ext_oneapi_level_zero) { From 97c4ce548c894ab94b223fd66d1d18f7a97f7d78 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 26 Jun 2024 12:00:51 -0700 Subject: [PATCH 43/52] prepare removal from handler Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/handler.hpp | 69 +++++++++------------------- sycl/source/detail/platform_impl.hpp | 4 +- 2 files changed, 23 insertions(+), 50 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index a71f5400a813d..19d0c5ac1e85e 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -178,22 +178,22 @@ template -static Arg member_ptr_helper(RetType (Func::*)(Arg) const); +static Arg member_ptr_helper(RetType (Func:: *)(Arg) const); // Non-const version of the above template to match functors whose 'operator()' // is declared w/o the 'const' qualifier. template -static Arg member_ptr_helper(RetType (Func::*)(Arg)); +static Arg member_ptr_helper(RetType (Func:: *)(Arg)); // Version with two arguments to handle the case when kernel_handler is passed // to a lambda template -static Arg1 member_ptr_helper(RetType (Func::*)(Arg1, Arg2) const); +static Arg1 member_ptr_helper(RetType (Func:: *)(Arg1, Arg2) const); // Non-const version of the above template to match functors whose 'operator()' // is declared w/o the 'const' qualifier. template -static Arg1 member_ptr_helper(RetType (Func::*)(Arg1, Arg2)); +static Arg1 member_ptr_helper(RetType (Func:: *)(Arg1, Arg2)); template decltype(member_ptr_helper(&F::operator())) argument_helper(int); @@ -464,8 +464,8 @@ class __SYCL_EXPORT handler { /// Constructs SYCL handler from queue. /// /// \param Queue is a SYCL queue. - /// \param IsHost indicates if this handler is created for SYCL host device. - handler(std::shared_ptr Queue, bool IsHost); + handler(std::shared_ptr Queue, + bool /*ABI Break: to remove */); /// Constructs SYCL handler from the associated queue and the submission's /// primary and secondary queue. @@ -475,10 +475,10 @@ class __SYCL_EXPORT handler { /// \param PrimaryQueue is the primary SYCL queue of the submission. 
/// \param SecondaryQueue is the secondary SYCL queue of the submission. This /// is null if no secondary queue is associated with the submission. - /// \param IsHost indicates if this handler is created for SYCL host device. handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, bool IsHost); + std::shared_ptr SecondaryQueue, + bool /*ABI Break: to remove */); /// Constructs SYCL handler from Graph. /// @@ -609,7 +609,7 @@ class __SYCL_EXPORT handler { ~handler() = default; // TODO: Private and unusued. Remove when ABI break is allowed. - bool is_host() { return MIsHost; } + bool is_host() { return false; } #ifdef __SYCL_DEVICE_ONLY__ // In device compilation accessor isn't inherited from host base classes, so @@ -888,12 +888,6 @@ class __SYCL_EXPORT handler { detail::KernelLambdaHasKernelHandlerArgT::value; - if (IsCallableWithKernelHandler && MIsHost) { - throw sycl::feature_not_supported( - "kernel_handler is not yet supported by host device.", - PI_ERROR_INVALID_OPERATION); - } - KernelType *KernelPtr = ResetHostKernel(KernelFunc); @@ -1042,8 +1036,7 @@ class __SYCL_EXPORT handler { std::enable_if_t<(DimSrc > 0) && (DimDst > 0), bool> copyAccToAccHelper(accessor Src, accessor Dst) { - if (!MIsHost && - IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range())) + if (IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range())) return false; range<1> LinearizedRange(Src.size()); @@ -1065,6 +1058,7 @@ class __SYCL_EXPORT handler { /// /// \param Src is a source SYCL accessor. /// \param Dst is a destination SYCL accessor. + // ABI break: to remove whole method template copyAccToAccHelper(accessor Src, accessor Dst) { - if (!MIsHost) - return false; - - single_task<__copyAcc2Acc>( - [=]() { *(Dst.get_pointer()) = *(Src.get_pointer()); }); - return true; + return false; } #ifndef __SYCL_DEVICE_ONLY__ + // ABI break: to remove whole method /// Copies the content of memory object accessed by Src into the memory /// pointed by Dst. /// @@ -1101,6 +1090,7 @@ class __SYCL_EXPORT handler { }); } + // ABI break: to remove whole method /// Copies 1 element accessed by 0-dimensional accessor Src into the memory /// pointed by Dst. /// @@ -1118,6 +1108,7 @@ class __SYCL_EXPORT handler { }); } + // ABI break: to remove whole method /// Copies the memory pointed by Src into the memory accessed by Dst. /// /// \param Src is a pointer to source memory. @@ -1135,6 +1126,7 @@ class __SYCL_EXPORT handler { }); } + // ABI break: to remove whole method /// Copies 1 element pointed by Src to memory accessed by 0-dimensional /// accessor Dst. 
/// @@ -2245,7 +2237,7 @@ class __SYCL_EXPORT handler { MNDRDesc.set(range<1>{1}); MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CG::Kernel); - if (!MIsHost && !lambdaAndKernelHaveEqualName()) { + if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); } else @@ -2282,7 +2274,7 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CG::Kernel); setNDRangeUsed(false); - if (!MIsHost && !lambdaAndKernelHaveEqualName()) { + if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); } else @@ -2322,7 +2314,7 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CG::Kernel); setNDRangeUsed(false); - if (!MIsHost && !lambdaAndKernelHaveEqualName()) { + if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); } else @@ -2361,7 +2353,7 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CG::Kernel); setNDRangeUsed(true); - if (!MIsHost && !lambdaAndKernelHaveEqualName()) { + if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); } else @@ -2688,14 +2680,6 @@ class __SYCL_EXPORT handler { "Invalid accessor target for the copy method."); static_assert(isValidModeForSourceAccessor(AccessMode), "Invalid accessor mode for the copy method."); -#ifndef __SYCL_DEVICE_ONLY__ - if (MIsHost) { - // TODO: Temporary implementation for host. Should be handled by memory - // manager. - copyAccToPtrHost(Src, Dst); - return; - } -#endif setType(detail::CG::CopyAccToPtr); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Src; @@ -2732,14 +2716,7 @@ class __SYCL_EXPORT handler { "Invalid accessor mode for the copy method."); // TODO: Add static_assert with is_device_copyable when vec is // device-copyable. -#ifndef __SYCL_DEVICE_ONLY__ - if (MIsHost) { - // TODO: Temporary implementation for host. Should be handled by memory - // manager. - copyPtrToAccHost(Src, Dst); - return; - } -#endif + setType(detail::CG::CopyPtrToAcc); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Dst; @@ -2853,8 +2830,6 @@ class __SYCL_EXPORT handler { fill(accessor Dst, const T &Pattern) { - assert(!MIsHost && "fill() should no longer be callable on a host device."); - if (Dst.is_placeholder()) checkIfPlaceholderIsBoundToHandler(Dst); @@ -3392,7 +3367,7 @@ class __SYCL_EXPORT handler { /// Storage for the CG created when handling graph nodes added explicitly. std::unique_ptr MGraphNodeCG; - bool MIsHost = false; + bool MIsHost = false; // ABI break: to remove detail::code_location MCodeLoc = {}; bool MIsFinalized = false; diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index 0a926712eb806..dfb2597bf417b 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -121,9 +121,7 @@ class platform_impl { static std::vector get_platforms(); // \return the Plugin associated with this platform. - const PluginPtr &getPlugin() const { - return MPlugin; - } + const PluginPtr &getPlugin() const { return MPlugin; } /// Sets the platform implementation to use another plugin. 
/// From 6cf3171d7d43021fd668789e5b83d12331d41858 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 26 Jun 2024 12:05:12 -0700 Subject: [PATCH 44/52] fix test Signed-off-by: Tikhomirova, Kseniya --- sycl/test-e2e/Config/allowlist.cpp | 58 +++++++++++++----------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/sycl/test-e2e/Config/allowlist.cpp b/sycl/test-e2e/Config/allowlist.cpp index 121e911c0474c..7bfb16ca687d0 100644 --- a/sycl/test-e2e/Config/allowlist.cpp +++ b/sycl/test-e2e/Config/allowlist.cpp @@ -35,61 +35,51 @@ int main() { // Expected that the allowlist filter is not set if (getenv("PRINT_PLATFORM_INFO")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) - if (!Platform.is_host()) { + std::string Name = Platform.get_info(); + std::string Ver = Platform.get_info(); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. + replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::string Name = Platform.get_info(); - std::string Ver = Platform.get_info(); - // As a string will be used as regexp pattern, we need to get rid of - // symbols that can be treated in a special way. - replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name + << "}},PlatformVersion:{{" << Ver << "}}"; - std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name - << "}},PlatformVersion:{{" << Ver << "}}"; - - return 0; - } - throw std::runtime_error("Non host device is not found"); + return 0; } // Expected that the allowlist filter is not set if (getenv("PRINT_DEVICE_INFO")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) - if (!Platform.is_host()) { - const sycl::device Dev = Platform.get_devices().at(0); - std::string Name = Dev.get_info(); - std::string Ver = Dev.get_info(); + const sycl::device Dev = Platform.get_devices().at(0); + std::string Name = Dev.get_info(); + std::string Ver = Dev.get_info(); - // As a string will be used as regexp pattern, we need to get rid of - // symbols that can be treated in a special way. - replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. 
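// [Editor's note: illustrative sketch, not part of the patch.] The test's real
// replaceSpecialCharacters() helper is not shown in this part of the diff; a
// hypothetical stand-in with the behaviour the comment describes (escaping regex
// metacharacters in place so the value can sit inside the {{...}} pattern of
// SYCL_DEVICE_ALLOWLIST) could look like this:
#include <regex>
#include <string>
inline void replaceSpecialCharactersSketch(std::string &Str) {
  // Prefix every ECMAScript regex metacharacter with a backslash.
  static const std::regex Metacharacters{R"([.^$|()\[\]{}*+?\\])"};
  Str = std::regex_replace(Str, Metacharacters, R"(\$&)");
}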
+ replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name - << "}},DriverVersion:{{" << Ver << "}}"; + std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name + << "}},DriverVersion:{{" << Ver << "}}"; - return 0; - } - throw std::runtime_error("Non host device is not found"); + return 0; } // Expected the allowlist to be set with the "PRINT_DEVICE_INFO" run result if (getenv("TEST_DEVICE_AVAILABLE")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) - if (!Platform.is_host()) { - if (Platform.get_devices().size() != 1) - throw std::runtime_error("Expected only one non host device."); + if (Platform.get_devices().size() != 1) + throw std::runtime_error("Expected only one device."); - return 0; - } - throw std::runtime_error("Non host device is not found"); + return 0; + } } // Expected the allowlist to be set but empty if (getenv("TEST_DEVICE_IS_NOT_AVAILABLE")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) - if (!Platform.is_host()) - throw std::runtime_error("Expected no non host device is available"); + throw std::runtime_error("Expected no device is available"); return 0; } From 989557abba027be8a90c106ac69bac046016565d Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 26 Jun 2024 12:22:56 -0700 Subject: [PATCH 45/52] fix clang-format Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/handler.hpp | 8 +++--- sycl/test-e2e/Config/allowlist.cpp | 40 +++++++++++++++--------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 19d0c5ac1e85e..6df476e2d2d96 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -178,22 +178,22 @@ template -static Arg member_ptr_helper(RetType (Func:: *)(Arg) const); +static Arg member_ptr_helper(RetType (Func::*)(Arg) const); // Non-const version of the above template to match functors whose 'operator()' // is declared w/o the 'const' qualifier. template -static Arg member_ptr_helper(RetType (Func:: *)(Arg)); +static Arg member_ptr_helper(RetType (Func::*)(Arg)); // Version with two arguments to handle the case when kernel_handler is passed // to a lambda template -static Arg1 member_ptr_helper(RetType (Func:: *)(Arg1, Arg2) const); +static Arg1 member_ptr_helper(RetType (Func::*)(Arg1, Arg2) const); // Non-const version of the above template to match functors whose 'operator()' // is declared w/o the 'const' qualifier. template -static Arg1 member_ptr_helper(RetType (Func:: *)(Arg1, Arg2)); +static Arg1 member_ptr_helper(RetType (Func::*)(Arg1, Arg2)); template decltype(member_ptr_helper(&F::operator())) argument_helper(int); diff --git a/sycl/test-e2e/Config/allowlist.cpp b/sycl/test-e2e/Config/allowlist.cpp index 7bfb16ca687d0..7891088db5abb 100644 --- a/sycl/test-e2e/Config/allowlist.cpp +++ b/sycl/test-e2e/Config/allowlist.cpp @@ -36,34 +36,34 @@ int main() { if (getenv("PRINT_PLATFORM_INFO")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) std::string Name = Platform.get_info(); - std::string Ver = Platform.get_info(); - // As a string will be used as regexp pattern, we need to get rid of - // symbols that can be treated in a special way. - replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + std::string Ver = Platform.get_info(); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. 
+ replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name - << "}},PlatformVersion:{{" << Ver << "}}"; + std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name + << "}},PlatformVersion:{{" << Ver << "}}"; - return 0; + return 0; } // Expected that the allowlist filter is not set if (getenv("PRINT_DEVICE_INFO")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) const sycl::device Dev = Platform.get_devices().at(0); - std::string Name = Dev.get_info(); - std::string Ver = Dev.get_info(); + std::string Name = Dev.get_info(); + std::string Ver = Dev.get_info(); - // As a string will be used as regexp pattern, we need to get rid of - // symbols that can be treated in a special way. - replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. + replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name - << "}},DriverVersion:{{" << Ver << "}}"; + std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name + << "}},DriverVersion:{{" << Ver << "}}"; - return 0; + return 0; } // Expected the allowlist to be set with the "PRINT_DEVICE_INFO" run result @@ -72,14 +72,14 @@ int main() { if (Platform.get_devices().size() != 1) throw std::runtime_error("Expected only one device."); - return 0; - } + return 0; + } } // Expected the allowlist to be set but empty if (getenv("TEST_DEVICE_IS_NOT_AVAILABLE")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) - throw std::runtime_error("Expected no device is available"); + throw std::runtime_error("Expected no device is available"); return 0; } From 1a139752d02529ac27903be31b1e772e994aeb34 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 27 Jun 2024 03:41:00 -0700 Subject: [PATCH 46/52] fix warning Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/handler.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 6df476e2d2d96..a536d41f329e0 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -1064,8 +1064,8 @@ class __SYCL_EXPORT handler { access::mode ModeDst, access::target TargetDst, access::placeholder IsPHSrc, access::placeholder IsPHDst> std::enable_if_t - copyAccToAccHelper(accessor Src, - accessor Dst) { + copyAccToAccHelper(accessor, + accessor) { return false; } From e9fffb6419638e729ca7a9da32bd054b50a1dc37 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 27 Jun 2024 03:48:10 -0700 Subject: [PATCH 47/52] fix allowlist test cherry-pick issues Signed-off-by: Tikhomirova, Kseniya --- sycl/test-e2e/Config/allowlist.cpp | 49 ++++++++++++++++-------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/sycl/test-e2e/Config/allowlist.cpp b/sycl/test-e2e/Config/allowlist.cpp index 7891088db5abb..393326cb76283 100644 --- a/sycl/test-e2e/Config/allowlist.cpp +++ b/sycl/test-e2e/Config/allowlist.cpp @@ -34,46 +34,51 @@ int main() { // Expected that the allowlist filter is not set if (getenv("PRINT_PLATFORM_INFO")) { - for (const sycl::platform &Platform : sycl::platform::get_platforms()) + for (const sycl::platform &Platform : sycl::platform::get_platforms()) { std::string Name = Platform.get_info(); - std::string Ver = Platform.get_info(); - // As a string will be used as regexp 
pattern, we need to get rid of - // symbols that can be treated in a special way. - replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + std::string Ver = Platform.get_info(); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. + replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name - << "}},PlatformVersion:{{" << Ver << "}}"; + std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name + << "}},PlatformVersion:{{" << Ver << "}}"; - return 0; + return 0; + } + throw std::runtime_error("No device is found"); } // Expected that the allowlist filter is not set if (getenv("PRINT_DEVICE_INFO")) { - for (const sycl::platform &Platform : sycl::platform::get_platforms()) + for (const sycl::platform &Platform : sycl::platform::get_platforms()) { const sycl::device Dev = Platform.get_devices().at(0); - std::string Name = Dev.get_info(); - std::string Ver = Dev.get_info(); + std::string Name = Dev.get_info(); + std::string Ver = Dev.get_info(); - // As a string will be used as regexp pattern, we need to get rid of - // symbols that can be treated in a special way. - replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. + replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name - << "}},DriverVersion:{{" << Ver << "}}"; + std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name + << "}},DriverVersion:{{" << Ver << "}}"; - return 0; + return 0; + } + throw std::runtime_error("No device is found"); } // Expected the allowlist to be set with the "PRINT_DEVICE_INFO" run result if (getenv("TEST_DEVICE_AVAILABLE")) { - for (const sycl::platform &Platform : sycl::platform::get_platforms()) + for (const sycl::platform &Platform : sycl::platform::get_platforms()) { if (Platform.get_devices().size() != 1) throw std::runtime_error("Expected only one device."); - return 0; - } + return 0; + } + throw std::runtime_error("No device is found"); } // Expected the allowlist to be set but empty From 6ec2b63ecaedf8476d8a7dab3ce1bcc7b6e5963d Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 1 Jul 2024 05:06:17 -0700 Subject: [PATCH 48/52] fix code review comments Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.cpp | 14 +---- sycl/source/detail/scheduler/commands.cpp | 60 +++++++------------ .../source/detail/scheduler/graph_builder.cpp | 4 +- sycl/source/detail/scheduler/scheduler.cpp | 2 +- sycl/source/detail/xpti_registry.cpp | 15 +++++ sycl/source/detail/xpti_registry.hpp | 3 + sycl/test-e2e/Config/allowlist.cpp | 2 +- 7 files changed, 47 insertions(+), 53 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 0ec8f57abb596..6f6e72fbd2af9 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -26,7 +26,7 @@ namespace sycl { inline namespace _V1 { namespace detail { -// Treat 0 as reserved for "host" queue +// Treat 0 as reserved for host task traces std::atomic queue_impl::MNextAvailableQueueID = 1; thread_local bool NestedCallsDetector = false; @@ -498,17 +498,7 @@ void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc, xpti_at::active, &QWaitInstanceNo); IId = QWaitInstanceNo; if (WaitEvent) { - device D = get_device(); 
- std::string DevStr; - if (D.is_cpu()) - DevStr = "CPU"; - else if (D.is_gpu()) - DevStr = "GPU"; - else if (D.is_accelerator()) - DevStr = "ACCELERATOR"; - else - DevStr = "UNKNOWN"; - xpti::addMetadata(WaitEvent, "sycl_device_type", DevStr); + xpti::addMetadata(WaitEvent, "sycl_device_type", queueDeviceToString(this)); if (HasSourceInfo) { xpti::addMetadata(WaitEvent, "sym_function_name", CodeLoc.functionName()); xpti::addMetadata(WaitEvent, "sym_source_file_name", CodeLoc.fileName()); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 2ab4663c5db20..9ea45424f0ce5 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -79,22 +79,8 @@ static size_t deviceToID(const device &Device) { return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); } -static std::string queueDeviceToString(const QueueImplPtr &Queue) { - if (!Queue) - return "host"; - auto Device = Queue->get_device(); - if (Device.is_cpu()) - return "CPU"; - else if (Device.is_gpu()) - return "GPU"; - else if (Device.is_accelerator()) - return "ACCELERATOR"; - else - return "UNKNOWN"; -} - static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) { - xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue)); + xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue.get())); if (Queue) { xpti::addMetadata(TraceEvent, "sycl_device", deviceToID(Queue->get_device())); @@ -411,7 +397,7 @@ class DispatchHostTask { // we're ready to call the user-defined lambda now if (HostTask.MHostTask->isInteropTask()) { assert(HostTask.MQueue && - "Submitted queue for host task must be device queue"); + "Host task submissions should have an associated queue"); interop_handle IH{MReqToMem, HostTask.MQueue, HostTask.MQueue->getDeviceImplPtr(), HostTask.MQueue->getContextImplPtr()}; @@ -1088,7 +1074,7 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "ALLOCA ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; @@ -1174,7 +1160,7 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "ALLOCA SUB BUF ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; Stream << " Access range : " << this->MRequirement.MAccessRange[0] << "\\n"; @@ -1287,7 +1273,7 @@ void ReleaseCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FF827A\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "RELEASE ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "RELEASE ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << " Alloca : " << MAllocaCmd << "\\n"; Stream << " MemObj : " << MAllocaCmd->getSYCLMemObj() << "\\n"; Stream << "\"];" << std::endl; @@ -1357,7 +1343,7 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" 
<< this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "MAP ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << "\"];" << std::endl; @@ -1438,7 +1424,7 @@ void UnMapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#EBC40F\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "UNMAP ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "UNMAP ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << "\"];" << std::endl; @@ -1548,7 +1534,7 @@ void MemCpyCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#C7EB15\" label=\""; Stream << "ID = " << this << " ; "; - Stream << "MEMCPY ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "MEMCPY ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue << "\\n"; Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue << "\\n"; @@ -1604,7 +1590,7 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#f1337f\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "UPDATE REQ ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "UPDATE REQ ON " << queueDeviceToString(MQueue.get()) << "\\n"; bool IsReqOnBuffer = MDstReq.MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer; Stream << "TYPE: " << (IsReqOnBuffer ? "Buffer" : "Image") << "\\n"; @@ -1780,7 +1766,7 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "MEMCPY HOST ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "MEMCPY HOST ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << "\"];" << std::endl; @@ -1971,7 +1957,7 @@ void instrumentationAddExtraKernelMetadata( if (!SyclKernel->isCreatedFromSource()) EliminatedArgMask = SyclKernel->getKernelArgMask(); } else { - assert(Queue && "Queue with submitted kernel could not be on host"); + assert(Queue && "Kernel submissions should have an associated queue"); std::tie(Kernel, KernelMutex, EliminatedArgMask, Program) = detail::ProgramManager::getInstance().getOrCreateKernel( Queue->getContextImplPtr(), Queue->getDeviceImplPtr(), KernelName); @@ -2154,7 +2140,7 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EXEC CG ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "EXEC CG ON " << queueDeviceToString(MQueue.get()) << "\\n"; switch (MCommandGroup->getType()) { case detail::CG::Kernel: { @@ -2345,7 +2331,7 @@ static pi_result SetKernelParamsAndLaunch( const KernelArgMask *EliminatedArgMask, const std::function &getMemAllocationFunc, bool IsCooperative) { - assert(Queue && "Queue with submitted kernel could not be on host"); + assert(Queue && "Kernel submissions should have an associated queue"); const PluginPtr &Plugin = Queue->getPlugin(); auto setFunc = [&Plugin, Kernel, &DeviceImageImpl, &getMemAllocationFunc, @@ -2536,7 +2522,7 @@ pi_int32 enqueueImpKernel( const std::function &getMemAllocationFunc, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, const bool KernelIsCooperative) { - assert(Queue && "Queue with submitted 
kernel could not be on host"); + assert(Queue && "Kernel submissions should have an associated queue"); // Run OpenCL kernel auto ContextImpl = Queue->getContextImplPtr(); auto DeviceImpl = Queue->getDeviceImplPtr(); @@ -2652,7 +2638,7 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, std::vector &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read) { assert(Queue && - "Queue with submitted read write host pipe could not be on host"); + "ReadWrite host pipe submissions should have an associated queue"); detail::HostPipeMapEntry *hostPipeEntry = ProgramManager::getInstance().getHostPipeEntry(PipeName); @@ -2702,7 +2688,7 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, } pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { - assert(MQueue && "Device queue is required for command buffer enqueue"); + assert(MQueue && "Command buffer enqueue should have an associated queue"); // Wait on host command dependencies waitForPreparedHostEvents(); @@ -2941,7 +2927,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::Kernel: { - assert(MQueue && "Device queue must be present for kernel command"); + assert(MQueue && "Kernel submissions should have an associated queue"); CGExecKernel *ExecKernel = (CGExecKernel *)MCommandGroup.get(); NDRDescT &NDRDesc = ExecKernel->MNDRDesc; @@ -3094,7 +3080,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::Barrier: { - assert(MQueue && "Device queue must be present for barrier command"); + assert(MQueue && "Barrier submission should have an associated queue"); const PluginPtr &Plugin = MQueue->getPlugin(); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); @@ -3105,7 +3091,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { } case CG::CGTYPE::BarrierWaitlist: { assert(MQueue && - "Device queue must be present for barrier with wait list command"); + "Barrier submission should have an associated queue"); CGBarrier *Barrier = static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; std::vector PiEvents = @@ -3173,7 +3159,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { typeSize, RawEvents, EventImpl, read); } case CG::CGTYPE::ExecCommandBuffer: { - assert(MQueue && "Device queue must be present for command buffer enqueue"); + assert(MQueue && "Command buffer submissions should have an associated queue"); CGExecCommandBuffer *CmdBufferCG = static_cast(MCommandGroup.get()); if (MEvent != nullptr) @@ -3197,7 +3183,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::SemaphoreWait: { - assert(MQueue && "Device queue must be present for semaphore wait command"); + assert(MQueue && "Semaphore wait submissions should have an associated queue"); CGSemaphoreWait *SemWait = (CGSemaphoreWait *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); @@ -3211,7 +3197,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { } case CG::CGTYPE::SemaphoreSignal: { assert(MQueue && - "Device queue must be present for semaphore signal command"); + "Semaphore signal submissions should have an associated queue"); CGSemaphoreSignal *SemSignal = (CGSemaphoreSignal *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); @@ -3349,7 +3335,7 @@ void KernelFusionCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "KERNEL FUSION on " << 
queueDeviceToString(MQueue) << "\\n" + Stream << "KERNEL FUSION on " << queueDeviceToString(MQueue.get()) << "\\n" << "FUSION LIST: {"; bool Initial = true; for (auto *Cmd : MFusionList) { diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 7cfc0446fdd69..284985b2f9c16 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -678,7 +678,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { if (const char *HUMConfig = SYCLConfig::get()) { if (std::strcmp(HUMConfig, "0") == 0) - return false; + return Ctx == nullptr; if (std::strcmp(HUMConfig, "1") == 0) return true; } @@ -768,7 +768,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // new one. There could be situations when we could setup link with // "not" current allocation, but it will require memory copy. // Can setup link between cl and host allocations only - if ((Context != nullptr) != (Record->MCurContext != nullptr)) { + if ((Context == nullptr) != (Record->MCurContext == nullptr)) { // Linked commands assume that the host allocation is reused by the // plugin runtime and that can lead to unnecessary copy overhead on // devices that do not support host unified memory. Do not link the diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 4acc5b6c3a6a4..a14af63b1a2a0 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -697,7 +697,7 @@ bool CheckEventReadiness(const ContextImplPtr &Context, // don't represent actual dependencies. Calling getContextImpl() would set // their context, which we wish to avoid as it is expensive. // NOP events also don't represent actual dependencies. 
- if ((SyclEventImplPtr->isDefaultConstructed()) || SyclEventImplPtr->isNOP()) { + if (SyclEventImplPtr->isDefaultConstructed() || SyclEventImplPtr->isNOP()) { return true; } if (SyclEventImplPtr->isHost()) { diff --git a/sycl/source/detail/xpti_registry.cpp b/sycl/source/detail/xpti_registry.cpp index c08e620b0583d..ed629b39b9be0 100644 --- a/sycl/source/detail/xpti_registry.cpp +++ b/sycl/source/detail/xpti_registry.cpp @@ -8,6 +8,7 @@ #include #include +#include #ifdef XPTI_ENABLE_INSTRUMENTATION #include "xpti/xpti_trace_framework.hpp" @@ -362,6 +363,20 @@ void XPTIRegistry::sampledImageHostAccessorNotification( #endif } +std::string queueDeviceToString(const queue_impl* const &Queue) { + if (!Queue) + return "HOST"; + auto Device = Queue->get_device(); + if (Device.is_cpu()) + return "CPU"; + else if (Device.is_gpu()) + return "GPU"; + else if (Device.is_accelerator()) + return "ACCELERATOR"; + else + return "UNKNOWN"; +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index 681e2841c027b..a66ac46a0cd34 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -319,6 +319,9 @@ class XPTIScope { }; // class XPTIScope #endif +class queue_impl; +std::string queueDeviceToString(const detail::queue_impl* const &Queue); + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/test-e2e/Config/allowlist.cpp b/sycl/test-e2e/Config/allowlist.cpp index 393326cb76283..063ebabc1aba5 100644 --- a/sycl/test-e2e/Config/allowlist.cpp +++ b/sycl/test-e2e/Config/allowlist.cpp @@ -83,7 +83,7 @@ int main() { // Expected the allowlist to be set but empty if (getenv("TEST_DEVICE_IS_NOT_AVAILABLE")) { - for (const sycl::platform &Platform : sycl::platform::get_platforms()) + if (!sycl::platform::get_platforms().empty()) throw std::runtime_error("Expected no device is available"); return 0; } From 954ba8b77e99d017fdaac40417b75da7419a0d11 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 1 Jul 2024 05:22:06 -0700 Subject: [PATCH 49/52] extra code review changes Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 8 ++++---- sycl/source/detail/event_impl.hpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 58a52230f1269..85afb56fcaf9b 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -33,7 +33,7 @@ extern xpti::trace_event_data_t *GSYCLGraphEvent; #endif // If we do not yet have a context, use the default one. 
-void event_impl::tryToInitContext() { +void event_impl::initContextIfNeeded() { if (MContext || !MIsDefaultConstructed) return; @@ -114,12 +114,12 @@ const sycl::detail::pi::PiEvent &event_impl::getHandleRef() const { sycl::detail::pi::PiEvent &event_impl::getHandleRef() { return MEvent; } const ContextImplPtr &event_impl::getContextImpl() { - tryToInitContext(); + initContextIfNeeded(); return MContext; } const PluginPtr &event_impl::getPlugin() { - tryToInitContext(); + initContextIfNeeded(); return MContext->getPlugin(); } @@ -456,7 +456,7 @@ void HostProfilingInfo::end() { EndTime = getTimestamp(); } pi_native_handle event_impl::getNative() { if (isHost()) return {}; - tryToInitContext(); + initContextIfNeeded(); auto Plugin = getPlugin(); if (MIsDefaultConstructed && !MEvent) { diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index f4c2ac2e90a86..e52ac40ad78d7 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -274,7 +274,7 @@ class event_impl { ContextImplPtr getContextImplPtr() { if (MIsDefaultConstructed) - tryToInitContext(); + initContextIfNeeded(); return MContext; } @@ -400,7 +400,7 @@ class event_impl { // Events constructed without a context will lazily use the default context // when needed. - void tryToInitContext(); + void initContextIfNeeded(); // Event class represents 3 different kinds of operations: // | type | has PI event | MContext | MIsHostTask | MIsDefaultConstructed | // | dev | true | !nullptr | false | false | From 3fb26e0fdc88ee470b6a360f0fda3f3a35137b9c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 1 Jul 2024 05:35:49 -0700 Subject: [PATCH 50/52] fix format Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/handler.hpp | 8 ++++---- sycl/source/detail/queue_impl.cpp | 3 +-- sycl/source/detail/scheduler/commands.cpp | 12 +++++++----- sycl/source/detail/scheduler/graph_builder.cpp | 5 ++--- sycl/source/detail/scheduler/scheduler.cpp | 5 ++--- sycl/source/detail/xpti_registry.cpp | 4 ++-- sycl/source/detail/xpti_registry.hpp | 2 +- sycl/source/handler.cpp | 15 +++++++-------- sycl/test-e2e/Config/allowlist.cpp | 2 +- .../scheduler/EnqueueWithDependsOnDeps.cpp | 8 ++++---- 10 files changed, 31 insertions(+), 33 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index ec59dc8aece7c..61b23ffd707d5 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -488,8 +488,8 @@ class __SYCL_EXPORT handler { /// \param IsHost indicates if this handler is created for SYCL host device. /// \param CallerNeedsEvent indicates if the event resulting from this handler /// is needed by the caller. - handler(std::shared_ptr Queue, bool /* ABI break: remove */, - bool CallerNeedsEvent); + handler(std::shared_ptr Queue, + bool /* ABI break: remove */, bool CallerNeedsEvent); /// Constructs SYCL handler from the associated queue and the submission's /// primary and secondary queue. @@ -504,8 +504,8 @@ class __SYCL_EXPORT handler { /// is needed by the caller. handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, bool /* ABI break: remove */, - bool CallerNeedsEvent); + std::shared_ptr SecondaryQueue, + bool /* ABI break: remove */, bool CallerNeedsEvent); /// Constructs SYCL handler from Graph. 
/// diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 45ca3aa0b2291..588254743701f 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -354,8 +354,7 @@ event queue_impl::submit_impl(const std::function &CGF, bool CallerNeedsEvent, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess) { - handler Handler(Self, PrimaryQueue, SecondaryQueue, false, - CallerNeedsEvent); + handler Handler(Self, PrimaryQueue, SecondaryQueue, false, CallerNeedsEvent); Handler.saveCodeLoc(Loc); { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 0b7f38d6e429d..38aa77e0c92ed 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -80,7 +80,8 @@ static size_t deviceToID(const device &Device) { } static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) { - xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue.get())); + xpti::addMetadata(TraceEvent, "sycl_device_type", + queueDeviceToString(Queue.get())); if (Queue) { xpti::addMetadata(TraceEvent, "sycl_device", deviceToID(Queue->get_device())); @@ -3099,8 +3100,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::BarrierWaitlist: { - assert(MQueue && - "Barrier submission should have an associated queue"); + assert(MQueue && "Barrier submission should have an associated queue"); CGBarrier *Barrier = static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; std::vector PiEvents = @@ -3168,7 +3168,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { typeSize, RawEvents, EventImpl, read); } case CG::CGTYPE::ExecCommandBuffer: { - assert(MQueue && "Command buffer submissions should have an associated queue"); + assert(MQueue && + "Command buffer submissions should have an associated queue"); CGExecCommandBuffer *CmdBufferCG = static_cast(MCommandGroup.get()); if (MEvent != nullptr) @@ -3192,7 +3193,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::SemaphoreWait: { - assert(MQueue && "Semaphore wait submissions should have an associated queue"); + assert(MQueue && + "Semaphore wait submissions should have an associated queue"); CGSemaphoreWait *SemWait = (CGSemaphoreWait *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index dcd4a0aa96dce..f8397016fce41 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1339,9 +1339,8 @@ Command *Scheduler::GraphBuilder::connectDepEvent( /* DepEvents = */ {DepEvent}), CG::CodeplayHostTask, /* Payload */ {})); - ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), nullptr, - /*EventNeeded=*/true); + ConnectCmd = new ExecCGCommand(std::move(ConnectCG), nullptr, + /*EventNeeded=*/true); } catch (const std::bad_alloc &) { throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index cea700a311b7d..fbea6f14dea3d 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -115,9 +115,8 @@ EventImplPtr Scheduler::addCG( NewEvent = NewCmd->getEvent(); break; case CG::CodeplayHostTask: { - auto Result = - 
MGraphBuilder.addCG(std::move(CommandGroup), nullptr, - AuxiliaryCmds, EventNeeded); + auto Result = MGraphBuilder.addCG(std::move(CommandGroup), nullptr, + AuxiliaryCmds, EventNeeded); NewCmd = Result.NewCmd; NewEvent = Result.NewEvent; ShouldEnqueue = Result.ShouldEnqueue; diff --git a/sycl/source/detail/xpti_registry.cpp b/sycl/source/detail/xpti_registry.cpp index ed629b39b9be0..1884f5cd34265 100644 --- a/sycl/source/detail/xpti_registry.cpp +++ b/sycl/source/detail/xpti_registry.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include -#include #include +#include #ifdef XPTI_ENABLE_INSTRUMENTATION #include "xpti/xpti_trace_framework.hpp" @@ -363,7 +363,7 @@ void XPTIRegistry::sampledImageHostAccessorNotification( #endif } -std::string queueDeviceToString(const queue_impl* const &Queue) { +std::string queueDeviceToString(const queue_impl *const &Queue) { if (!Queue) return "HOST"; auto Device = Queue->get_device(); diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index a66ac46a0cd34..356679a75c2fb 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -320,7 +320,7 @@ class XPTIScope { #endif class queue_impl; -std::string queueDeviceToString(const detail::queue_impl* const &Queue); +std::string queueDeviceToString(const detail::queue_impl *const &Queue); } // namespace detail } // namespace _V1 diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 011d3c4efce22..72277bb39ed31 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -87,8 +87,7 @@ handler::handler(std::shared_ptr Queue, bool) /// TODO: Unused. Remove with ABI break. handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, - bool) + std::shared_ptr SecondaryQueue, bool) : handler(Queue, PrimaryQueue, SecondaryQueue, false, /*CallerNeedsEvent=*/true) {} @@ -98,8 +97,8 @@ handler::handler(std::shared_ptr Queue, bool, handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, - bool, bool CallerNeedsEvent) + std::shared_ptr SecondaryQueue, bool, + bool CallerNeedsEvent) : MImpl(std::make_shared(std::move(PrimaryQueue), std::move(SecondaryQueue), CallerNeedsEvent)), @@ -287,10 +286,10 @@ event handler::finalize() { detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); #endif - Result = enqueueImpKernel( - MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel, - MKernelName.c_str(), RawEvents, NewEvent, nullptr, - MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative); + Result = enqueueImpKernel(MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, + MKernel, MKernelName.c_str(), RawEvents, + NewEvent, nullptr, MImpl->MKernelCacheConfig, + MImpl->MKernelIsCooperative); #ifdef XPTI_ENABLE_INSTRUMENTATION // Emit signal only when event is created if (NewEvent != nullptr) { diff --git a/sycl/test-e2e/Config/allowlist.cpp b/sycl/test-e2e/Config/allowlist.cpp index 063ebabc1aba5..56dfbc081fb06 100644 --- a/sycl/test-e2e/Config/allowlist.cpp +++ b/sycl/test-e2e/Config/allowlist.cpp @@ -83,7 +83,7 @@ int main() { // Expected the allowlist to be set but empty if (getenv("TEST_DEVICE_IS_NOT_AVAILABLE")) { - if (!sycl::platform::get_platforms().empty()) + if (!sycl::platform::get_platforms().empty()) throw std::runtime_error("Expected no device is available"); return 0; } diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp 
b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 5ad8a17af15d9..31d4e92bf89a8 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -78,10 +78,10 @@ class DependsOnTests : public ::testing::Test { std::unique_ptr CmdGroup = MockCGH.finalize(); - detail::Command *NewCmd = MS.addCG( - std::move(CmdGroup), - Type == TestCGType::HOST_TASK ? nullptr : QueueDevImpl, - ToEnqueue, /*EventNeeded=*/true); + detail::Command *NewCmd = + MS.addCG(std::move(CmdGroup), + Type == TestCGType::HOST_TASK ? nullptr : QueueDevImpl, + ToEnqueue, /*EventNeeded=*/true); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; } From 67a546270431a328f5920883732bce9820c394df Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 1 Jul 2024 05:42:16 -0700 Subject: [PATCH 51/52] fix format 2 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 4e9936fe042fb..123efc3d87af6 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -350,9 +350,7 @@ class queue_impl { bool hasDiscardEventsProperty() const { return MDiscardEvents; } /// \return true if this queue allows for discarded events. - bool supportsDiscardingPiEvents() const { - return MIsInorder; - } + bool supportsDiscardingPiEvents() const { return MIsInorder; } bool isInOrder() const { return MIsInorder; } From 76a073c7d04b31c7952d1ce3f6e9dda37f36e800 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 1 Jul 2024 10:09:15 -0700 Subject: [PATCH 52/52] update win symbols Signed-off-by: Tikhomirova, Kseniya --- sycl/test/abi/sycl_symbols_windows.dump | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 54c7a77403c92..d02be89140c5a 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -569,10 +569,10 @@ ??0half@host_half_impl@detail@_V1@sycl@@QEAA@AEBM@Z ??0half@host_half_impl@detail@_V1@sycl@@QEAA@G@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z -??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N1@Z -??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N@Z +??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N1@Z +??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N@Z ??0host_selector@_V1@sycl@@QEAA@$$QEAV012@@Z ??0host_selector@_V1@sycl@@QEAA@AEBV012@@Z ??0host_selector@_V1@sycl@@QEAA@XZ @@ -4084,7 +4084,6 @@ ?frexp_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAH@Z ?frexp_impl@detail@_V1@sycl@@YAMMPEAH@Z ?frexp_impl@detail@_V1@sycl@@YANNPEAH@Z -?generateFlushCommand@stream_impl@detail@_V1@sycl@@QEAAXAEAVhandler@34@@Z ?get@context@_V1@sycl@@QEBAPEAU_cl_context@@XZ ?get@device@_V1@sycl@@QEBAPEAU_cl_device_id@@XZ ?get@kernel@_V1@sycl@@QEBAPEAU_cl_kernel@@XZ
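
Note (illustrative only, not part of the patch series): the series centralizes device-type reporting in the new queueDeviceToString() helper in xpti_registry, which maps a possibly-null queue pointer (a host task) to "HOST" and otherwise reports the device type. Below is a minimal standalone C++ sketch of that mapping. DeviceKind, FakeQueue, and describeQueueDevice are hypothetical stand-ins for illustration; they are not the runtime's queue_impl or device API.

#include <iostream>
#include <string>

// Hypothetical stand-in for the device type a queue was created for.
enum class DeviceKind { Cpu, Gpu, Accelerator, Other };

// Hypothetical stand-in for detail::queue_impl; only carries the device kind.
struct FakeQueue {
  DeviceKind Kind;
};

// Mirrors the mapping used by the new helper: a null queue (host task)
// reports "HOST", otherwise the device type string.
std::string describeQueueDevice(const FakeQueue *Queue) {
  if (!Queue)
    return "HOST";
  switch (Queue->Kind) {
  case DeviceKind::Cpu:
    return "CPU";
  case DeviceKind::Gpu:
    return "GPU";
  case DeviceKind::Accelerator:
    return "ACCELERATOR";
  default:
    return "UNKNOWN";
  }
}

int main() {
  FakeQueue GpuQueue{DeviceKind::Gpu};
  std::cout << describeQueueDevice(&GpuQueue) << "\n"; // prints: GPU
  std::cout << describeQueueDevice(nullptr) << "\n";   // prints: HOST
  return 0;
}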