From 2f1181544fe549cc976e4965bd6396f837408806 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 13 Oct 2025 23:27:55 +0200 Subject: [PATCH 01/10] Add support for kernel launch properties in no-handler path --- .../sycl/detail/kernel_launch_helper.hpp | 215 +++++++++++++++++- .../oneapi/experimental/enqueue_functions.hpp | 37 +-- sycl/include/sycl/handler.hpp | 151 +++++------- .../sycl/khr/free_function_commands.hpp | 37 ++- sycl/include/sycl/queue.hpp | 80 +++++-- sycl/source/detail/kernel_data.hpp | 66 ++++++ sycl/source/detail/queue_impl.cpp | 7 + sycl/source/detail/queue_impl.hpp | 7 +- sycl/source/handler.cpp | 57 ++--- sycl/source/queue.cpp | 12 +- .../non_esimd_kernel_fp_control.cpp | 4 +- .../include_deps/sycl_detail_core.hpp.cpp | 4 +- .../virtual-functions/properties-negative.cpp | 6 +- 13 files changed, 470 insertions(+), 213 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index a80ddc9feb83..781a992cef03 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -14,6 +14,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -253,21 +256,219 @@ struct KernelWrapper< } }; // KernelWrapper struct -struct KernelLaunchPropertyWrapper { - template - static void parseProperties([[maybe_unused]] PropertyProcessor h, - [[maybe_unused]] const KernelType &KernelFunc) { +// This struct is inherited by sycl::handler. +class KernelLaunchPropertyWrapper { +public: + // This struct is used to store kernel launch properties. + // std::optional is used to indicate that the property is not set. + // In some code paths, kernel launch properties are set multiple times + // for the same kernel, that is why using std::optional to avoid overriding + // previously set properties. + struct KernelLaunchPropertiesT { + + struct ScopeForwardProgressProperty { + std::optional + Guarantee; + std::optional ExecScope; + std::optional + CoordinationScope; + }; + + std::optional MCacheConfig = std::nullopt; + std::optional MIsCooperative = std::nullopt; + std::optional MWorkGroupMemorySize = std::nullopt; + std::optional MUsesClusterLaunch = std::nullopt; + size_t MClusterDims = 0; + std::array MClusterSize = {0, 0, 0}; + + // Forward progress guarantee properties for work_item, sub_group and + // work_group scopes. + // Indexed by ExecutionScope enum. + std::array MForwardProgressProperties; + + KernelLaunchPropertiesT() = default; + + // TODO: Do you even need this? + KernelLaunchPropertiesT( + ur_kernel_cache_config_t _CacheConfig, bool _IsCooperative, + uint32_t _WorkGroupMemorySize, bool _UsesClusterLaunch, + size_t _ClusterDims, std::array _ClusterSize, + std::array _ForwardProgressProperties) + : MCacheConfig(_CacheConfig), MIsCooperative(_IsCooperative), + MWorkGroupMemorySize(_WorkGroupMemorySize), + MUsesClusterLaunch(_UsesClusterLaunch), MClusterDims(_ClusterDims), + MClusterSize(_ClusterSize), + MForwardProgressProperties(_ForwardProgressProperties) {} + }; // struct KernelLaunchPropertiesT + + /// Process runtime kernel properties. + /// + /// Stores information about kernel properties into the handler. + template + static KernelLaunchPropertiesT + processKernelLaunchProperties(PropertiesT Props) { + using namespace sycl::ext::oneapi::experimental; + using namespace sycl::ext::oneapi::experimental::detail; + KernelLaunchPropertiesT retval; + + // Process Kernel cache configuration property. + { + if constexpr (PropertiesT::template has_property< + sycl::ext::intel::experimental::cache_config_key>()) { + auto Config = Props.template get_property< + sycl::ext::intel::experimental::cache_config_key>(); + if (Config == sycl::ext::intel::experimental::large_slm) { + retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + } else if (Config == sycl::ext::intel::experimental::large_data) { + retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + } + } else { + std::ignore = Props; + } + } + + // Process Kernel cooperative property. + { + if constexpr (PropertiesT::template has_property()) + retval.MIsCooperative = true; + } + + // Process device progress properties. + { + using forward_progress = + sycl::ext::oneapi::experimental::forward_progress_guarantee; + if constexpr (PropertiesT::template has_property< + work_group_progress_key>()) { + auto prop = Props.template get_property(); + retval.MForwardProgressProperties[0].Guarantee = prop.guarantee; + retval.MForwardProgressProperties[0].ExecScope = + execution_scope::work_group; + retval.MForwardProgressProperties[0].CoordinationScope = + prop.coordinationScope; + + // If we are here, the device supports the guarantee required but there + // is a caveat in that if the guarantee required is a concurrent + // guarantee, then we most likely also need to enable cooperative launch + // of the kernel. That is, although the device supports the required + // guarantee, some setup work is needed to truly make the device provide + // that guarantee at runtime. Otherwise, we will get the default + // guarantee which is weaker than concurrent. Same reasoning applies for + // sub_group but not for work_item. + // TODO: Further design work is probably needed to reflect this behavior + // in Unified Runtime. + if constexpr (prop.guarantee == forward_progress::concurrent) + retval.MIsCooperative = true; + } + if constexpr (PropertiesT::template has_property< + sub_group_progress_key>()) { + auto prop = Props.template get_property(); + retval.MForwardProgressProperties[1].Guarantee = prop.guarantee; + retval.MForwardProgressProperties[1].ExecScope = + execution_scope::sub_group; + retval.MForwardProgressProperties[1].CoordinationScope = + prop.coordinationScope; + + // Same reasoning as above for work_group applies here. + if constexpr (prop.guarantee == forward_progress::concurrent) + retval.MIsCooperative = true; + } + if constexpr (PropertiesT::template has_property< + work_item_progress_key>()) { + auto prop = Props.template get_property(); + retval.MForwardProgressProperties[2].Guarantee = prop.guarantee; + retval.MForwardProgressProperties[2].ExecScope = + execution_scope::work_item; + retval.MForwardProgressProperties[2].CoordinationScope = + prop.coordinationScope; + } + } + + // Process work group scratch memory property. + { + if constexpr (PropertiesT::template has_property< + work_group_scratch_size>()) { + auto WorkGroupMemSize = + Props.template get_property(); + retval.MWorkGroupMemorySize = WorkGroupMemSize.size; + } + } + + // Parse cluster properties. + { + constexpr std::size_t ClusterDim = getClusterDim(); + if constexpr (ClusterDim > 0) { + + auto ClusterSize = + Props.template get_property>() + .get_cluster_size(); + retval.MUsesClusterLaunch = true; + retval.MClusterDims = ClusterDim; + if (ClusterDim == 1) { + retval.MClusterSize[0] = ClusterSize[0]; + } else if (ClusterDim == 2) { + retval.MClusterSize[0] = ClusterSize[0]; + retval.MClusterSize[1] = ClusterSize[1]; + } else if (ClusterDim == 3) { + retval.MClusterSize[0] = ClusterSize[0]; + retval.MClusterSize[1] = ClusterSize[1]; + retval.MClusterSize[2] = ClusterSize[2]; + } else { + assert(ClusterDim <= 3 && + "Only 1D, 2D, and 3D cluster launch is supported."); + } + } + } + + return retval; + } + + /// Process kernel properties. + /// + /// Stores information about kernel properties into the handler. + /// + /// Note: it is important that this function *does not* depend on kernel + /// name or kernel type, because then it will be instantiated for every + /// kernel, even though body of those instantiated functions could be almost + /// the same, thus unnecessary increasing compilation time. + template < + bool IsESIMDKernel, + typename PropertiesT = ext::oneapi::experimental::empty_properties_t> + static KernelLaunchPropertiesT processKernelProperties(PropertiesT Props) { + static_assert( + ext::oneapi::experimental::is_property_list::value, + "Template type is not a property list."); + static_assert( + !PropertiesT::template has_property< + sycl::ext::intel::experimental::fp_control_key>() || + (PropertiesT::template has_property< + sycl::ext::intel::experimental::fp_control_key>() && + IsESIMDKernel), + "Floating point control property is supported for ESIMD kernels only."); + static_assert( + !PropertiesT::template has_property< + sycl::ext::oneapi::experimental::indirectly_callable_key>(), + "indirectly_callable property cannot be applied to SYCL kernels"); + + return processKernelLaunchProperties(Props); + } + + // Returns KernelLaunchPropertiesT or std::nullopt based on whether the + // kernel functor has a get method that returns properties. + template + static std::optional + parseProperties([[maybe_unused]] const KernelType &KernelFunc) { #ifndef __SYCL_DEVICE_ONLY__ // If there are properties provided by get method then process them. if constexpr (ext::oneapi::experimental::detail:: HasKernelPropertiesGetMethod::value) { - h->template processProperties< - detail::CompileTimeKernelInfo.IsESIMD>( + return processKernelProperties( KernelFunc.get(ext::oneapi::experimental::properties_tag{})); } #endif + // If there are no properties provided by get method then return empty + // optional. + return std::nullopt; } }; // KernelLaunchPropertyWrapper struct diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index e393bd626d4d..49aa3a252682 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -259,17 +259,12 @@ template void nd_launch(queue Q, nd_range Range, const KernelType &KernelObj, ReductionsT &&...Reductions) { - // TODO The handler-less path does not support reductions, kernel - // function properties and kernel functions with the kernel_handler - // type argument yet. + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. if constexpr (sizeof...(ReductionsT) == 0 && - !(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { - detail::submit_kernel_direct(std::move(Q), empty_properties_t{}, - Range, KernelObj); + detail::submit_kernel_direct(std::move(Q), Range, KernelObj); } else { submit(std::move(Q), [&](handler &CGH) { nd_launch(CGH, Range, KernelObj, @@ -296,13 +291,25 @@ template void nd_launch(queue Q, launch_config, Properties> Config, const KernelType &KernelObj, ReductionsT &&...Reductions) { - // TODO This overload of the nd_launch function takes the kernel function - // properties, which are not yet supported for the handler-less path, - // so it only supports handler based submission for now - submit(std::move(Q), [&](handler &CGH) { - nd_launch(CGH, Config, KernelObj, - std::forward(Reductions)...); - }); + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(ReductionsT) == 0 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, sycl::nd_item>::value)) { + + ext::oneapi::experimental::detail::LaunchConfigAccess, + Properties> + ConfigAccess(Config); + + detail::submit_kernel_direct(std::move(Q), + ConfigAccess.getRange(), KernelObj, + Config.getProperties()); + } else { + submit(std::move(Q), [&](handler &CGH) { + nd_launch(CGH, Config, KernelObj, + std::forward(Reductions)...); + }); + } } template diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 67f21bc05857..78d58c80d6b4 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -877,6 +877,7 @@ class __SYCL_EXPORT handler { } } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, @@ -901,64 +902,10 @@ class __SYCL_EXPORT handler { /// Stores information about kernel properties into the handler. template void processLaunchProperties(PropertiesT Props) { - if constexpr (PropertiesT::template has_property< - sycl::ext::intel::experimental::cache_config_key>()) { - auto Config = Props.template get_property< - sycl::ext::intel::experimental::cache_config_key>(); - if (Config == sycl::ext::intel::experimental::large_slm) { - setKernelCacheConfig(StableKernelCacheConfig::LargeSLM); - } else if (Config == sycl::ext::intel::experimental::large_data) { - setKernelCacheConfig(StableKernelCacheConfig::LargeData); - } - } else { - (void)Props; - } - - constexpr bool UsesRootSync = PropertiesT::template has_property< - sycl::ext::oneapi::experimental::use_root_sync_key>(); - if (UsesRootSync) { - setKernelIsCooperative(UsesRootSync); - } - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_group_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::work_group_progress_key>(); - verifyDeviceHasProgressGuarantee( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_group, - prop.coordinationScope); - } - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - sub_group_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::sub_group_progress_key>(); - verifyDeviceHasProgressGuarantee( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::sub_group, - prop.coordinationScope); - } - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_item_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::work_item_progress_key>(); - verifyDeviceHasProgressGuarantee( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_item, - prop.coordinationScope); - } - - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_group_scratch_size>()) { - auto WorkGroupMemSize = Props.template get_property< - sycl::ext::oneapi::experimental::work_group_scratch_size>(); - setKernelWorkGroupMem(WorkGroupMemSize.size); - } - - checkAndSetClusterRange(Props); + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT ParsedProp = + detail::KernelLaunchPropertyWrapper::processKernelLaunchProperties< + PropertiesT>(Props); + setKernelLaunchProperties(ParsedProp); } /// Process kernel properties. @@ -973,23 +920,12 @@ class __SYCL_EXPORT handler { bool IsESIMDKernel, typename PropertiesT = ext::oneapi::experimental::empty_properties_t> void processProperties(PropertiesT Props) { - static_assert( - ext::oneapi::experimental::is_property_list::value, - "Template type is not a property list."); - static_assert( - !PropertiesT::template has_property< - sycl::ext::intel::experimental::fp_control_key>() || - (PropertiesT::template has_property< - sycl::ext::intel::experimental::fp_control_key>() && - IsESIMDKernel), - "Floating point control property is supported for ESIMD kernels only."); - static_assert( - !PropertiesT::template has_property< - sycl::ext::oneapi::experimental::indirectly_callable_key>(), - "indirectly_callable property cannot be applied to SYCL kernels"); - - processLaunchProperties(Props); + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT ParsedProp = + detail::KernelLaunchPropertyWrapper::processKernelProperties< + IsESIMDKernel>(Props); + setKernelLaunchProperties(ParsedProp); } +#endif // INTEL_PREVIEW_BREAKING_CHANGES /// Checks whether it is possible to copy the source shape to the destination /// shape(the shapes are described by the accessor ranges) by using @@ -1297,8 +1233,11 @@ class __SYCL_EXPORT handler { decltype(Wrapper), TransformedArgType, PropertiesT>::wrap(Wrapper); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - Wrapper); + if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties< + KName, Info.IsESIMD>(Wrapper)) { + setKernelLaunchProperties(*prop); + } + #ifndef __SYCL_DEVICE_ONLY__ verifyUsedKernelBundleInternal(Info.Name); // We are executing over the rounded range, but there are still @@ -1322,11 +1261,17 @@ class __SYCL_EXPORT handler { // kernel is generated detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties< + NameT, Info.IsESIMD>(KernelFunc)) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ verifyUsedKernelBundleInternal(Info.Name); - processProperties(Props); + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT + ProcessedProps = + detail::KernelLaunchPropertyWrapper::processKernelProperties< + Info.IsESIMD, PropertiesT>(Props); + setKernelLaunchProperties(ProcessedProps); detail::checkValueRange(UserRange); setNDRangeDescriptor(std::move(UserRange)); StoreLambda( @@ -1355,7 +1300,10 @@ class __SYCL_EXPORT handler { setDeviceKernelInfo(std::move(Kernel)); detail::checkValueRange(NumWorkItems); setNDRangeDescriptor(std::move(NumWorkItems)); - processLaunchProperties(Props); + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT ParsedProp = + detail::KernelLaunchPropertyWrapper::processKernelLaunchProperties< + PropertiesT>(Props); + setKernelLaunchProperties(ParsedProp); extractArgsAndReqs(); #endif } @@ -1378,7 +1326,10 @@ class __SYCL_EXPORT handler { setDeviceKernelInfo(std::move(Kernel)); detail::checkValueRange(NDRange); setNDRangeDescriptor(std::move(NDRange)); - processLaunchProperties(Props); + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT ParsedProp = + detail::KernelLaunchPropertyWrapper::processKernelLaunchProperties< + PropertiesT>(Props); + setKernelLaunchProperties(ParsedProp); extractArgsAndReqs(); #endif } @@ -1395,12 +1346,14 @@ class __SYCL_EXPORT handler { using NameT = typename detail::get_kernel_name_t::name; (void)Props; + constexpr auto Info = detail::CompileTimeKernelInfo; detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties< + NameT, Info.IsESIMD>(KernelFunc)) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ - constexpr auto Info = detail::CompileTimeKernelInfo; if constexpr (WrapAsVal == detail::WrapAs::single_task) { throwOnKernelParameterMisuse(Info); } @@ -1416,7 +1369,11 @@ class __SYCL_EXPORT handler { } StoreLambda(std::move(KernelFunc)); - processProperties(Props); + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT + ProcessedProps = + detail::KernelLaunchPropertyWrapper::processKernelProperties< + Info.IsESIMD, PropertiesT>(Props); + setKernelLaunchProperties(ProcessedProps); #endif } @@ -1439,8 +1396,10 @@ class __SYCL_EXPORT handler { (void)Kernel; detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties( + KernelFunc)) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ constexpr auto Info = detail::CompileTimeKernelInfo; if constexpr (WrapAsVal == detail::WrapAs::single_task) { @@ -1467,7 +1426,11 @@ class __SYCL_EXPORT handler { "the kernel name must match the name of the lambda"); } StoreLambda(std::move(KernelFunc)); - processProperties(Props); + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT + ProcessedProps = + detail::KernelLaunchPropertyWrapper::processKernelProperties< + Info.IsESIMD, PropertiesT>(Props); + setKernelLaunchProperties(ProcessedProps); #endif } #endif // __INTEL_PREVIEW_BREAKING_CHANGES @@ -3490,7 +3453,9 @@ class __SYCL_EXPORT handler { bool IsDeviceImageScoped, size_t NumBytes, size_t Offset); - // Changing values in this will break ABI/API. +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // Modeled after ur_kernel_cache_config_t + // Used as an argument to setKernelCacheConfig that's part of the ABI. enum class StableKernelCacheConfig : int32_t { Default = 0, LargeSLM = 1, @@ -3503,15 +3468,18 @@ class __SYCL_EXPORT handler { void setKernelIsCooperative(bool); // Set using cuda thread block cluster launch flag and set the launch bounds. -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims); -#endif void setKernelClusterLaunch(sycl::range<3> ClusterSize); void setKernelClusterLaunch(sycl::range<2> ClusterSize); void setKernelClusterLaunch(sycl::range<1> ClusterSize); // Set the request work group memory size (work_group_static ext). void setKernelWorkGroupMem(size_t Size); +#endif + + void setKernelLaunchProperties( + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT + &KernelLaunchProperties); // Various checks that are only meaningful for host compilation, because they // result in runtime errors (i.e. exceptions being thrown). To save time @@ -3668,7 +3636,6 @@ class __SYCL_EXPORT handler { void instantiateKernelOnHost(void *InstantiateKernelOnHostPtr); friend class detail::HandlerAccess; - friend struct detail::KernelLaunchPropertyWrapper; #ifdef __INTEL_PREVIEW_BREAKING_CHANGES __SYCL_DLL_LOCAL detail::handler_impl *get_impl() { return impl; } diff --git a/sycl/include/sycl/khr/free_function_commands.hpp b/sycl/include/sycl/khr/free_function_commands.hpp index b04fac17a6f9..b422964f3a3a 100644 --- a/sycl/include/sycl/khr/free_function_commands.hpp +++ b/sycl/include/sycl/khr/free_function_commands.hpp @@ -157,16 +157,15 @@ template r, range<1> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { - // TODO The handler-less path does not support kernel function properties - // and kernel functions with the kernel_handler type argument yet. + // TODO The handler-less path does not support kernel functions with the + // kernel_handler type argument yet. if constexpr (!(ext::oneapi::experimental::detail:: HasKernelPropertiesGetMethod< const KernelType &>::value) && !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<1>>::value)) { - detail::submit_kernel_direct( - q, ext::oneapi::experimental::empty_properties_t{}, - nd_range<1>(r, size), std::forward(k)); + detail::submit_kernel_direct(q, nd_range<1>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, @@ -178,16 +177,12 @@ template r, range<2> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { - // TODO The handler-less path does not support kernel function properties - // and kernel functions with the kernel_handler type argument yet. - if constexpr (!(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && - !(detail::KernelLambdaHasKernelHandlerArgT< + // TODO The handler-less path does not support kernel functions with the + // kernel_handler type argument yet. + if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<2>>::value)) { - detail::submit_kernel_direct( - q, ext::oneapi::experimental::empty_properties_t{}, - nd_range<2>(r, size), std::forward(k)); + detail::submit_kernel_direct(q, nd_range<2>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, @@ -199,16 +194,12 @@ template r, range<3> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { - // TODO The handler-less path does not support kernel function properties - // and kernel functions with the kernel_handler type argument yet. - if constexpr (!(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && - !(detail::KernelLambdaHasKernelHandlerArgT< + // TODO The handler-less path does not support kernel functions with the + // kernel_handler type argument yet. + if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<3>>::value)) { - detail::submit_kernel_direct( - q, ext::oneapi::experimental::empty_properties_t{}, - nd_range<3>(r, size), std::forward(k)); + detail::submit_kernel_direct(q, nd_range<3>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 755b07f39bbf..8cdf35f50a93 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -68,6 +68,7 @@ event __SYCL_EXPORT submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template @@ -75,6 +76,7 @@ void __SYCL_EXPORT submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); namespace detail { @@ -158,17 +160,13 @@ class __SYCL_EXPORT SubmissionInfo { } // namespace v1 template + typename PropertiesT = ext::oneapi::experimental::empty_properties_t, + typename KernelTypeUniversalRef, int Dims> auto submit_kernel_direct( - const queue &Queue, PropertiesT Props, const nd_range &Range, + const queue &Queue, const nd_range &Range, KernelTypeUniversalRef &&KernelFunc, + PropertiesT ExtraProps = ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { - // TODO Properties not supported yet - (void)Props; - static_assert( - std::is_same_v, - "Setting properties not supported yet for no-CGH kernel submit."); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); using KernelType = @@ -194,6 +192,32 @@ auto submit_kernel_direct( HostKernelRef HostKernel(std::forward(KernelFunc)); + // Get Kernel Launch properties. User can specify properties either + // via specifying get(property_tag{}) method in kernel type or by using + // launch_config API or by explicitly passing them in call to + // parallel_for (deprecated API). + // ExtraProps are properties passed explicitly or via launch_config. + + // Asumption: If user specify properties via launch_config or explicitly + // then we don't check for properties specified via get() method. + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT parsedProps; + if constexpr (std::is_same_v) { + // Use properties passed via. get() method. + if constexpr (ext::oneapi::experimental::detail:: + HasKernelPropertiesGetMethod::value) { + auto prop = KernelFunc.get(ext::oneapi::experimental::properties_tag{}); + parsedProps = + detail::KernelLaunchPropertyWrapper::processKernelProperties( + prop); + } + } else { + // Use ExtraProps + parsedProps = + detail::KernelLaunchPropertyWrapper::processKernelProperties( + ExtraProps); + } + // Instantiating the kernel on the host improves debugging. // Passing this pointer to another translation unit prevents optimization. #ifndef NDEBUG @@ -207,11 +231,11 @@ auto submit_kernel_direct( if constexpr (EventNeeded) { return submit_kernel_direct_with_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, + Queue, Range, HostKernel, DeviceKernelInfoPtr, parsedProps, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); } else { submit_kernel_direct_without_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, + Queue, Range, HostKernel, DeviceKernelInfoPtr, parsedProps, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); } } @@ -3261,11 +3285,24 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return submit( - [&](handler &CGH) { - CGH.template parallel_for(Range, Properties, Rest...); - }, - TlsCodeLocCapture.query()); + using KernelType = std::tuple_element_t<0, std::tuple>; + + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(RestT) == 1 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, sycl::nd_item>::value)) { + + // FIXME: Can it happen that user defined both get() and properties? + // If so, we should use MergedProperties instead of Properties here. + return detail::submit_kernel_direct( + *this, Range, Rest..., Properties, TlsCodeLocCapture.query()); + } else + return submit( + [&](handler &CGH) { + CGH.template parallel_for(Range, Properties, Rest...); + }, + TlsCodeLocCapture.query()); } /// parallel_for version with a kernel represented as a lambda + nd_range that @@ -3282,18 +3319,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); using KernelType = std::tuple_element_t<0, std::tuple>; - // TODO The handler-less path does not support reductions, kernel - // function properties and kernel functions with the kernel_handler - // type argument yet. + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. if constexpr (sizeof...(RestT) == 1 && - !(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { return detail::submit_kernel_direct( - *this, ext::oneapi::experimental::empty_properties_t{}, Range, - Rest..., TlsCodeLocCapture.query()); + *this, Range, Rest..., + ext::oneapi::experimental::empty_properties_t{}, + TlsCodeLocCapture.query()); } else { return submit( [&](handler &CGH) { diff --git a/sycl/source/detail/kernel_data.hpp b/sycl/source/detail/kernel_data.hpp index 7ba849dc33f1..037341b9c069 100644 --- a/sycl/source/detail/kernel_data.hpp +++ b/sycl/source/detail/kernel_data.hpp @@ -13,7 +13,10 @@ #include #include +#include + #include +#include #include @@ -107,6 +110,7 @@ class KernelData { void setDeviceKernelInfoPtr(DeviceKernelInfo *Ptr) { MDeviceKernelInfoPtr = Ptr; } + #ifndef __INTEL_PREVIEW_BREAKING_CHANGES void setKernelInfo(void *KernelFuncPtr, int KernelNumArgs, KernelParamDescGetterT KernelParamDescGetter, @@ -134,6 +138,7 @@ class KernelData { return MDeviceKernelInfoPtr->usesAssert(); } + // Kernel launch properties getter and setters. ur_kernel_cache_config_t getKernelCacheConfig() const { return MKernelCacheConfig; } @@ -163,6 +168,67 @@ class KernelData { MKernelWorkGroupMemorySize = Size; } + void validateAndSetKernelLaunchProperties( + const KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Kprop, + bool HasGraph, const device_impl &dev) { + + // Validate properties before setting. + { + if (HasGraph) { + if (Kprop.MWorkGroupMemorySize) { + throw sycl::exception( + sycl::make_error_code(errc::invalid), + "Setting work group scratch memory size is not yet supported " + "for use with the SYCL Graph extension."); + } + + if (Kprop.MUsesClusterLaunch && *Kprop.MUsesClusterLaunch) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Cluster launch is not yet supported " + "for use with the SYCL Graph extension."); + } + } + + for (int i = 0; i < 3; i++) { + if (Kprop.MForwardProgressProperties[i].Guarantee.has_value()) { + + if (!dev.supportsForwardProgress( + *Kprop.MForwardProgressProperties[i].Guarantee, + *Kprop.MForwardProgressProperties[i].ExecScope, + *Kprop.MForwardProgressProperties[i].CoordinationScope)) { + // TODO: Make the error message more descriptive. + throw sycl::exception( + sycl::make_error_code(errc::feature_not_supported), + "The device associated with the queue does not support the " + "requested forward progress guarantee."); + } + } + } + } + + // Set properties. + if (Kprop.MIsCooperative) + setCooperative(*Kprop.MIsCooperative); + + if (Kprop.MCacheConfig) + setKernelCacheConfig(*Kprop.MCacheConfig); + + if (Kprop.MWorkGroupMemorySize) + setKernelWorkGroupMemorySize(*Kprop.MWorkGroupMemorySize); + + if (Kprop.MUsesClusterLaunch && *Kprop.MUsesClusterLaunch) { + if (Kprop.MClusterDims == 1) + setClusterDimensions(sycl::range<1>{Kprop.MClusterSize[0]}); + else if (Kprop.MClusterDims == 2) + setClusterDimensions( + sycl::range<2>{Kprop.MClusterSize[0], Kprop.MClusterSize[1]}); + else if (Kprop.MClusterDims == 3) + setClusterDimensions(sycl::range<3>{Kprop.MClusterSize[0], + Kprop.MClusterSize[1], + Kprop.MClusterSize[2]}); + } + } + KernelNameStrRefT getKernelName() const { assert(MDeviceKernelInfoPtr); return static_cast(MDeviceKernelInfoPtr->Name); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 4ed73e700d8c..c1e0452796fb 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -567,6 +567,7 @@ EventImplPtr queue_impl::submit_command_to_graph( EventImplPtr queue_impl::submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { KernelData KData; @@ -574,6 +575,12 @@ EventImplPtr queue_impl::submit_kernel_direct_impl( KData.setDeviceKernelInfoPtr(DeviceKernelInfo); KData.setNDRDesc(NDRDesc); + // Validate and set kernel launch properties. + KData.validateAndSetKernelLaunchProperties( + Props, getCommandGraph() != nullptr, /*HasGraph?*/ + getDeviceImpl() /*device_impl*/ + ); + auto SubmitKernelFunc = [&](detail::CG::StorageInitHelper &CGData, bool SchedulerBypass) -> EventImplPtr { if (SchedulerBypass) { diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 49da7aee8c44..4528752ba95f 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -363,10 +363,11 @@ class queue_impl : public std::enable_shared_from_this { event submit_kernel_direct_with_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { detail::EventImplPtr EventImpl = submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, - true, CodeLoc, IsTopCodeLoc); + true, Props, CodeLoc, IsTopCodeLoc); return createSyclObjFromImpl(EventImpl); } @@ -374,9 +375,10 @@ class queue_impl : public std::enable_shared_from_this { void submit_kernel_direct_without_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, - false, CodeLoc, IsTopCodeLoc); + false, Props, CodeLoc, IsTopCodeLoc); } void submit_without_event(const detail::type_erased_cgfo_ty &CGF, @@ -929,6 +931,7 @@ class queue_impl : public std::enable_shared_from_this { EventImplPtr submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 26477c99be62..2aacd56fda15 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1752,51 +1752,17 @@ static bool checkContextSupports(detail::context_impl &ContextImpl, return SupportsOp; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope) { - using execution_scope = sycl::ext::oneapi::experimental::execution_scope; - using forward_progress = - sycl::ext::oneapi::experimental::forward_progress_guarantee; - const bool supported = impl->get_device().supportsForwardProgress( - guarantee, threadScope, coordinationScope); - if (threadScope == execution_scope::work_group) { - if (!supported) { - throw sycl::exception( - sycl::errc::feature_not_supported, - "Required progress guarantee for work groups is not " - "supported by this device."); - } - // If we are here, the device supports the guarantee required but there is a - // caveat in that if the guarantee required is a concurrent guarantee, then - // we most likely also need to enable cooperative launch of the kernel. That - // is, although the device supports the required guarantee, some setup work - // is needed to truly make the device provide that guarantee at runtime. - // Otherwise, we will get the default guarantee which is weaker than - // concurrent. Same reasoning applies for sub_group but not for work_item. - // TODO: Further design work is probably needed to reflect this behavior in - // Unified Runtime. - if (guarantee == forward_progress::concurrent) - setKernelIsCooperative(true); - } else if (threadScope == execution_scope::sub_group) { - if (!supported) { - throw sycl::exception(sycl::errc::feature_not_supported, - "Required progress guarantee for sub groups is not " - "supported by this device."); - } - // Same reasoning as above. - if (guarantee == forward_progress::concurrent) - setKernelIsCooperative(true); - } else { // threadScope is execution_scope::work_item otherwise undefined - // behavior - if (!supported) { - throw sycl::exception(sycl::errc::feature_not_supported, - "Required progress guarantee for work items is not " - "supported by this device."); - } - } + + // FIXME! + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT Kprop; + setKernelLaunchProperties(Kprop); } +#endif bool handler::supportsUSMMemcpy2D() { if (impl->get_graph_or_null()) @@ -1910,6 +1876,13 @@ void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest, }); } +void handler::setKernelLaunchProperties( + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Kprop) { + impl->MKernelData.validateAndSetKernelLaunchProperties( + Kprop, getCommandGraph() != nullptr /*hasGraph?*/, + impl->get_device() /*device_impl*/); +} + #ifndef __INTEL_PREVIEW_BREAKING_CHANGES const std::shared_ptr & handler::getContextImplPtr() const { @@ -1927,6 +1900,7 @@ detail::context_impl &handler::getContextImpl() const { return impl->get_queue().getContextImpl(); } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::setKernelCacheConfig(handler::StableKernelCacheConfig Config) { switch (Config) { case handler::StableKernelCacheConfig::Default: @@ -1945,7 +1919,6 @@ void handler::setKernelIsCooperative(bool KernelIsCooperative) { impl->MKernelData.setCooperative(KernelIsCooperative); } -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: @@ -1961,7 +1934,6 @@ void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { impl->MKernelData.setClusterDimensions(ClusterSize); } } -#endif void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize) { throwIfGraphAssociated< @@ -1989,6 +1961,7 @@ void handler::setKernelWorkGroupMem(size_t Size) { sycl_ext_oneapi_work_group_scratch_memory>(); impl->MKernelData.setKernelWorkGroupMemorySize(Size); } +#endif // __INTEL_PREVIEW_BREAKING_CHANGES void handler::ext_oneapi_graph( ext::oneapi::experimental::command_graph< diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index f34da4785226..73a53a01abda 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -476,27 +476,31 @@ event submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { return getSyclObjImpl(Queue)->submit_kernel_direct_with_event( - Range, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc); + Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); } template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template @@ -504,27 +508,31 @@ void submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { getSyclObjImpl(Queue)->submit_kernel_direct_without_event( - Range, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc); + Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); } template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); } // namespace _V1 diff --git a/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp b/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp index 46d11eccdfe5..e6910484bf52 100644 --- a/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp +++ b/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp @@ -20,7 +20,7 @@ struct ESIMDKernel { int main(void) { queue q; - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} syclex::properties properties7{ intelex::fp_control}; @@ -28,7 +28,7 @@ int main(void) { cgh.single_task(properties7, [=]() {}); }); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} ESIMDKernel Kern; q.submit([&](handler &cgh) { cgh.parallel_for(range<1>(1), Kern); }); diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index cf98e8708254..9f68a2bbe4d9 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -138,6 +138,8 @@ // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp @@ -149,9 +151,7 @@ // CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp // CHECK-NEXT: ext/oneapi/interop_common.hpp // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: feature_test.hpp diff --git a/sycl/test/virtual-functions/properties-negative.cpp b/sycl/test/virtual-functions/properties-negative.cpp index b8e1b75f1d9a..0ef06b3652ad 100644 --- a/sycl/test/virtual-functions/properties-negative.cpp +++ b/sycl/test/virtual-functions/properties-negative.cpp @@ -17,15 +17,15 @@ int main() { oneapi::properties props_int{oneapi::indirectly_callable_in}; oneapi::properties props_user{oneapi::indirectly_callable_in}; - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_empty, [=]() {}); // When both "props_empty" and "props_void" are in use, we won't see the // static assert firing for the second one, because there will be only one // instantiation of handler::processProperties. q.single_task(props_void, [=]() {}); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_int, [=]() {}); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_user, [=]() {}); return 0; From 012c7931acf3fee4409bf86e01fe9b2a9c05be60 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 14 Oct 2025 00:16:14 +0200 Subject: [PATCH 02/10] Minor fixes --- .../sycl/detail/kernel_launch_helper.hpp | 30 +++---------- sycl/source/detail/kernel_data.hpp | 3 +- sycl/source/handler.cpp | 43 +++++++++++++++++-- 3 files changed, 47 insertions(+), 29 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 781a992cef03..bd203d58850c 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -256,7 +256,8 @@ struct KernelWrapper< } }; // KernelWrapper struct -// This struct is inherited by sycl::handler. +// This class encapsulates everything related to parsing kernel launch +// properties. class KernelLaunchPropertyWrapper { public: // This struct is used to store kernel launch properties. @@ -264,8 +265,9 @@ class KernelLaunchPropertyWrapper { // In some code paths, kernel launch properties are set multiple times // for the same kernel, that is why using std::optional to avoid overriding // previously set properties. + // This struct is used to pass kernel launch properties across the ABI + // boundary. struct KernelLaunchPropertiesT { - struct ScopeForwardProgressProperty { std::optional Guarantee; @@ -282,28 +284,11 @@ class KernelLaunchPropertyWrapper { std::array MClusterSize = {0, 0, 0}; // Forward progress guarantee properties for work_item, sub_group and - // work_group scopes. - // Indexed by ExecutionScope enum. + // work_group scopes. We need to store them for validation later. std::array MForwardProgressProperties; - - KernelLaunchPropertiesT() = default; - - // TODO: Do you even need this? - KernelLaunchPropertiesT( - ur_kernel_cache_config_t _CacheConfig, bool _IsCooperative, - uint32_t _WorkGroupMemorySize, bool _UsesClusterLaunch, - size_t _ClusterDims, std::array _ClusterSize, - std::array _ForwardProgressProperties) - : MCacheConfig(_CacheConfig), MIsCooperative(_IsCooperative), - MWorkGroupMemorySize(_WorkGroupMemorySize), - MUsesClusterLaunch(_UsesClusterLaunch), MClusterDims(_ClusterDims), - MClusterSize(_ClusterSize), - MForwardProgressProperties(_ForwardProgressProperties) {} - }; // struct KernelLaunchPropertiesT + }; /// Process runtime kernel properties. - /// - /// Stores information about kernel properties into the handler. template static KernelLaunchPropertiesT processKernelLaunchProperties(PropertiesT Props) { @@ -423,9 +408,6 @@ class KernelLaunchPropertyWrapper { } /// Process kernel properties. - /// - /// Stores information about kernel properties into the handler. - /// /// Note: it is important that this function *does not* depend on kernel /// name or kernel type, because then it will be instantiated for every /// kernel, even though body of those instantiated functions could be almost diff --git a/sycl/source/detail/kernel_data.hpp b/sycl/source/detail/kernel_data.hpp index 037341b9c069..d73229a909fa 100644 --- a/sycl/source/detail/kernel_data.hpp +++ b/sycl/source/detail/kernel_data.hpp @@ -8,13 +8,12 @@ #pragma once +#include #include #include #include #include -#include - #include #include diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 2aacd56fda15..619f8b121aa6 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1758,9 +1758,46 @@ void handler::verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope) { - // FIXME! - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT Kprop; - setKernelLaunchProperties(Kprop); + using execution_scope = sycl::ext::oneapi::experimental::execution_scope; + using forward_progress = + sycl::ext::oneapi::experimental::forward_progress_guarantee; + const bool supported = impl->get_device().supportsForwardProgress( + guarantee, threadScope, coordinationScope); + if (threadScope == execution_scope::work_group) { + if (!supported) { + throw sycl::exception( + sycl::errc::feature_not_supported, + "Required progress guarantee for work groups is not " + "supported by this device."); + } + // If we are here, the device supports the guarantee required but there is a + // caveat in that if the guarantee required is a concurrent guarantee, then + // we most likely also need to enable cooperative launch of the kernel. That + // is, although the device supports the required guarantee, some setup work + // is needed to truly make the device provide that guarantee at runtime. + // Otherwise, we will get the default guarantee which is weaker than + // concurrent. Same reasoning applies for sub_group but not for work_item. + // TODO: Further design work is probably needed to reflect this behavior in + // Unified Runtime. + if (guarantee == forward_progress::concurrent) + setKernelIsCooperative(true); + } else if (threadScope == execution_scope::sub_group) { + if (!supported) { + throw sycl::exception(sycl::errc::feature_not_supported, + "Required progress guarantee for sub groups is not " + "supported by this device."); + } + // Same reasoning as above. + if (guarantee == forward_progress::concurrent) + setKernelIsCooperative(true); + } else { // threadScope is execution_scope::work_item otherwise undefined + // behavior + if (!supported) { + throw sycl::exception(sycl::errc::feature_not_supported, + "Required progress guarantee for work items is not " + "supported by this device."); + } + } } #endif From 6f12dff1235c756d4116358d4aea61628b91af0b Mon Sep 17 00:00:00 2001 From: Udit Kumar Agarwal Date: Mon, 13 Oct 2025 15:17:44 -0700 Subject: [PATCH 03/10] Update sycl/include/sycl/queue.hpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- sycl/include/sycl/queue.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 8cdf35f50a93..24edef878a3b 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -198,7 +198,7 @@ auto submit_kernel_direct( // parallel_for (deprecated API). // ExtraProps are properties passed explicitly or via launch_config. - // Asumption: If user specify properties via launch_config or explicitly + // Assumption: If user specify properties via launch_config or explicitly // then we don't check for properties specified via get() method. detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT parsedProps; if constexpr (std::is_same_v Date: Wed, 15 Oct 2025 01:11:32 +0200 Subject: [PATCH 04/10] Address feedback --- .../sycl/detail/kernel_launch_helper.hpp | 330 ++++++++---------- .../oneapi/experimental/enqueue_functions.hpp | 8 +- sycl/include/sycl/handler.hpp | 54 ++- sycl/include/sycl/queue.hpp | 14 +- sycl/source/detail/kernel_data.hpp | 83 +++-- sycl/source/detail/queue_impl.cpp | 2 +- sycl/source/detail/queue_impl.hpp | 6 +- sycl/source/handler.cpp | 2 +- sycl/source/queue.cpp | 16 +- 9 files changed, 247 insertions(+), 268 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index bd203d58850c..05b76bd0e1b5 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -256,203 +256,179 @@ struct KernelWrapper< } }; // KernelWrapper struct -// This class encapsulates everything related to parsing kernel launch +// This namespace encapsulates everything related to parsing kernel launch // properties. -class KernelLaunchPropertyWrapper { -public: - // This struct is used to store kernel launch properties. - // std::optional is used to indicate that the property is not set. - // In some code paths, kernel launch properties are set multiple times - // for the same kernel, that is why using std::optional to avoid overriding - // previously set properties. - // This struct is used to pass kernel launch properties across the ABI - // boundary. - struct KernelLaunchPropertiesT { - struct ScopeForwardProgressProperty { - std::optional - Guarantee; - std::optional ExecScope; - std::optional - CoordinationScope; - }; - - std::optional MCacheConfig = std::nullopt; - std::optional MIsCooperative = std::nullopt; - std::optional MWorkGroupMemorySize = std::nullopt; - std::optional MUsesClusterLaunch = std::nullopt; - size_t MClusterDims = 0; - std::array MClusterSize = {0, 0, 0}; +inline namespace kernel_launch_properties_v1 { + +// This struct is used to store kernel launch properties. +// std::optional is used to indicate that the property is not set. +// This struct is used to pass kernel launch properties across the ABI +// boundary. +struct KernelLaunchPropertiesTy { + // Modeled after ur_kernel_cache_config_t + enum class StableKernelCacheConfig : int32_t { + Default = 0, + LargeSLM = 1, + LargeData = 2 + }; - // Forward progress guarantee properties for work_item, sub_group and - // work_group scopes. We need to store them for validation later. - std::array MForwardProgressProperties; + struct ScopeForwardProgressProperty { + std::optional + Guarantee; + std::optional ExecScope; + std::optional + CoordinationScope; }; - /// Process runtime kernel properties. - template - static KernelLaunchPropertiesT - processKernelLaunchProperties(PropertiesT Props) { - using namespace sycl::ext::oneapi::experimental; - using namespace sycl::ext::oneapi::experimental::detail; - KernelLaunchPropertiesT retval; - - // Process Kernel cache configuration property. - { - if constexpr (PropertiesT::template has_property< - sycl::ext::intel::experimental::cache_config_key>()) { - auto Config = Props.template get_property< - sycl::ext::intel::experimental::cache_config_key>(); - if (Config == sycl::ext::intel::experimental::large_slm) { - retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; - } else if (Config == sycl::ext::intel::experimental::large_data) { - retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; - } - } else { - std::ignore = Props; - } - } + std::optional MCacheConfig = std::nullopt; + std::optional MIsCooperative = std::nullopt; + std::optional MWorkGroupMemorySize = std::nullopt; + std::optional MUsesClusterLaunch = std::nullopt; + size_t MClusterDims = 0; + std::array MClusterSize = {0, 0, 0}; - // Process Kernel cooperative property. - { - if constexpr (PropertiesT::template has_property()) - retval.MIsCooperative = true; - } + // Forward progress guarantee properties for work_item, sub_group and + // work_group scopes. We need to store them for validation later. + std::array MForwardProgressProperties; +}; - // Process device progress properties. - { - using forward_progress = - sycl::ext::oneapi::experimental::forward_progress_guarantee; - if constexpr (PropertiesT::template has_property< - work_group_progress_key>()) { - auto prop = Props.template get_property(); - retval.MForwardProgressProperties[0].Guarantee = prop.guarantee; - retval.MForwardProgressProperties[0].ExecScope = - execution_scope::work_group; - retval.MForwardProgressProperties[0].CoordinationScope = - prop.coordinationScope; - - // If we are here, the device supports the guarantee required but there - // is a caveat in that if the guarantee required is a concurrent - // guarantee, then we most likely also need to enable cooperative launch - // of the kernel. That is, although the device supports the required - // guarantee, some setup work is needed to truly make the device provide - // that guarantee at runtime. Otherwise, we will get the default - // guarantee which is weaker than concurrent. Same reasoning applies for - // sub_group but not for work_item. - // TODO: Further design work is probably needed to reflect this behavior - // in Unified Runtime. - if constexpr (prop.guarantee == forward_progress::concurrent) - retval.MIsCooperative = true; - } - if constexpr (PropertiesT::template has_property< - sub_group_progress_key>()) { - auto prop = Props.template get_property(); - retval.MForwardProgressProperties[1].Guarantee = prop.guarantee; - retval.MForwardProgressProperties[1].ExecScope = - execution_scope::sub_group; - retval.MForwardProgressProperties[1].CoordinationScope = - prop.coordinationScope; - - // Same reasoning as above for work_group applies here. - if constexpr (prop.guarantee == forward_progress::concurrent) - retval.MIsCooperative = true; - } - if constexpr (PropertiesT::template has_property< - work_item_progress_key>()) { - auto prop = Props.template get_property(); - retval.MForwardProgressProperties[2].Guarantee = prop.guarantee; - retval.MForwardProgressProperties[2].ExecScope = - execution_scope::work_item; - retval.MForwardProgressProperties[2].CoordinationScope = - prop.coordinationScope; +template +constexpr KernelLaunchPropertiesTy +processKernelLaunchProperties(PropertiesT Props) { + using namespace sycl::ext::oneapi::experimental; + using namespace sycl::ext::oneapi::experimental::detail; + KernelLaunchPropertiesTy retval; + + // Process Kernel cache configuration property. + { + if constexpr (PropertiesT::template has_property< + sycl::ext::intel::experimental::cache_config_key>()) { + auto Config = Props.template get_property< + sycl::ext::intel::experimental::cache_config_key>(); + if (Config == sycl::ext::intel::experimental::large_slm) { + retval.MCacheConfig = + KernelLaunchPropertiesTy::StableKernelCacheConfig::LargeSLM; + } else if (Config == sycl::ext::intel::experimental::large_data) { + retval.MCacheConfig = + KernelLaunchPropertiesTy::StableKernelCacheConfig::LargeData; } + } else { + std::ignore = Props; } + } - // Process work group scratch memory property. - { - if constexpr (PropertiesT::template has_property< - work_group_scratch_size>()) { - auto WorkGroupMemSize = - Props.template get_property(); - retval.MWorkGroupMemorySize = WorkGroupMemSize.size; - } - } + // Process Kernel cooperative property. + { + if constexpr (PropertiesT::template has_property()) + retval.MIsCooperative = true; + } - // Parse cluster properties. - { - constexpr std::size_t ClusterDim = getClusterDim(); - if constexpr (ClusterDim > 0) { - - auto ClusterSize = - Props.template get_property>() - .get_cluster_size(); - retval.MUsesClusterLaunch = true; - retval.MClusterDims = ClusterDim; - if (ClusterDim == 1) { - retval.MClusterSize[0] = ClusterSize[0]; - } else if (ClusterDim == 2) { - retval.MClusterSize[0] = ClusterSize[0]; - retval.MClusterSize[1] = ClusterSize[1]; - } else if (ClusterDim == 3) { - retval.MClusterSize[0] = ClusterSize[0]; - retval.MClusterSize[1] = ClusterSize[1]; - retval.MClusterSize[2] = ClusterSize[2]; - } else { - assert(ClusterDim <= 3 && - "Only 1D, 2D, and 3D cluster launch is supported."); - } - } + // Process device progress properties. + { + if constexpr (PropertiesT::template has_property< + work_group_progress_key>()) { + auto prop = Props.template get_property(); + retval.MForwardProgressProperties[0].Guarantee = prop.guarantee; + retval.MForwardProgressProperties[0].ExecScope = + execution_scope::work_group; + retval.MForwardProgressProperties[0].CoordinationScope = + prop.coordinationScope; } + if constexpr (PropertiesT::template has_property< + sub_group_progress_key>()) { + auto prop = Props.template get_property(); + retval.MForwardProgressProperties[1].Guarantee = prop.guarantee; + retval.MForwardProgressProperties[1].ExecScope = + execution_scope::sub_group; + retval.MForwardProgressProperties[1].CoordinationScope = + prop.coordinationScope; + } + if constexpr (PropertiesT::template has_property< + work_item_progress_key>()) { + auto prop = Props.template get_property(); + retval.MForwardProgressProperties[2].Guarantee = prop.guarantee; + retval.MForwardProgressProperties[2].ExecScope = + execution_scope::work_item; + retval.MForwardProgressProperties[2].CoordinationScope = + prop.coordinationScope; + } + } - return retval; + // Process work group scratch memory property. + { + if constexpr (PropertiesT::template has_property< + work_group_scratch_size>()) { + auto WorkGroupMemSize = + Props.template get_property(); + retval.MWorkGroupMemorySize = WorkGroupMemSize.size; + } } - /// Process kernel properties. - /// Note: it is important that this function *does not* depend on kernel - /// name or kernel type, because then it will be instantiated for every - /// kernel, even though body of those instantiated functions could be almost - /// the same, thus unnecessary increasing compilation time. - template < - bool IsESIMDKernel, - typename PropertiesT = ext::oneapi::experimental::empty_properties_t> - static KernelLaunchPropertiesT processKernelProperties(PropertiesT Props) { - static_assert( - ext::oneapi::experimental::is_property_list::value, - "Template type is not a property list."); - static_assert( - !PropertiesT::template has_property< - sycl::ext::intel::experimental::fp_control_key>() || - (PropertiesT::template has_property< - sycl::ext::intel::experimental::fp_control_key>() && - IsESIMDKernel), - "Floating point control property is supported for ESIMD kernels only."); - static_assert( - !PropertiesT::template has_property< - sycl::ext::oneapi::experimental::indirectly_callable_key>(), - "indirectly_callable property cannot be applied to SYCL kernels"); + // Parse cluster properties. + { + constexpr std::size_t ClusterDim = getClusterDim(); + if constexpr (ClusterDim > 0) { + static_assert(ClusterDim <= 3, + "Only 1D, 2D, and 3D cluster launch is supported."); + + auto ClusterSize = + Props.template get_property>() + .get_cluster_size(); - return processKernelLaunchProperties(Props); + retval.MUsesClusterLaunch = true; + retval.MClusterDims = ClusterDim; + + for (size_t dim = 0; dim < ClusterDim; dim++) + retval.MClusterSize[dim] = ClusterSize[dim]; + } } - // Returns KernelLaunchPropertiesT or std::nullopt based on whether the - // kernel functor has a get method that returns properties. - template - static std::optional - parseProperties([[maybe_unused]] const KernelType &KernelFunc) { + return retval; +} + +/// Note: it is important that this function *does not* depend on kernel +/// name or kernel type, because then it will be instantiated for every +/// kernel, even though body of those instantiated functions could be almost +/// the same, thus unnecessary increasing compilation time. +template +constexpr KernelLaunchPropertiesTy processKernelProperties(PropertiesT Props) { + static_assert(ext::oneapi::experimental::is_property_list::value, + "Template type is not a property list."); + static_assert( + !PropertiesT::template has_property< + sycl::ext::intel::experimental::fp_control_key>() || + (PropertiesT::template has_property< + sycl::ext::intel::experimental::fp_control_key>() && + IsESIMDKernel), + "Floating point control property is supported for ESIMD kernels only."); + static_assert( + !PropertiesT::template has_property< + sycl::ext::oneapi::experimental::indirectly_callable_key>(), + "indirectly_callable property cannot be applied to SYCL kernels"); + + return processKernelLaunchProperties(Props); +} + +// Returns KernelLaunchPropertiesTy or std::nullopt based on whether the +// kernel functor has a get method that returns properties. +template +constexpr std::optional +parseProperties([[maybe_unused]] const KernelType &KernelFunc) { #ifndef __SYCL_DEVICE_ONLY__ - // If there are properties provided by get method then process them. - if constexpr (ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod::value) { + // If there are properties provided by get method then process them. + if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod< + const KernelType &>::value) { - return processKernelProperties( - KernelFunc.get(ext::oneapi::experimental::properties_tag{})); - } -#endif - // If there are no properties provided by get method then return empty - // optional. - return std::nullopt; + return processKernelProperties( + KernelFunc.get(ext::oneapi::experimental::properties_tag{})); } -}; // KernelLaunchPropertyWrapper struct +#endif + // If there are no properties provided by get method then return empty + // optional. + return std::nullopt; +} +} // namespace kernel_launch_properties_v1 } // namespace detail } // namespace _V1 diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index 49aa3a252682..9aaf817d9997 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -299,11 +299,11 @@ void nd_launch(queue Q, launch_config, Properties> Config, ext::oneapi::experimental::detail::LaunchConfigAccess, Properties> - ConfigAccess(Config); + LaunchConfigAccess(Config); - detail::submit_kernel_direct(std::move(Q), - ConfigAccess.getRange(), KernelObj, - Config.getProperties()); + detail::submit_kernel_direct( + std::move(Q), LaunchConfigAccess.getRange(), KernelObj, + LaunchConfigAccess.getProperties()); } else { submit(std::move(Q), [&](handler &CGH) { nd_launch(CGH, Config, KernelObj, diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 78d58c80d6b4..30629f0c5589 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -902,9 +902,8 @@ class __SYCL_EXPORT handler { /// Stores information about kernel properties into the handler. template void processLaunchProperties(PropertiesT Props) { - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT ParsedProp = - detail::KernelLaunchPropertyWrapper::processKernelLaunchProperties< - PropertiesT>(Props); + detail::KernelLaunchPropertiesTy ParsedProp = + detail::processKernelLaunchProperties(Props); setKernelLaunchProperties(ParsedProp); } @@ -920,9 +919,8 @@ class __SYCL_EXPORT handler { bool IsESIMDKernel, typename PropertiesT = ext::oneapi::experimental::empty_properties_t> void processProperties(PropertiesT Props) { - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT ParsedProp = - detail::KernelLaunchPropertyWrapper::processKernelProperties< - IsESIMDKernel>(Props); + detail::KernelLaunchPropertiesTy ParsedProp = + detail::processKernelProperties(Props); setKernelLaunchProperties(ParsedProp); } #endif // INTEL_PREVIEW_BREAKING_CHANGES @@ -1233,8 +1231,7 @@ class __SYCL_EXPORT handler { decltype(Wrapper), TransformedArgType, PropertiesT>::wrap(Wrapper); - if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties< - KName, Info.IsESIMD>(Wrapper)) { + if (auto prop = detail::parseProperties(Wrapper)) { setKernelLaunchProperties(*prop); } @@ -1261,16 +1258,14 @@ class __SYCL_EXPORT handler { // kernel is generated detail::KernelWrapper::wrap(KernelFunc); - if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties< - NameT, Info.IsESIMD>(KernelFunc)) { + if (auto prop = + detail::parseProperties(KernelFunc)) { setKernelLaunchProperties(*prop); } #ifndef __SYCL_DEVICE_ONLY__ verifyUsedKernelBundleInternal(Info.Name); - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT - ProcessedProps = - detail::KernelLaunchPropertyWrapper::processKernelProperties< - Info.IsESIMD, PropertiesT>(Props); + detail::KernelLaunchPropertiesTy ProcessedProps = + detail::processKernelProperties(Props); setKernelLaunchProperties(ProcessedProps); detail::checkValueRange(UserRange); setNDRangeDescriptor(std::move(UserRange)); @@ -1300,9 +1295,8 @@ class __SYCL_EXPORT handler { setDeviceKernelInfo(std::move(Kernel)); detail::checkValueRange(NumWorkItems); setNDRangeDescriptor(std::move(NumWorkItems)); - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT ParsedProp = - detail::KernelLaunchPropertyWrapper::processKernelLaunchProperties< - PropertiesT>(Props); + detail::KernelLaunchPropertiesTy ParsedProp = + detail::processKernelLaunchProperties(Props); setKernelLaunchProperties(ParsedProp); extractArgsAndReqs(); #endif @@ -1326,9 +1320,8 @@ class __SYCL_EXPORT handler { setDeviceKernelInfo(std::move(Kernel)); detail::checkValueRange(NDRange); setNDRangeDescriptor(std::move(NDRange)); - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT ParsedProp = - detail::KernelLaunchPropertyWrapper::processKernelLaunchProperties< - PropertiesT>(Props); + detail::KernelLaunchPropertiesTy ParsedProp = + detail::processKernelLaunchProperties(Props); setKernelLaunchProperties(ParsedProp); extractArgsAndReqs(); #endif @@ -1349,8 +1342,7 @@ class __SYCL_EXPORT handler { constexpr auto Info = detail::CompileTimeKernelInfo; detail::KernelWrapper::wrap(KernelFunc); - if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties< - NameT, Info.IsESIMD>(KernelFunc)) { + if (auto prop = detail::parseProperties(KernelFunc)) { setKernelLaunchProperties(*prop); } #ifndef __SYCL_DEVICE_ONLY__ @@ -1369,10 +1361,8 @@ class __SYCL_EXPORT handler { } StoreLambda(std::move(KernelFunc)); - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT - ProcessedProps = - detail::KernelLaunchPropertyWrapper::processKernelProperties< - Info.IsESIMD, PropertiesT>(Props); + detail::KernelLaunchPropertiesTy ProcessedProps = + detail::processKernelProperties(Props); setKernelLaunchProperties(ProcessedProps); #endif } @@ -1396,8 +1386,7 @@ class __SYCL_EXPORT handler { (void)Kernel; detail::KernelWrapper::wrap(KernelFunc); - if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties( - KernelFunc)) { + if (auto prop = detail::parseProperties(KernelFunc)) { setKernelLaunchProperties(*prop); } #ifndef __SYCL_DEVICE_ONLY__ @@ -1426,10 +1415,8 @@ class __SYCL_EXPORT handler { "the kernel name must match the name of the lambda"); } StoreLambda(std::move(KernelFunc)); - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT - ProcessedProps = - detail::KernelLaunchPropertyWrapper::processKernelProperties< - Info.IsESIMD, PropertiesT>(Props); + detail::KernelLaunchPropertiesTy ProcessedProps = + detail::processKernelProperties(Props); setKernelLaunchProperties(ProcessedProps); #endif } @@ -3478,8 +3465,7 @@ class __SYCL_EXPORT handler { #endif void setKernelLaunchProperties( - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT - &KernelLaunchProperties); + const detail::KernelLaunchPropertiesTy &KernelLaunchProperties); // Various checks that are only meaningful for host compilation, because they // result in runtime errors (i.e. exceptions being thrown). To save time diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 24edef878a3b..ded855a70416 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -68,7 +68,7 @@ event __SYCL_EXPORT submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template @@ -76,7 +76,7 @@ void __SYCL_EXPORT submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); namespace detail { @@ -200,22 +200,18 @@ auto submit_kernel_direct( // Assumption: If user specify properties via launch_config or explicitly // then we don't check for properties specified via get() method. - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT parsedProps; + KernelLaunchPropertiesTy parsedProps; if constexpr (std::is_same_v) { // Use properties passed via. get() method. if constexpr (ext::oneapi::experimental::detail:: HasKernelPropertiesGetMethod::value) { auto prop = KernelFunc.get(ext::oneapi::experimental::properties_tag{}); - parsedProps = - detail::KernelLaunchPropertyWrapper::processKernelProperties( - prop); + parsedProps = detail::processKernelProperties(prop); } } else { // Use ExtraProps - parsedProps = - detail::KernelLaunchPropertyWrapper::processKernelProperties( - ExtraProps); + parsedProps = detail::processKernelProperties(ExtraProps); } // Instantiating the kernel on the host improves debugging. diff --git a/sycl/source/detail/kernel_data.hpp b/sycl/source/detail/kernel_data.hpp index d73229a909fa..f78845e22305 100644 --- a/sycl/source/detail/kernel_data.hpp +++ b/sycl/source/detail/kernel_data.hpp @@ -168,49 +168,70 @@ class KernelData { } void validateAndSetKernelLaunchProperties( - const KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Kprop, - bool HasGraph, const device_impl &dev) { + const detail::KernelLaunchPropertiesTy &Kprop, bool HasGraph, + const device_impl &dev) { + using execScope = ext::oneapi::experimental::execution_scope; // Validate properties before setting. - { - if (HasGraph) { - if (Kprop.MWorkGroupMemorySize) { - throw sycl::exception( - sycl::make_error_code(errc::invalid), - "Setting work group scratch memory size is not yet supported " - "for use with the SYCL Graph extension."); - } + if (HasGraph) { + if (Kprop.MWorkGroupMemorySize) { + throw sycl::exception( + sycl::make_error_code(errc::invalid), + "Setting work group scratch memory size is not yet supported " + "for use with the SYCL Graph extension."); + } - if (Kprop.MUsesClusterLaunch && *Kprop.MUsesClusterLaunch) { - throw sycl::exception(sycl::make_error_code(errc::invalid), - "Cluster launch is not yet supported " - "for use with the SYCL Graph extension."); - } + if (Kprop.MUsesClusterLaunch && *Kprop.MUsesClusterLaunch) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Cluster launch is not yet supported " + "for use with the SYCL Graph extension."); } + } - for (int i = 0; i < 3; i++) { - if (Kprop.MForwardProgressProperties[i].Guarantee.has_value()) { - - if (!dev.supportsForwardProgress( - *Kprop.MForwardProgressProperties[i].Guarantee, - *Kprop.MForwardProgressProperties[i].ExecScope, - *Kprop.MForwardProgressProperties[i].CoordinationScope)) { - // TODO: Make the error message more descriptive. - throw sycl::exception( - sycl::make_error_code(errc::feature_not_supported), - "The device associated with the queue does not support the " - "requested forward progress guarantee."); - } + // Validate and set forward progress guarantees. + for (int i = 0; i < 3; i++) { + if (Kprop.MForwardProgressProperties[i].Guarantee.has_value()) { + + if (!dev.supportsForwardProgress( + *Kprop.MForwardProgressProperties[i].Guarantee, + *Kprop.MForwardProgressProperties[i].ExecScope, + *Kprop.MForwardProgressProperties[i].CoordinationScope)) { + throw sycl::exception( + sycl::make_error_code(errc::feature_not_supported), + "The device associated with the queue does not support the " + "requested forward progress guarantee."); + } + + auto execScope = *Kprop.MForwardProgressProperties[i].ExecScope; + // If we are here, the device supports the guarantee required but there + // is a caveat in that if the guarantee required is a concurrent + // guarantee, then we most likely also need to enable cooperative launch + // of the kernel. That is, although the device supports the required + // guarantee, some setup work is needed to truly make the device provide + // that guarantee at runtime. Otherwise, we will get the default + // guarantee which is weaker than concurrent. Same reasoning applies for + // sub_group but not for work_item. + // TODO: Further design work is probably needed to reflect this behavior + // in Unified Runtime. + if ((execScope == execScope::work_group || + execScope == execScope::sub_group) && + (*Kprop.MForwardProgressProperties[i].Guarantee == + ext::oneapi::experimental::forward_progress_guarantee:: + concurrent)) { + setCooperative(true); } } } - // Set properties. if (Kprop.MIsCooperative) setCooperative(*Kprop.MIsCooperative); - if (Kprop.MCacheConfig) - setKernelCacheConfig(*Kprop.MCacheConfig); + if (Kprop.MCacheConfig) { + // KernelLaunchPropertiesTy::StableKernelCacheConfig is modeled after + // ur_kernel_cache_config_t, so this cast is safe. + setKernelCacheConfig( + static_cast(*Kprop.MCacheConfig)); + } if (Kprop.MWorkGroupMemorySize) setKernelWorkGroupMemorySize(*Kprop.MWorkGroupMemorySize); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index c1e0452796fb..3c970e2d57c5 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -567,7 +567,7 @@ EventImplPtr queue_impl::submit_command_to_graph( EventImplPtr queue_impl::submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { KernelData KData; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 4528752ba95f..f57297d9f3b3 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -363,7 +363,7 @@ class queue_impl : public std::enable_shared_from_this { event submit_kernel_direct_with_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { detail::EventImplPtr EventImpl = submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, @@ -375,7 +375,7 @@ class queue_impl : public std::enable_shared_from_this { void submit_kernel_direct_without_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, false, Props, CodeLoc, IsTopCodeLoc); @@ -931,7 +931,7 @@ class queue_impl : public std::enable_shared_from_this { EventImplPtr submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 619f8b121aa6..07ec95f8ed39 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1914,7 +1914,7 @@ void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest, } void handler::setKernelLaunchProperties( - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Kprop) { + const detail::KernelLaunchPropertiesTy &Kprop) { impl->MKernelData.validateAndSetKernelLaunchProperties( Kprop, getCommandGraph() != nullptr /*hasGraph?*/, impl->get_device() /*device_impl*/); diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 73a53a01abda..c97798b80bdb 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -476,7 +476,7 @@ event submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { return getSyclObjImpl(Queue)->submit_kernel_direct_with_event( Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); @@ -486,21 +486,21 @@ template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template @@ -508,7 +508,7 @@ void submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { getSyclObjImpl(Queue)->submit_kernel_direct_without_event( Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); @@ -518,21 +518,21 @@ template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &Props, + const detail::KernelLaunchPropertiesTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); } // namespace _V1 From 19d81cac3d557db2901b62d0ec96ec652f6eedb1 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 27 Oct 2025 19:13:55 +0100 Subject: [PATCH 05/10] Refactor property parsing via template --- .../sycl/detail/kernel_launch_helper.hpp | 199 +++++++++--------- .../experimental/cluster_group_prop.hpp | 2 +- sycl/include/sycl/handler.hpp | 16 +- sycl/include/sycl/queue.hpp | 6 +- sycl/source/detail/kernel_data.hpp | 148 +++++++++---- sycl/source/detail/queue_impl.cpp | 2 +- sycl/source/detail/queue_impl.hpp | 6 +- sycl/source/handler.cpp | 2 +- sycl/source/queue.cpp | 16 +- 9 files changed, 224 insertions(+), 173 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 05b76bd0e1b5..cb8aef61b4cc 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -260,17 +260,49 @@ struct KernelWrapper< // properties. inline namespace kernel_launch_properties_v1 { -// This struct is used to store kernel launch properties. -// std::optional is used to indicate that the property is not set. -// This struct is used to pass kernel launch properties across the ABI -// boundary. -struct KernelLaunchPropertiesTy { - // Modeled after ur_kernel_cache_config_t - enum class StableKernelCacheConfig : int32_t { - Default = 0, - LargeSLM = 1, - LargeData = 2 - }; +template struct MarshalledProperty; + +// Generic implementation for runtime properties. +template +struct MarshalledProperty>> { + std::optional property; + + template + MarshalledProperty([[maybe_unused]] const InputPropertyTy &InputProperties) { + if constexpr (ext::oneapi::experimental::is_property_list_v< + InputPropertyTy>) + if constexpr (InputPropertyTy::template has_property()) { + std::cout << "Got property: " << typeid(PropertyTy).name() << "\n"; + property = InputProperties.template get_property(); + } + } + + MarshalledProperty() = default; +}; + +// Specialization for use_root_sync_key property. +template <> +struct MarshalledProperty { + + bool isRootSyncPropPresent = false; + + template + MarshalledProperty([[maybe_unused]] const InputPropertyTy &Props) { + using namespace sycl::ext::oneapi::experimental; + if constexpr (ext::oneapi::experimental::is_property_list_v< + InputPropertyTy>) + if constexpr (InputPropertyTy::template has_property()) + isRootSyncPropPresent = true; + } + + MarshalledProperty() = default; +}; + +// Specialization for work group progress property. +template <> +struct MarshalledProperty< + sycl::ext::oneapi::experimental::work_group_progress_key> { struct ScopeForwardProgressProperty { std::optional @@ -280,110 +312,69 @@ struct KernelLaunchPropertiesTy { CoordinationScope; }; - std::optional MCacheConfig = std::nullopt; - std::optional MIsCooperative = std::nullopt; - std::optional MWorkGroupMemorySize = std::nullopt; - std::optional MUsesClusterLaunch = std::nullopt; - size_t MClusterDims = 0; - std::array MClusterSize = {0, 0, 0}; - // Forward progress guarantee properties for work_item, sub_group and // work_group scopes. We need to store them for validation later. std::array MForwardProgressProperties; -}; -template -constexpr KernelLaunchPropertiesTy -processKernelLaunchProperties(PropertiesT Props) { - using namespace sycl::ext::oneapi::experimental; - using namespace sycl::ext::oneapi::experimental::detail; - KernelLaunchPropertiesTy retval; - - // Process Kernel cache configuration property. - { - if constexpr (PropertiesT::template has_property< - sycl::ext::intel::experimental::cache_config_key>()) { - auto Config = Props.template get_property< - sycl::ext::intel::experimental::cache_config_key>(); - if (Config == sycl::ext::intel::experimental::large_slm) { - retval.MCacheConfig = - KernelLaunchPropertiesTy::StableKernelCacheConfig::LargeSLM; - } else if (Config == sycl::ext::intel::experimental::large_data) { - retval.MCacheConfig = - KernelLaunchPropertiesTy::StableKernelCacheConfig::LargeData; + template >> + MarshalledProperty([[maybe_unused]] const InputPropertyTy &Props) { + using namespace sycl::ext::oneapi::experimental; + + if constexpr (ext::oneapi::experimental::is_property_list_v< + InputPropertyTy>) { + if constexpr (InputPropertyTy::template has_property< + work_group_progress_key>()) { + auto prop = Props.template get_property(); + MForwardProgressProperties[0].Guarantee = prop.guarantee; + MForwardProgressProperties[0].ExecScope = execution_scope::work_group; + MForwardProgressProperties[0].CoordinationScope = + prop.coordinationScope; + } + if constexpr (InputPropertyTy::template has_property< + sub_group_progress_key>()) { + auto prop = Props.template get_property(); + MForwardProgressProperties[1].Guarantee = prop.guarantee; + MForwardProgressProperties[1].ExecScope = execution_scope::sub_group; + MForwardProgressProperties[1].CoordinationScope = + prop.coordinationScope; + } + if constexpr (InputPropertyTy::template has_property< + work_item_progress_key>()) { + auto prop = Props.template get_property(); + MForwardProgressProperties[2].Guarantee = prop.guarantee; + MForwardProgressProperties[2].ExecScope = execution_scope::work_item; + MForwardProgressProperties[2].CoordinationScope = + prop.coordinationScope; } - } else { - std::ignore = Props; - } - } - - // Process Kernel cooperative property. - { - if constexpr (PropertiesT::template has_property()) - retval.MIsCooperative = true; - } - - // Process device progress properties. - { - if constexpr (PropertiesT::template has_property< - work_group_progress_key>()) { - auto prop = Props.template get_property(); - retval.MForwardProgressProperties[0].Guarantee = prop.guarantee; - retval.MForwardProgressProperties[0].ExecScope = - execution_scope::work_group; - retval.MForwardProgressProperties[0].CoordinationScope = - prop.coordinationScope; - } - if constexpr (PropertiesT::template has_property< - sub_group_progress_key>()) { - auto prop = Props.template get_property(); - retval.MForwardProgressProperties[1].Guarantee = prop.guarantee; - retval.MForwardProgressProperties[1].ExecScope = - execution_scope::sub_group; - retval.MForwardProgressProperties[1].CoordinationScope = - prop.coordinationScope; - } - if constexpr (PropertiesT::template has_property< - work_item_progress_key>()) { - auto prop = Props.template get_property(); - retval.MForwardProgressProperties[2].Guarantee = prop.guarantee; - retval.MForwardProgressProperties[2].ExecScope = - execution_scope::work_item; - retval.MForwardProgressProperties[2].CoordinationScope = - prop.coordinationScope; } } - // Process work group scratch memory property. - { - if constexpr (PropertiesT::template has_property< - work_group_scratch_size>()) { - auto WorkGroupMemSize = - Props.template get_property(); - retval.MWorkGroupMemorySize = WorkGroupMemSize.size; - } - } + MarshalledProperty() = default; +}; - // Parse cluster properties. - { - constexpr std::size_t ClusterDim = getClusterDim(); - if constexpr (ClusterDim > 0) { - static_assert(ClusterDim <= 3, - "Only 1D, 2D, and 3D cluster launch is supported."); +template struct PropsHolder : MarshalledProperty... { - auto ClusterSize = - Props.template get_property>() - .get_cluster_size(); + template + PropsHolder(PropertiesT Props) : MarshalledProperty(Props)... {} - retval.MUsesClusterLaunch = true; - retval.MClusterDims = ClusterDim; + PropsHolder() = default; +}; - for (size_t dim = 0; dim < ClusterDim; dim++) - retval.MClusterSize[dim] = ClusterSize[dim]; - } - } +using KernelPropertyHolderStructTy = + PropsHolder, + sycl::ext::oneapi::experimental::cuda::cluster_size_key<2>, + sycl::ext::oneapi::experimental::cuda::cluster_size_key<3>>; - return retval; +template +constexpr auto processKernelLaunchProperties(PropertiesT Props) { + KernelPropertyHolderStructTy prop(Props); + return prop; } /// Note: it is important that this function *does not* depend on kernel @@ -392,7 +383,7 @@ processKernelLaunchProperties(PropertiesT Props) { /// the same, thus unnecessary increasing compilation time. template -constexpr KernelLaunchPropertiesTy processKernelProperties(PropertiesT Props) { +constexpr auto processKernelProperties(PropertiesT Props) { static_assert(ext::oneapi::experimental::is_property_list::value, "Template type is not a property list."); static_assert( @@ -413,7 +404,7 @@ constexpr KernelLaunchPropertiesTy processKernelProperties(PropertiesT Props) { // Returns KernelLaunchPropertiesTy or std::nullopt based on whether the // kernel functor has a get method that returns properties. template -constexpr std::optional +constexpr std::optional parseProperties([[maybe_unused]] const KernelType &KernelFunc) { #ifndef __SYCL_DEVICE_ONLY__ // If there are properties provided by get method then process them. diff --git a/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp b/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp index e7eae5563662..9e0d84afb660 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp @@ -22,7 +22,7 @@ struct cluster_size cluster_size, ::sycl::ext::oneapi::experimental::detail::ClusterLaunch> { cluster_size(const range &size) : size(size) {} - sycl::range get_cluster_size() { return size; } + sycl::range get_cluster_size() const { return size; } private: range size; diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 30629f0c5589..ca9a9a8dc301 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -902,7 +902,7 @@ class __SYCL_EXPORT handler { /// Stores information about kernel properties into the handler. template void processLaunchProperties(PropertiesT Props) { - detail::KernelLaunchPropertiesTy ParsedProp = + detail::KernelPropertyHolderStructTy ParsedProp = detail::processKernelLaunchProperties(Props); setKernelLaunchProperties(ParsedProp); } @@ -919,7 +919,7 @@ class __SYCL_EXPORT handler { bool IsESIMDKernel, typename PropertiesT = ext::oneapi::experimental::empty_properties_t> void processProperties(PropertiesT Props) { - detail::KernelLaunchPropertiesTy ParsedProp = + detail::KernelPropertyHolderStructTy ParsedProp = detail::processKernelProperties(Props); setKernelLaunchProperties(ParsedProp); } @@ -1264,7 +1264,7 @@ class __SYCL_EXPORT handler { } #ifndef __SYCL_DEVICE_ONLY__ verifyUsedKernelBundleInternal(Info.Name); - detail::KernelLaunchPropertiesTy ProcessedProps = + detail::KernelPropertyHolderStructTy ProcessedProps = detail::processKernelProperties(Props); setKernelLaunchProperties(ProcessedProps); detail::checkValueRange(UserRange); @@ -1295,7 +1295,7 @@ class __SYCL_EXPORT handler { setDeviceKernelInfo(std::move(Kernel)); detail::checkValueRange(NumWorkItems); setNDRangeDescriptor(std::move(NumWorkItems)); - detail::KernelLaunchPropertiesTy ParsedProp = + detail::KernelPropertyHolderStructTy ParsedProp = detail::processKernelLaunchProperties(Props); setKernelLaunchProperties(ParsedProp); extractArgsAndReqs(); @@ -1320,7 +1320,7 @@ class __SYCL_EXPORT handler { setDeviceKernelInfo(std::move(Kernel)); detail::checkValueRange(NDRange); setNDRangeDescriptor(std::move(NDRange)); - detail::KernelLaunchPropertiesTy ParsedProp = + detail::KernelPropertyHolderStructTy ParsedProp = detail::processKernelLaunchProperties(Props); setKernelLaunchProperties(ParsedProp); extractArgsAndReqs(); @@ -1361,7 +1361,7 @@ class __SYCL_EXPORT handler { } StoreLambda(std::move(KernelFunc)); - detail::KernelLaunchPropertiesTy ProcessedProps = + detail::KernelPropertyHolderStructTy ProcessedProps = detail::processKernelProperties(Props); setKernelLaunchProperties(ProcessedProps); #endif @@ -1415,7 +1415,7 @@ class __SYCL_EXPORT handler { "the kernel name must match the name of the lambda"); } StoreLambda(std::move(KernelFunc)); - detail::KernelLaunchPropertiesTy ProcessedProps = + detail::KernelPropertyHolderStructTy ProcessedProps = detail::processKernelProperties(Props); setKernelLaunchProperties(ProcessedProps); #endif @@ -3465,7 +3465,7 @@ class __SYCL_EXPORT handler { #endif void setKernelLaunchProperties( - const detail::KernelLaunchPropertiesTy &KernelLaunchProperties); + const detail::KernelPropertyHolderStructTy &KernelLaunchProperties); // Various checks that are only meaningful for host compilation, because they // result in runtime errors (i.e. exceptions being thrown). To save time diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index ded855a70416..36d504d42f4d 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -68,7 +68,7 @@ event __SYCL_EXPORT submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template @@ -76,7 +76,7 @@ void __SYCL_EXPORT submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); namespace detail { @@ -200,7 +200,7 @@ auto submit_kernel_direct( // Assumption: If user specify properties via launch_config or explicitly // then we don't check for properties specified via get() method. - KernelLaunchPropertiesTy parsedProps; + KernelPropertyHolderStructTy parsedProps; if constexpr (std::is_same_v) { // Use properties passed via. get() method. diff --git a/sycl/source/detail/kernel_data.hpp b/sycl/source/detail/kernel_data.hpp index f78845e22305..1866b53c94f8 100644 --- a/sycl/source/detail/kernel_data.hpp +++ b/sycl/source/detail/kernel_data.hpp @@ -29,6 +29,11 @@ class KernelData { using DynamicParametersVecT = std::vector>; using ArgsVecT = std::vector; + template + using MarshalledProperty = detail::MarshalledProperty; + template + using cluster_size_key = + sycl::ext::oneapi::experimental::cuda::cluster_size_key; KernelData() = default; ~KernelData() = default; @@ -167,21 +172,84 @@ class KernelData { MKernelWorkGroupMemorySize = Size; } + void parseAndSetCacheConfigProperty( + const sycl::ext::intel::experimental::cache_config_key &prop) { + using namespace sycl::ext::intel::experimental; + + ur_kernel_cache_config_t CacheConfig = + ur_kernel_cache_config_t::UR_KERNEL_CACHE_CONFIG_DEFAULT; + if (prop == large_slm) { + CacheConfig = ur_kernel_cache_config_t::UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + } else if (prop == large_data) { + CacheConfig = ur_kernel_cache_config_t::UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + } else + assert(false && "unknown cache property type"); + + MKernelCacheConfig = CacheConfig; + } + + template + void parseAndSetClusterDimProperty( + const std::optional> &prop) { + if (prop) { + static_assert(ClusterDims < 4 && ClusterDims > 0, + "Invalid cluster dimensions"); + + auto ClusterSize = prop->get_cluster_size(); + MKernelUsesClusterLaunch = true; + + if constexpr (ClusterDims == 1) + MNDRDesc.setClusterDimensions(sycl::range<1>{ClusterSize[0]}); + else if constexpr (ClusterDims == 2) + MNDRDesc.setClusterDimensions( + sycl::range<2>{ClusterSize[0], ClusterSize[1]}); + else if constexpr (ClusterDims == 3) + MNDRDesc.setClusterDimensions( + sycl::range<3>{ClusterSize[0], ClusterSize[1], ClusterSize[2]}); + } + } + void validateAndSetKernelLaunchProperties( - const detail::KernelLaunchPropertiesTy &Kprop, bool HasGraph, + const detail::KernelPropertyHolderStructTy Kprop, bool HasGraph, const device_impl &dev) { using execScope = ext::oneapi::experimental::execution_scope; - - // Validate properties before setting. + using namespace sycl::ext::oneapi::experimental; + using namespace sycl::ext::oneapi::experimental::detail; + + const auto *WorkGroupMemSizeProp = + static_cast *>( + &Kprop); + const auto *CacheConfigProp = static_cast *>(&Kprop); + const auto *UseRootSyncProp = + static_cast *>(&Kprop); + const auto *ForwardProgressProp = + static_cast *>( + &Kprop); + const auto *ClusterLaunchPropDim1 = + static_cast> *>( + &Kprop); + const auto *ClusterLaunchPropDim2 = + static_cast> *>( + &Kprop); + const auto *ClusterLaunchPropDim3 = + static_cast> *>( + &Kprop); + + const bool isClusterDimPropPresent = ClusterLaunchPropDim1->property || + ClusterLaunchPropDim2->property || + ClusterLaunchPropDim3->property; + + // Early validation for graph-incompatible properties if (HasGraph) { - if (Kprop.MWorkGroupMemorySize) { + if (WorkGroupMemSizeProp->property) { throw sycl::exception( sycl::make_error_code(errc::invalid), "Setting work group scratch memory size is not yet supported " "for use with the SYCL Graph extension."); } - if (Kprop.MUsesClusterLaunch && *Kprop.MUsesClusterLaunch) { + if (isClusterDimPropPresent) { throw sycl::exception(sycl::make_error_code(errc::invalid), "Cluster launch is not yet supported " "for use with the SYCL Graph extension."); @@ -190,62 +258,54 @@ class KernelData { // Validate and set forward progress guarantees. for (int i = 0; i < 3; i++) { - if (Kprop.MForwardProgressProperties[i].Guarantee.has_value()) { + if (ForwardProgressProp->MForwardProgressProperties[i] + .Guarantee.has_value()) { if (!dev.supportsForwardProgress( - *Kprop.MForwardProgressProperties[i].Guarantee, - *Kprop.MForwardProgressProperties[i].ExecScope, - *Kprop.MForwardProgressProperties[i].CoordinationScope)) { + *ForwardProgressProp->MForwardProgressProperties[i].Guarantee, + *ForwardProgressProp->MForwardProgressProperties[i].ExecScope, + *ForwardProgressProp->MForwardProgressProperties[i] + .CoordinationScope)) { throw sycl::exception( sycl::make_error_code(errc::feature_not_supported), "The device associated with the queue does not support the " "requested forward progress guarantee."); } - auto execScope = *Kprop.MForwardProgressProperties[i].ExecScope; - // If we are here, the device supports the guarantee required but there - // is a caveat in that if the guarantee required is a concurrent - // guarantee, then we most likely also need to enable cooperative launch - // of the kernel. That is, although the device supports the required - // guarantee, some setup work is needed to truly make the device provide - // that guarantee at runtime. Otherwise, we will get the default - // guarantee which is weaker than concurrent. Same reasoning applies for - // sub_group but not for work_item. - // TODO: Further design work is probably needed to reflect this behavior - // in Unified Runtime. + auto execScope = + *ForwardProgressProp->MForwardProgressProperties[i].ExecScope; + // If we are here, the device supports the guarantee required but + // there is a caveat in that if the guarantee required is a concurrent + // guarantee, then we most likely also need to enable cooperative + // launch of the kernel. That is, although the device supports the + // required guarantee, some setup work is needed to truly make the + // device provide that guarantee at runtime. Otherwise, we will get + // the default guarantee which is weaker than concurrent. Same + // reasoning applies for sub_group but not for work_item. + // TODO: Further design work is probably needed to reflect this + // behavior in Unified Runtime. if ((execScope == execScope::work_group || execScope == execScope::sub_group) && - (*Kprop.MForwardProgressProperties[i].Guarantee == - ext::oneapi::experimental::forward_progress_guarantee:: - concurrent)) { + (*ForwardProgressProp->MForwardProgressProperties[i].Guarantee == + forward_progress_guarantee::concurrent)) { setCooperative(true); } } } - if (Kprop.MIsCooperative) - setCooperative(*Kprop.MIsCooperative); + if (UseRootSyncProp->isRootSyncPropPresent) + setCooperative(true); - if (Kprop.MCacheConfig) { - // KernelLaunchPropertiesTy::StableKernelCacheConfig is modeled after - // ur_kernel_cache_config_t, so this cast is safe. - setKernelCacheConfig( - static_cast(*Kprop.MCacheConfig)); - } + if (CacheConfigProp->property) + parseAndSetCacheConfigProperty(*(CacheConfigProp->property)); + + if (WorkGroupMemSizeProp->property) + setKernelWorkGroupMemorySize((*WorkGroupMemSizeProp->property).size); - if (Kprop.MWorkGroupMemorySize) - setKernelWorkGroupMemorySize(*Kprop.MWorkGroupMemorySize); - - if (Kprop.MUsesClusterLaunch && *Kprop.MUsesClusterLaunch) { - if (Kprop.MClusterDims == 1) - setClusterDimensions(sycl::range<1>{Kprop.MClusterSize[0]}); - else if (Kprop.MClusterDims == 2) - setClusterDimensions( - sycl::range<2>{Kprop.MClusterSize[0], Kprop.MClusterSize[1]}); - else if (Kprop.MClusterDims == 3) - setClusterDimensions(sycl::range<3>{Kprop.MClusterSize[0], - Kprop.MClusterSize[1], - Kprop.MClusterSize[2]}); + if (isClusterDimPropPresent) { + parseAndSetClusterDimProperty(ClusterLaunchPropDim1->property); + parseAndSetClusterDimProperty(ClusterLaunchPropDim2->property); + parseAndSetClusterDimProperty(ClusterLaunchPropDim3->property); } } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 3c970e2d57c5..d62366e14083 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -567,7 +567,7 @@ EventImplPtr queue_impl::submit_command_to_graph( EventImplPtr queue_impl::submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { KernelData KData; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index f57297d9f3b3..031b0a01f56b 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -363,7 +363,7 @@ class queue_impl : public std::enable_shared_from_this { event submit_kernel_direct_with_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { detail::EventImplPtr EventImpl = submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, @@ -375,7 +375,7 @@ class queue_impl : public std::enable_shared_from_this { void submit_kernel_direct_without_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, false, Props, CodeLoc, IsTopCodeLoc); @@ -931,7 +931,7 @@ class queue_impl : public std::enable_shared_from_this { EventImplPtr submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 07ec95f8ed39..e9f139126c4c 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1914,7 +1914,7 @@ void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest, } void handler::setKernelLaunchProperties( - const detail::KernelLaunchPropertiesTy &Kprop) { + const detail::KernelPropertyHolderStructTy &Kprop) { impl->MKernelData.validateAndSetKernelLaunchProperties( Kprop, getCommandGraph() != nullptr /*hasGraph?*/, impl->get_device() /*device_impl*/); diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index c97798b80bdb..7fe5649aecc2 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -476,7 +476,7 @@ event submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { return getSyclObjImpl(Queue)->submit_kernel_direct_with_event( Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); @@ -486,21 +486,21 @@ template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template @@ -508,7 +508,7 @@ void submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { getSyclObjImpl(Queue)->submit_kernel_direct_without_event( Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); @@ -518,21 +518,21 @@ template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::KernelLaunchPropertiesTy &Props, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); } // namespace _V1 From c29b6c7fd6d1c08feeacc586f86224e8f9f7012b Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 27 Oct 2025 19:31:58 +0100 Subject: [PATCH 06/10] Update ABI and include_deps test --- sycl/test/abi/sycl_symbols_linux.dump | 13 ++++---- .../sycl_khr_includes_context.hpp.cpp | 2 +- .../sycl_khr_includes_functional.hpp.cpp | 1 - .../sycl_khr_includes_handler.hpp.cpp | 30 +++++++++---------- .../sycl_khr_includes_kernel_bundle.hpp.cpp | 30 +++++++++---------- .../sycl_khr_includes_math.hpp.cpp | 2 +- .../sycl_khr_includes_queue.hpp.cpp | 4 +-- .../sycl_khr_includes_reduction.hpp.cpp | 30 +++++++++---------- .../sycl_khr_includes_stream.hpp.cpp | 30 +++++++++---------- .../sycl_khr_includes_usm.hpp.cpp | 6 ++-- .../sycl_khr_includes_vec.hpp.cpp | 2 +- 11 files changed, 75 insertions(+), 75 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 032b82ae7429..83292f20db59 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2985,12 +2985,12 @@ _ZN4sycl3_V121__isgreaterequal_implEdd _ZN4sycl3_V121__isgreaterequal_implEff _ZN4sycl3_V122accelerator_selector_vERKNS0_6deviceE _ZN4sycl3_V128verifyUSMAllocatorPropertiesERKNS0_13property_listE -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_4cuda12cluster_sizeILi1EEENSR_ILi2EEENSR_ILi3EEEEEERKNSA_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_4cuda12cluster_sizeILi1EEENSR_ILi2EEENSR_ILi3EEEEEERKNSA_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_4cuda12cluster_sizeILi1EEENSR_ILi2EEENSR_ILi3EEEEEERKNSA_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_4cuda12cluster_sizeILi1EEENSQ_ILi2EEENSQ_ILi3EEEEEERKNS9_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_4cuda12cluster_sizeILi1EEENSQ_ILi2EEENSQ_ILi3EEEEEERKNS9_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_4cuda12cluster_sizeILi1EEENSQ_ILi2EEENSQ_ILi3EEEEEERKNS9_13code_locationEb _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv @@ -3618,6 +3618,7 @@ _ZN4sycl3_V17handler24ext_oneapi_memset2d_implEPvmimm _ZN4sycl3_V17handler24registerDynamicParameterEPNS0_3ext6oneapi12experimental6detail22dynamic_parameter_implEi _ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6detail22dynamic_parameter_baseEi _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb +_ZN4sycl3_V17handler25setKernelLaunchPropertiesERKNS0_6detail27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENS5_5intel12experimental12cache_configENS7_17use_root_sync_keyENS7_23work_group_progress_keyENS7_4cuda12cluster_sizeILi1EEENSF_ILi2EEENSF_ILi3EEEEEE _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi _ZN4sycl3_V17handler26setKernelNameBasedCachePtrEPNS0_6detail21KernelNameBasedCacheTE _ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi diff --git a/sycl/test/include_deps/sycl_khr_includes_context.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_context.hpp.cpp index ddaf62e33ae8..3608d2902d69 100644 --- a/sycl/test/include_deps/sycl_khr_includes_context.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_context.hpp.cpp @@ -10,8 +10,8 @@ // CHECK-NEXT: feature_test.hpp // CHECK-NEXT: context.hpp // CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: backend_types.hpp +// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: detail/export.hpp // CHECK-NEXT: detail/info_desc_helpers.hpp // CHECK-NEXT: aspects.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_functional.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_functional.hpp.cpp index 692e49c927e5..8b19443974f6 100644 --- a/sycl/test/include_deps/sycl_khr_includes_functional.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_functional.hpp.cpp @@ -9,5 +9,4 @@ // CHECK-NEXT: detail/defines_elementary.hpp // CHECK-NEXT: feature_test.hpp // CHECK-NEXT: functional.hpp -// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp index 9d5f05f24d95..cb34a26c6ef3 100644 --- a/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp @@ -116,10 +116,23 @@ // CHECK-NEXT: ext/oneapi/properties/property.hpp // CHECK-NEXT: ext/oneapi/properties/property_value.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/properties/properties.hpp // CHECK-NEXT: ext/oneapi/properties/property_utils.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def +// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp +// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp // CHECK-NEXT: detail/sycl_local_mem_builtins.hpp // CHECK-NEXT: detail/kernel_name_str_t.hpp @@ -136,20 +149,7 @@ // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp // CHECK-NEXT: ext/oneapi/device_global/device_global.hpp // CHECK-NEXT: ext/oneapi/device_global/properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp index 3c05eb671525..23e72bf910ad 100644 --- a/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp @@ -124,21 +124,9 @@ // CHECK-NEXT: ext/oneapi/properties/property.hpp // CHECK-NEXT: ext/oneapi/properties/property_value.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/properties.hpp -// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp -// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/kernel_name_str_t.hpp -// CHECK-NEXT: detail/reduction_forward.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp -// CHECK-NEXT: ext/oneapi/interop_common.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp -// CHECK-NEXT: ext/oneapi/device_global/properties.hpp // CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/properties/properties.hpp +// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp // CHECK-NEXT: ext/oneapi/experimental/graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp @@ -150,8 +138,20 @@ // CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp // CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp +// CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp +// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp +// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp +// CHECK-NEXT: detail/kernel_name_str_t.hpp +// CHECK-NEXT: detail/reduction_forward.hpp +// CHECK-NEXT: event.hpp +// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp +// CHECK-NEXT: ext/oneapi/interop_common.hpp +// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp +// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp +// CHECK-NEXT: ext/oneapi/device_global/properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: sycl_span.hpp // CHECK-NEXT: ext/oneapi/experimental/free_function_traits.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_math.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_math.hpp.cpp index ea5a9c8b7f04..8dcf562a657e 100644 --- a/sycl/test/include_deps/sycl_khr_includes_math.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_math.hpp.cpp @@ -34,11 +34,11 @@ // CHECK-NEXT: vector.hpp // CHECK-NEXT: detail/named_swizzles_mixin.hpp // CHECK-NEXT: detail/vector_arith.hpp -// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: detail/common.hpp // CHECK-NEXT: stl_wrappers/cassert // CHECK-NEXT: stl_wrappers/assert.h // CHECK-NEXT: detail/fwd/accessor.hpp +// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: marray.hpp // CHECK-NEXT: multi_ptr.hpp // CHECK-NEXT: detail/address_space_cast.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp index a37e63c0cc7d..7a73642eda28 100644 --- a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp @@ -142,6 +142,8 @@ // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp @@ -153,9 +155,7 @@ // CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp // CHECK-NEXT: ext/oneapi/interop_common.hpp // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp index d233d264267a..defd42a0bfd3 100644 --- a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp @@ -152,21 +152,9 @@ // CHECK-NEXT: ext/oneapi/properties/property.hpp // CHECK-NEXT: ext/oneapi/properties/property_value.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/properties.hpp -// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp -// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/kernel_name_str_t.hpp -// CHECK-NEXT: detail/ur.hpp -// CHECK-NEXT: ur_api_funcs.def -// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp -// CHECK-NEXT: ext/oneapi/interop_common.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp -// CHECK-NEXT: ext/oneapi/device_global/properties.hpp // CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/properties/properties.hpp +// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp // CHECK-NEXT: ext/oneapi/experimental/graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp @@ -178,8 +166,20 @@ // CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp // CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp +// CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp +// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp +// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp +// CHECK-NEXT: detail/kernel_name_str_t.hpp +// CHECK-NEXT: detail/ur.hpp +// CHECK-NEXT: ur_api_funcs.def +// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp +// CHECK-NEXT: ext/oneapi/interop_common.hpp +// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp +// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp +// CHECK-NEXT: ext/oneapi/device_global/properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: queue.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp index 0be5245e5215..ef7d9acea668 100644 --- a/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp @@ -135,10 +135,23 @@ // CHECK-NEXT: ext/oneapi/properties/property.hpp // CHECK-NEXT: ext/oneapi/properties/property_value.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/properties/properties.hpp // CHECK-NEXT: ext/oneapi/properties/property_utils.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def +// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp +// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp // CHECK-NEXT: detail/sycl_local_mem_builtins.hpp // CHECK-NEXT: detail/kernel_name_str_t.hpp @@ -155,20 +168,7 @@ // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp // CHECK-NEXT: ext/oneapi/device_global/device_global.hpp // CHECK-NEXT: ext/oneapi/device_global/properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp index 87de587378f4..c1d74a687701 100644 --- a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp @@ -35,11 +35,11 @@ // CHECK-NEXT: vector.hpp // CHECK-NEXT: detail/named_swizzles_mixin.hpp // CHECK-NEXT: detail/vector_arith.hpp -// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: detail/common.hpp // CHECK-NEXT: stl_wrappers/cassert // CHECK-NEXT: stl_wrappers/assert.h // CHECK-NEXT: detail/fwd/accessor.hpp +// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: marray.hpp // CHECK-NEXT: multi_ptr.hpp // CHECK-NEXT: detail/address_space_cast.hpp @@ -157,6 +157,8 @@ // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp @@ -168,9 +170,7 @@ // CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp // CHECK-NEXT: ext/oneapi/interop_common.hpp // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: usm/usm_pointer_info.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_vec.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_vec.hpp.cpp index 6de63c741a4c..5baff0436851 100644 --- a/sycl/test/include_deps/sycl_khr_includes_vec.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_vec.hpp.cpp @@ -19,7 +19,6 @@ // CHECK-NEXT: detail/type_traits.hpp // CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp // CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: detail/common.hpp // CHECK-NEXT: detail/export.hpp // CHECK-NEXT: stl_wrappers/cassert @@ -28,4 +27,5 @@ // CHECK-NEXT: detail/fwd/accessor.hpp // CHECK-NEXT: detail/defines.hpp // CHECK-NEXT: detail/memcpy.hpp +// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-EMPTY: From bf9401d0fdd59a6b6fee53f345d2c69787e22702 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 27 Oct 2025 21:40:31 +0100 Subject: [PATCH 07/10] clang format --- .../ext/oneapi/experimental/enqueue_functions.hpp | 4 ++-- sycl/include/sycl/khr/free_function_commands.hpp | 15 ++++++--------- sycl/include/sycl/queue.hpp | 7 +++---- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index eae2dcbce91b..673d4c703fe5 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -276,8 +276,8 @@ void nd_launch(queue Q, nd_range Range, const KernelType &KernelObj, if constexpr (sizeof...(ReductionsT) == 0 && !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { - detail::submit_kernel_direct_parallel_for( - std::move(Q), Range, KernelObj); + detail::submit_kernel_direct_parallel_for(std::move(Q), Range, + KernelObj); } else { submit(std::move(Q), [&](handler &CGH) { nd_launch(CGH, Range, KernelObj, diff --git a/sycl/include/sycl/khr/free_function_commands.hpp b/sycl/include/sycl/khr/free_function_commands.hpp index 3c46e4509bc9..04f73dac91ae 100644 --- a/sycl/include/sycl/khr/free_function_commands.hpp +++ b/sycl/include/sycl/khr/free_function_commands.hpp @@ -164,9 +164,8 @@ void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k, const KernelType &>::value) && !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<1>>::value)) { - detail::submit_kernel_direct_parallel_for( - q, - nd_range<1>(r, size), std::forward(k)); + detail::submit_kernel_direct_parallel_for(q, nd_range<1>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, @@ -182,9 +181,8 @@ void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k, // kernel_handler type argument yet. if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<2>>::value)) { - detail::submit_kernel_direct_parallel_for( - q, - nd_range<2>(r, size), std::forward(k)); + detail::submit_kernel_direct_parallel_for(q, nd_range<2>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, @@ -200,9 +198,8 @@ void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k, // kernel_handler type argument yet. if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<3>>::value)) { - detail::submit_kernel_direct_parallel_for( - q, - nd_range<3>(r, size), std::forward(k)); + detail::submit_kernel_direct_parallel_for(q, nd_range<3>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 7b0f974041a4..e0c4de7635d6 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -164,8 +164,8 @@ template auto submit_kernel_direct( - const queue &Queue, - const nd_range &Range, KernelTypeUniversalRef &&KernelFunc, + const queue &Queue, const nd_range &Range, + KernelTypeUniversalRef &&KernelFunc, PropertiesT ExtraProps = ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { @@ -3385,8 +3385,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { return detail::submit_kernel_direct_parallel_for( - *this, Range, - Rest..., + *this, Range, Rest..., ext::oneapi::experimental::empty_properties_t{}, TlsCodeLocCapture.query()); } else { From 98b640059998eaaaff053fe5f7d314da418987af Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 28 Oct 2025 03:38:50 +0100 Subject: [PATCH 08/10] Address feedback --- .../sycl/detail/kernel_launch_helper.hpp | 90 ++++++++----------- sycl/include/sycl/handler.hpp | 6 +- sycl/source/detail/kernel_data.hpp | 15 ++-- sycl/test/abi/sycl_symbols_windows.dump | 13 +-- 4 files changed, 52 insertions(+), 72 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index cb8aef61b4cc..98ff4e544a1f 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -264,18 +264,16 @@ template struct MarshalledProperty; // Generic implementation for runtime properties. template -struct MarshalledProperty>> { +struct MarshalledProperty< + PropertyTy, + std::enable_if_t && + std::is_same_v>> { std::optional property; template MarshalledProperty([[maybe_unused]] const InputPropertyTy &InputProperties) { - if constexpr (ext::oneapi::experimental::is_property_list_v< - InputPropertyTy>) - if constexpr (InputPropertyTy::template has_property()) { - std::cout << "Got property: " << typeid(PropertyTy).name() << "\n"; - property = InputProperties.template get_property(); - } + if constexpr (InputPropertyTy::template has_property()) + property = InputProperties.template get_property(); } MarshalledProperty() = default; @@ -290,10 +288,8 @@ struct MarshalledProperty { template MarshalledProperty([[maybe_unused]] const InputPropertyTy &Props) { using namespace sycl::ext::oneapi::experimental; - if constexpr (ext::oneapi::experimental::is_property_list_v< - InputPropertyTy>) - if constexpr (InputPropertyTy::template has_property()) - isRootSyncPropPresent = true; + isRootSyncPropPresent = + InputPropertyTy::template has_property(); } MarshalledProperty() = default; @@ -305,16 +301,15 @@ struct MarshalledProperty< sycl::ext::oneapi::experimental::work_group_progress_key> { struct ScopeForwardProgressProperty { - std::optional - Guarantee; - std::optional ExecScope; - std::optional - CoordinationScope; + sycl::ext::oneapi::experimental::forward_progress_guarantee Guarantee; + sycl::ext::oneapi::experimental::execution_scope ExecScope; + sycl::ext::oneapi::experimental::execution_scope CoordinationScope; }; // Forward progress guarantee properties for work_item, sub_group and // work_group scopes. We need to store them for validation later. - std::array MForwardProgressProperties; + std::array, 3> + MForwardProgressProperties; template ) { - if constexpr (InputPropertyTy::template has_property< - work_group_progress_key>()) { - auto prop = Props.template get_property(); - MForwardProgressProperties[0].Guarantee = prop.guarantee; - MForwardProgressProperties[0].ExecScope = execution_scope::work_group; - MForwardProgressProperties[0].CoordinationScope = - prop.coordinationScope; - } - if constexpr (InputPropertyTy::template has_property< - sub_group_progress_key>()) { - auto prop = Props.template get_property(); - MForwardProgressProperties[1].Guarantee = prop.guarantee; - MForwardProgressProperties[1].ExecScope = execution_scope::sub_group; - MForwardProgressProperties[1].CoordinationScope = - prop.coordinationScope; - } - if constexpr (InputPropertyTy::template has_property< - work_item_progress_key>()) { - auto prop = Props.template get_property(); - MForwardProgressProperties[2].Guarantee = prop.guarantee; - MForwardProgressProperties[2].ExecScope = execution_scope::work_item; - MForwardProgressProperties[2].CoordinationScope = - prop.coordinationScope; - } + if constexpr (InputPropertyTy::template has_property< + work_group_progress_key>()) { + auto prop = Props.template get_property(); + MForwardProgressProperties[0] = { + prop.guarantee, execution_scope::work_group, prop.coordinationScope}; + } + if constexpr (InputPropertyTy::template has_property< + sub_group_progress_key>()) { + auto prop = Props.template get_property(); + MForwardProgressProperties[1] = { + prop.guarantee, execution_scope::sub_group, prop.coordinationScope}; + } + if constexpr (InputPropertyTy::template has_property< + work_item_progress_key>()) { + auto prop = Props.template get_property(); + MForwardProgressProperties[2] = { + prop.guarantee, execution_scope::work_item, prop.coordinationScope}; } } @@ -371,21 +357,14 @@ using KernelPropertyHolderStructTy = sycl::ext::oneapi::experimental::cuda::cluster_size_key<2>, sycl::ext::oneapi::experimental::cuda::cluster_size_key<3>>; -template -constexpr auto processKernelLaunchProperties(PropertiesT Props) { - KernelPropertyHolderStructTy prop(Props); - return prop; -} - /// Note: it is important that this function *does not* depend on kernel /// name or kernel type, because then it will be instantiated for every /// kernel, even though body of those instantiated functions could be almost /// the same, thus unnecessary increasing compilation time. -template +template >> constexpr auto processKernelProperties(PropertiesT Props) { - static_assert(ext::oneapi::experimental::is_property_list::value, - "Template type is not a property list."); static_assert( !PropertiesT::template has_property< sycl::ext::intel::experimental::fp_control_key>() || @@ -398,7 +377,8 @@ constexpr auto processKernelProperties(PropertiesT Props) { sycl::ext::oneapi::experimental::indirectly_callable_key>(), "indirectly_callable property cannot be applied to SYCL kernels"); - return processKernelLaunchProperties(Props); + KernelPropertyHolderStructTy prop(Props); + return prop; } // Returns KernelLaunchPropertiesTy or std::nullopt based on whether the diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index ca9a9a8dc301..bf003be86e80 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -903,7 +903,7 @@ class __SYCL_EXPORT handler { template void processLaunchProperties(PropertiesT Props) { detail::KernelPropertyHolderStructTy ParsedProp = - detail::processKernelLaunchProperties(Props); + detail::processKernelProperties(Props); setKernelLaunchProperties(ParsedProp); } @@ -1296,7 +1296,7 @@ class __SYCL_EXPORT handler { detail::checkValueRange(NumWorkItems); setNDRangeDescriptor(std::move(NumWorkItems)); detail::KernelPropertyHolderStructTy ParsedProp = - detail::processKernelLaunchProperties(Props); + detail::processKernelProperties(Props); setKernelLaunchProperties(ParsedProp); extractArgsAndReqs(); #endif @@ -1321,7 +1321,7 @@ class __SYCL_EXPORT handler { detail::checkValueRange(NDRange); setNDRangeDescriptor(std::move(NDRange)); detail::KernelPropertyHolderStructTy ParsedProp = - detail::processKernelLaunchProperties(Props); + detail::processKernelProperties(Props); setKernelLaunchProperties(ParsedProp); extractArgsAndReqs(); #endif diff --git a/sycl/source/detail/kernel_data.hpp b/sycl/source/detail/kernel_data.hpp index 1866b53c94f8..9e838aaf1015 100644 --- a/sycl/source/detail/kernel_data.hpp +++ b/sycl/source/detail/kernel_data.hpp @@ -258,14 +258,13 @@ class KernelData { // Validate and set forward progress guarantees. for (int i = 0; i < 3; i++) { - if (ForwardProgressProp->MForwardProgressProperties[i] - .Guarantee.has_value()) { + if (ForwardProgressProp->MForwardProgressProperties[i].has_value()) { if (!dev.supportsForwardProgress( - *ForwardProgressProp->MForwardProgressProperties[i].Guarantee, - *ForwardProgressProp->MForwardProgressProperties[i].ExecScope, - *ForwardProgressProp->MForwardProgressProperties[i] - .CoordinationScope)) { + ForwardProgressProp->MForwardProgressProperties[i]->Guarantee, + ForwardProgressProp->MForwardProgressProperties[i]->ExecScope, + ForwardProgressProp->MForwardProgressProperties[i] + ->CoordinationScope)) { throw sycl::exception( sycl::make_error_code(errc::feature_not_supported), "The device associated with the queue does not support the " @@ -273,7 +272,7 @@ class KernelData { } auto execScope = - *ForwardProgressProp->MForwardProgressProperties[i].ExecScope; + ForwardProgressProp->MForwardProgressProperties[i]->ExecScope; // If we are here, the device supports the guarantee required but // there is a caveat in that if the guarantee required is a concurrent // guarantee, then we most likely also need to enable cooperative @@ -286,7 +285,7 @@ class KernelData { // behavior in Unified Runtime. if ((execScope == execScope::work_group || execScope == execScope::sub_group) && - (*ForwardProgressProp->MForwardProgressProperties[i].Guarantee == + (ForwardProgressProp->MForwardProgressProperties[i]->Guarantee == forward_progress_guarantee::concurrent)) { setCooperative(true); } diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index d5f53a5bbb50..e11cca8be25c 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -286,12 +286,12 @@ ??$is_image_handle_supported@Usampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVdevice@34@AEBVcontext@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z -??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z ??$update_nd_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$00@45@@Z ??$update_nd_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$01@45@@Z ??$update_nd_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$02@45@@Z @@ -4428,6 +4428,7 @@ ?setKernelFunc@handler@_V1@sycl@@AEAAXPEAX@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z +?setKernelLaunchProperties@handler@_V1@sycl@@AEAAXAEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@detail@23@@Z ?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedCacheT@detail@23@@Z ?setKernelWorkGroupMem@handler@_V1@sycl@@AEAAX_K@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z From b043a2d61a1a11fcb0314a91bf29583c1271919d Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 28 Oct 2025 03:44:49 +0100 Subject: [PATCH 09/10] Fix test failures --- sycl/test/include_deps/sycl_khr_includes_context.hpp.cpp | 2 +- sycl/test/include_deps/sycl_khr_includes_functional.hpp.cpp | 1 + sycl/test/include_deps/sycl_khr_includes_math.hpp.cpp | 2 +- sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp | 2 +- sycl/test/include_deps/sycl_khr_includes_vec.hpp.cpp | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sycl/test/include_deps/sycl_khr_includes_context.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_context.hpp.cpp index 3608d2902d69..ddaf62e33ae8 100644 --- a/sycl/test/include_deps/sycl_khr_includes_context.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_context.hpp.cpp @@ -10,8 +10,8 @@ // CHECK-NEXT: feature_test.hpp // CHECK-NEXT: context.hpp // CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: backend_types.hpp // CHECK-NEXT: stl_wrappers/cstdlib +// CHECK-NEXT: backend_types.hpp // CHECK-NEXT: detail/export.hpp // CHECK-NEXT: detail/info_desc_helpers.hpp // CHECK-NEXT: aspects.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_functional.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_functional.hpp.cpp index 8b19443974f6..692e49c927e5 100644 --- a/sycl/test/include_deps/sycl_khr_includes_functional.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_functional.hpp.cpp @@ -9,4 +9,5 @@ // CHECK-NEXT: detail/defines_elementary.hpp // CHECK-NEXT: feature_test.hpp // CHECK-NEXT: functional.hpp +// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_math.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_math.hpp.cpp index 8dcf562a657e..ea5a9c8b7f04 100644 --- a/sycl/test/include_deps/sycl_khr_includes_math.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_math.hpp.cpp @@ -34,11 +34,11 @@ // CHECK-NEXT: vector.hpp // CHECK-NEXT: detail/named_swizzles_mixin.hpp // CHECK-NEXT: detail/vector_arith.hpp +// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: detail/common.hpp // CHECK-NEXT: stl_wrappers/cassert // CHECK-NEXT: stl_wrappers/assert.h // CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: marray.hpp // CHECK-NEXT: multi_ptr.hpp // CHECK-NEXT: detail/address_space_cast.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp index c1d74a687701..99f87aee43e6 100644 --- a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp @@ -35,11 +35,11 @@ // CHECK-NEXT: vector.hpp // CHECK-NEXT: detail/named_swizzles_mixin.hpp // CHECK-NEXT: detail/vector_arith.hpp +// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: detail/common.hpp // CHECK-NEXT: stl_wrappers/cassert // CHECK-NEXT: stl_wrappers/assert.h // CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: marray.hpp // CHECK-NEXT: multi_ptr.hpp // CHECK-NEXT: detail/address_space_cast.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_vec.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_vec.hpp.cpp index 5baff0436851..6de63c741a4c 100644 --- a/sycl/test/include_deps/sycl_khr_includes_vec.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_vec.hpp.cpp @@ -19,6 +19,7 @@ // CHECK-NEXT: detail/type_traits.hpp // CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp // CHECK-NEXT: detail/fwd/multi_ptr.hpp +// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-NEXT: detail/common.hpp // CHECK-NEXT: detail/export.hpp // CHECK-NEXT: stl_wrappers/cassert @@ -27,5 +28,4 @@ // CHECK-NEXT: detail/fwd/accessor.hpp // CHECK-NEXT: detail/defines.hpp // CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: stl_wrappers/cstdlib // CHECK-EMPTY: From 0a3b4e6557c4d7fc395e861f01a7235f66ed22cd Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 28 Oct 2025 05:52:59 +0100 Subject: [PATCH 10/10] Fix RHEL build --- sycl/include/sycl/detail/kernel_launch_helper.hpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 98ff4e544a1f..af26b611c3b6 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -271,9 +271,10 @@ struct MarshalledProperty< std::optional property; template - MarshalledProperty([[maybe_unused]] const InputPropertyTy &InputProperties) { + MarshalledProperty(const InputPropertyTy &Props) { + (void)Props; if constexpr (InputPropertyTy::template has_property()) - property = InputProperties.template get_property(); + property = Props.template get_property(); } MarshalledProperty() = default; @@ -286,8 +287,10 @@ struct MarshalledProperty { bool isRootSyncPropPresent = false; template - MarshalledProperty([[maybe_unused]] const InputPropertyTy &Props) { + MarshalledProperty(const InputPropertyTy &Props) { using namespace sycl::ext::oneapi::experimental; + + (void)Props; isRootSyncPropPresent = InputPropertyTy::template has_property(); } @@ -314,8 +317,9 @@ struct MarshalledProperty< template >> - MarshalledProperty([[maybe_unused]] const InputPropertyTy &Props) { + MarshalledProperty(const InputPropertyTy &Props) { using namespace sycl::ext::oneapi::experimental; + (void)Props; if constexpr (InputPropertyTy::template has_property< work_group_progress_key>()) {