diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index a80ddc9feb83f..af26b611c3b6f 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -14,6 +14,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -253,23 +256,154 @@ struct KernelWrapper< } }; // KernelWrapper struct -struct KernelLaunchPropertyWrapper { - template - static void parseProperties([[maybe_unused]] PropertyProcessor h, - [[maybe_unused]] const KernelType &KernelFunc) { -#ifndef __SYCL_DEVICE_ONLY__ - // If there are properties provided by get method then process them. - if constexpr (ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod::value) { +// This namespace encapsulates everything related to parsing kernel launch +// properties. +inline namespace kernel_launch_properties_v1 { + +template struct MarshalledProperty; + +// Generic implementation for runtime properties. +template +struct MarshalledProperty< + PropertyTy, + std::enable_if_t && + std::is_same_v>> { + std::optional property; + + template + MarshalledProperty(const InputPropertyTy &Props) { + (void)Props; + if constexpr (InputPropertyTy::template has_property()) + property = Props.template get_property(); + } + + MarshalledProperty() = default; +}; + +// Specialization for use_root_sync_key property. +template <> +struct MarshalledProperty { + + bool isRootSyncPropPresent = false; + + template + MarshalledProperty(const InputPropertyTy &Props) { + using namespace sycl::ext::oneapi::experimental; - h->template processProperties< - detail::CompileTimeKernelInfo.IsESIMD>( - KernelFunc.get(ext::oneapi::experimental::properties_tag{})); + (void)Props; + isRootSyncPropPresent = + InputPropertyTy::template has_property(); + } + + MarshalledProperty() = default; +}; + +// Specialization for work group progress property. 
+template <> +struct MarshalledProperty< + sycl::ext::oneapi::experimental::work_group_progress_key> { + + struct ScopeForwardProgressProperty { + sycl::ext::oneapi::experimental::forward_progress_guarantee Guarantee; + sycl::ext::oneapi::experimental::execution_scope ExecScope; + sycl::ext::oneapi::experimental::execution_scope CoordinationScope; + }; + + // Forward progress guarantee properties for work_item, sub_group and + // work_group scopes. We need to store them for validation later. + std::array, 3> + MForwardProgressProperties; + + template >> + MarshalledProperty(const InputPropertyTy &Props) { + using namespace sycl::ext::oneapi::experimental; + (void)Props; + + if constexpr (InputPropertyTy::template has_property< + work_group_progress_key>()) { + auto prop = Props.template get_property(); + MForwardProgressProperties[0] = { + prop.guarantee, execution_scope::work_group, prop.coordinationScope}; } -#endif + if constexpr (InputPropertyTy::template has_property< + sub_group_progress_key>()) { + auto prop = Props.template get_property(); + MForwardProgressProperties[1] = { + prop.guarantee, execution_scope::sub_group, prop.coordinationScope}; + } + if constexpr (InputPropertyTy::template has_property< + work_item_progress_key>()) { + auto prop = Props.template get_property(); + MForwardProgressProperties[2] = { + prop.guarantee, execution_scope::work_item, prop.coordinationScope}; + } + } + + MarshalledProperty() = default; +}; + +template struct PropsHolder : MarshalledProperty... { + + template + PropsHolder(PropertiesT Props) : MarshalledProperty(Props)... 
{} + + PropsHolder() = default; +}; + +using KernelPropertyHolderStructTy = + PropsHolder, + sycl::ext::oneapi::experimental::cuda::cluster_size_key<2>, + sycl::ext::oneapi::experimental::cuda::cluster_size_key<3>>; + +/// Note: it is important that this function *does not* depend on kernel +/// name or kernel type, because then it will be instantiated for every +/// kernel, even though body of those instantiated functions could be almost +/// the same, thus unnecessarily increasing compilation time. +template >> +constexpr auto processKernelProperties(PropertiesT Props) { + static_assert( + !PropertiesT::template has_property< + sycl::ext::intel::experimental::fp_control_key>() || + (PropertiesT::template has_property< + sycl::ext::intel::experimental::fp_control_key>() && + IsESIMDKernel), + "Floating point control property is supported for ESIMD kernels only."); + static_assert( + !PropertiesT::template has_property< + sycl::ext::oneapi::experimental::indirectly_callable_key>(), + "indirectly_callable property cannot be applied to SYCL kernels"); + + KernelPropertyHolderStructTy prop(Props); + return prop; +} + +// Returns KernelPropertyHolderStructTy or std::nullopt based on whether the +// kernel functor has a get method that returns properties. +template +constexpr std::optional +parseProperties([[maybe_unused]] const KernelType &KernelFunc) { +#ifndef __SYCL_DEVICE_ONLY__ + // If there are properties provided by get method then process them. + if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod< + const KernelType &>::value) { + + return processKernelProperties( + KernelFunc.get(ext::oneapi::experimental::properties_tag{})); } -}; // KernelLaunchPropertyWrapper struct +#endif + // If there are no properties provided by get method then return empty + // optional. 
+ return std::nullopt; +} +} // namespace kernel_launch_properties_v1 } // namespace detail } // namespace _V1 diff --git a/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp b/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp index e7eae55636622..9e0d84afb660f 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp @@ -22,7 +22,7 @@ struct cluster_size cluster_size, ::sycl::ext::oneapi::experimental::detail::ClusterLaunch> { cluster_size(const range &size) : size(size) {} - sycl::range get_cluster_size() { return size; } + sycl::range get_cluster_size() const { return size; } private: range size; diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index 8c8488a99e354..673d4c703fe52 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -271,17 +271,13 @@ template void nd_launch(queue Q, nd_range Range, const KernelType &KernelObj, ReductionsT &&...Reductions) { - // TODO The handler-less path does not support reductions, kernel - // function properties and kernel functions with the kernel_handler - // type argument yet. + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. 
if constexpr (sizeof...(ReductionsT) == 0 && - !(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { - detail::submit_kernel_direct_parallel_for( - std::move(Q), empty_properties_t{}, Range, KernelObj); + detail::submit_kernel_direct_parallel_for(std::move(Q), Range, + KernelObj); } else { submit(std::move(Q), [&](handler &CGH) { nd_launch(CGH, Range, KernelObj, @@ -308,13 +304,25 @@ template void nd_launch(queue Q, launch_config, Properties> Config, const KernelType &KernelObj, ReductionsT &&...Reductions) { - // TODO This overload of the nd_launch function takes the kernel function - // properties, which are not yet supported for the handler-less path, - // so it only supports handler based submission for now - submit(std::move(Q), [&](handler &CGH) { - nd_launch(CGH, Config, KernelObj, - std::forward(Reductions)...); - }); + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. 
+ if constexpr (sizeof...(ReductionsT) == 0 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, sycl::nd_item>::value)) { + + ext::oneapi::experimental::detail::LaunchConfigAccess, + Properties> + LaunchConfigAccess(Config); + + detail::submit_kernel_direct_parallel_for( + std::move(Q), LaunchConfigAccess.getRange(), KernelObj, + LaunchConfigAccess.getProperties()); + } else { + submit(std::move(Q), [&](handler &CGH) { + nd_launch(CGH, Config, KernelObj, + std::forward(Reductions)...); + }); + } } template diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 67f21bc05857f..bf003be86e80a 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -877,6 +877,7 @@ class __SYCL_EXPORT handler { } } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, @@ -901,64 +902,9 @@ class __SYCL_EXPORT handler { /// Stores information about kernel properties into the handler. 
template void processLaunchProperties(PropertiesT Props) { - if constexpr (PropertiesT::template has_property< - sycl::ext::intel::experimental::cache_config_key>()) { - auto Config = Props.template get_property< - sycl::ext::intel::experimental::cache_config_key>(); - if (Config == sycl::ext::intel::experimental::large_slm) { - setKernelCacheConfig(StableKernelCacheConfig::LargeSLM); - } else if (Config == sycl::ext::intel::experimental::large_data) { - setKernelCacheConfig(StableKernelCacheConfig::LargeData); - } - } else { - (void)Props; - } - - constexpr bool UsesRootSync = PropertiesT::template has_property< - sycl::ext::oneapi::experimental::use_root_sync_key>(); - if (UsesRootSync) { - setKernelIsCooperative(UsesRootSync); - } - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_group_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::work_group_progress_key>(); - verifyDeviceHasProgressGuarantee( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_group, - prop.coordinationScope); - } - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - sub_group_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::sub_group_progress_key>(); - verifyDeviceHasProgressGuarantee( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::sub_group, - prop.coordinationScope); - } - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_item_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::work_item_progress_key>(); - verifyDeviceHasProgressGuarantee( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_item, - prop.coordinationScope); - } - - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_group_scratch_size>()) { - auto 
WorkGroupMemSize = Props.template get_property< - sycl::ext::oneapi::experimental::work_group_scratch_size>(); - setKernelWorkGroupMem(WorkGroupMemSize.size); - } - - checkAndSetClusterRange(Props); + detail::KernelPropertyHolderStructTy ParsedProp = + detail::processKernelProperties(Props); + setKernelLaunchProperties(ParsedProp); } /// Process kernel properties. @@ -973,23 +919,11 @@ class __SYCL_EXPORT handler { bool IsESIMDKernel, typename PropertiesT = ext::oneapi::experimental::empty_properties_t> void processProperties(PropertiesT Props) { - static_assert( - ext::oneapi::experimental::is_property_list::value, - "Template type is not a property list."); - static_assert( - !PropertiesT::template has_property< - sycl::ext::intel::experimental::fp_control_key>() || - (PropertiesT::template has_property< - sycl::ext::intel::experimental::fp_control_key>() && - IsESIMDKernel), - "Floating point control property is supported for ESIMD kernels only."); - static_assert( - !PropertiesT::template has_property< - sycl::ext::oneapi::experimental::indirectly_callable_key>(), - "indirectly_callable property cannot be applied to SYCL kernels"); - - processLaunchProperties(Props); + detail::KernelPropertyHolderStructTy ParsedProp = + detail::processKernelProperties(Props); + setKernelLaunchProperties(ParsedProp); } +#endif // __INTEL_PREVIEW_BREAKING_CHANGES /// Checks whether it is possible to copy the source shape to the destination /// shape(the shapes are described by the accessor ranges) by using @@ -1297,8 +1231,10 @@ class __SYCL_EXPORT handler { decltype(Wrapper), TransformedArgType, PropertiesT>::wrap(Wrapper); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - Wrapper); + if (auto prop = detail::parseProperties(Wrapper)) { + setKernelLaunchProperties(*prop); + } + #ifndef __SYCL_DEVICE_ONLY__ verifyUsedKernelBundleInternal(Info.Name); // We are executing over the rounded range, but there are still @@ -1322,11 +1258,15 @@ class __SYCL_EXPORT handler { // 
kernel is generated detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = + detail::parseProperties(KernelFunc)) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ verifyUsedKernelBundleInternal(Info.Name); - processProperties(Props); + detail::KernelPropertyHolderStructTy ProcessedProps = + detail::processKernelProperties(Props); + setKernelLaunchProperties(ProcessedProps); detail::checkValueRange(UserRange); setNDRangeDescriptor(std::move(UserRange)); StoreLambda( @@ -1355,7 +1295,9 @@ class __SYCL_EXPORT handler { setDeviceKernelInfo(std::move(Kernel)); detail::checkValueRange(NumWorkItems); setNDRangeDescriptor(std::move(NumWorkItems)); - processLaunchProperties(Props); + detail::KernelPropertyHolderStructTy ParsedProp = + detail::processKernelProperties(Props); + setKernelLaunchProperties(ParsedProp); extractArgsAndReqs(); #endif } @@ -1378,7 +1320,9 @@ class __SYCL_EXPORT handler { setDeviceKernelInfo(std::move(Kernel)); detail::checkValueRange(NDRange); setNDRangeDescriptor(std::move(NDRange)); - processLaunchProperties(Props); + detail::KernelPropertyHolderStructTy ParsedProp = + detail::processKernelProperties(Props); + setKernelLaunchProperties(ParsedProp); extractArgsAndReqs(); #endif } @@ -1395,12 +1339,13 @@ class __SYCL_EXPORT handler { using NameT = typename detail::get_kernel_name_t::name; (void)Props; + constexpr auto Info = detail::CompileTimeKernelInfo; detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = detail::parseProperties(KernelFunc)) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ - constexpr auto Info = detail::CompileTimeKernelInfo; if constexpr (WrapAsVal == detail::WrapAs::single_task) { throwOnKernelParameterMisuse(Info); } @@ -1416,7 +1361,9 @@ class __SYCL_EXPORT handler { } StoreLambda(std::move(KernelFunc)); - 
processProperties(Props); + detail::KernelPropertyHolderStructTy ProcessedProps = + detail::processKernelProperties(Props); + setKernelLaunchProperties(ProcessedProps); #endif } @@ -1439,8 +1386,9 @@ class __SYCL_EXPORT handler { (void)Kernel; detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = detail::parseProperties(KernelFunc)) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ constexpr auto Info = detail::CompileTimeKernelInfo; if constexpr (WrapAsVal == detail::WrapAs::single_task) { @@ -1467,7 +1415,9 @@ class __SYCL_EXPORT handler { "the kernel name must match the name of the lambda"); } StoreLambda(std::move(KernelFunc)); - processProperties(Props); + detail::KernelPropertyHolderStructTy ProcessedProps = + detail::processKernelProperties(Props); + setKernelLaunchProperties(ProcessedProps); #endif } #endif // __INTEL_PREVIEW_BREAKING_CHANGES @@ -3490,7 +3440,9 @@ class __SYCL_EXPORT handler { bool IsDeviceImageScoped, size_t NumBytes, size_t Offset); - // Changing values in this will break ABI/API. +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // Modeled after ur_kernel_cache_config_t + // Used as an argument to setKernelCacheConfig that's part of the ABI. enum class StableKernelCacheConfig : int32_t { Default = 0, LargeSLM = 1, @@ -3503,15 +3455,17 @@ class __SYCL_EXPORT handler { void setKernelIsCooperative(bool); // Set using cuda thread block cluster launch flag and set the launch bounds. -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims); -#endif void setKernelClusterLaunch(sycl::range<3> ClusterSize); void setKernelClusterLaunch(sycl::range<2> ClusterSize); void setKernelClusterLaunch(sycl::range<1> ClusterSize); // Set the request work group memory size (work_group_static ext). 
void setKernelWorkGroupMem(size_t Size); +#endif + + void setKernelLaunchProperties( + const detail::KernelPropertyHolderStructTy &KernelLaunchProperties); // Various checks that are only meaningful for host compilation, because they // result in runtime errors (i.e. exceptions being thrown). To save time @@ -3668,7 +3622,6 @@ class __SYCL_EXPORT handler { void instantiateKernelOnHost(void *InstantiateKernelOnHostPtr); friend class detail::HandlerAccess; - friend struct detail::KernelLaunchPropertyWrapper; #ifdef __INTEL_PREVIEW_BREAKING_CHANGES __SYCL_DLL_LOCAL detail::handler_impl *get_impl() { return impl; } diff --git a/sycl/include/sycl/khr/free_function_commands.hpp b/sycl/include/sycl/khr/free_function_commands.hpp index 68dd159bf8211..04f73dac91ae0 100644 --- a/sycl/include/sycl/khr/free_function_commands.hpp +++ b/sycl/include/sycl/khr/free_function_commands.hpp @@ -157,16 +157,15 @@ template r, range<1> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { - // TODO The handler-less path does not support kernel function properties - // and kernel functions with the kernel_handler type argument yet. + // TODO The handler-less path does not support kernel functions with the + // kernel_handler type argument yet. 
if constexpr (!(ext::oneapi::experimental::detail:: HasKernelPropertiesGetMethod< const KernelType &>::value) && !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<1>>::value)) { - detail::submit_kernel_direct_parallel_for( - q, ext::oneapi::experimental::empty_properties_t{}, - nd_range<1>(r, size), std::forward(k)); + detail::submit_kernel_direct_parallel_for(q, nd_range<1>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, @@ -178,16 +177,12 @@ template r, range<2> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { - // TODO The handler-less path does not support kernel function properties - // and kernel functions with the kernel_handler type argument yet. - if constexpr (!(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && - !(detail::KernelLambdaHasKernelHandlerArgT< + // TODO The handler-less path does not support kernel functions with the + // kernel_handler type argument yet. + if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<2>>::value)) { - detail::submit_kernel_direct_parallel_for( - q, ext::oneapi::experimental::empty_properties_t{}, - nd_range<2>(r, size), std::forward(k)); + detail::submit_kernel_direct_parallel_for(q, nd_range<2>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, @@ -199,16 +194,12 @@ template r, range<3> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { - // TODO The handler-less path does not support kernel function properties - // and kernel functions with the kernel_handler type argument yet. 
- if constexpr (!(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && - !(detail::KernelLambdaHasKernelHandlerArgT< + // TODO The handler-less path does not support kernel functions with the + // kernel_handler type argument yet. + if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item<3>>::value)) { - detail::submit_kernel_direct_parallel_for( - q, ext::oneapi::experimental::empty_properties_t{}, - nd_range<3>(r, size), std::forward(k)); + detail::submit_kernel_direct_parallel_for(q, nd_range<3>(r, size), + std::forward(k)); } else { submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 4a7f1fac789a3..e0c4de7635d68 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -68,6 +68,7 @@ event __SYCL_EXPORT submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template @@ -75,6 +76,7 @@ void __SYCL_EXPORT submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); namespace detail { @@ -159,16 +161,14 @@ class __SYCL_EXPORT SubmissionInfo { template + typename PropertiesT = ext::oneapi::experimental::empty_properties_t, + typename KernelTypeUniversalRef, int Dims> auto submit_kernel_direct( - const queue &Queue, [[maybe_unused]] PropertiesT Props, - const nd_range &Range, KernelTypeUniversalRef &&KernelFunc, + const queue &Queue, const nd_range &Range, + KernelTypeUniversalRef &&KernelFunc, + PropertiesT ExtraProps = 
ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { - // TODO Properties not supported yet - static_assert( - std::is_same_v, - "Setting properties not supported yet for no-CGH kernel submit."); + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); using KernelType = @@ -210,22 +210,46 @@ auto submit_kernel_direct( "-fsycl-host-compiler-options='/std:c++latest' " "might also help."); + // Get kernel launch properties. The user can specify properties by + // defining a get(properties_tag{}) method in the kernel type, by using + // the launch_config API, or by explicitly passing them in the call to + // parallel_for (deprecated API). + // ExtraProps are properties passed explicitly or via launch_config. + + // Assumption: If the user specifies properties via launch_config or + // explicitly, we don't check for properties specified via get(). + KernelPropertyHolderStructTy parsedProps; + if constexpr (std::is_same_v) { + // Use properties passed via the get() method. 
+ if constexpr (ext::oneapi::experimental::detail:: + HasKernelPropertiesGetMethod::value) { + auto prop = KernelFunc.get(ext::oneapi::experimental::properties_tag{}); + parsedProps = detail::processKernelProperties(prop); + } + } else { + // Use ExtraProps + parsedProps = detail::processKernelProperties(ExtraProps); + } + if constexpr (EventNeeded) { return submit_kernel_direct_with_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, + Queue, Range, HostKernel, DeviceKernelInfoPtr, parsedProps, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); } else { submit_kernel_direct_without_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, + Queue, Range, HostKernel, DeviceKernelInfoPtr, parsedProps, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); } } template + typename PropertiesT = ext::oneapi::experimental::empty_properties_t, + typename KernelTypeUniversalRef, int Dims> auto submit_kernel_direct_parallel_for( - const queue &Queue, PropertiesT Props, const nd_range &Range, + const queue &Queue, const nd_range &Range, KernelTypeUniversalRef &&KernelFunc, + PropertiesT Props = ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { using KernelType = @@ -246,7 +270,7 @@ auto submit_kernel_direct_parallel_for( return submit_kernel_direct( - Queue, Props, Range, std::forward(KernelFunc), + Queue, Range, std::forward(KernelFunc), Props, CodeLoc); } @@ -259,8 +283,8 @@ auto submit_kernel_direct_single_task( return submit_kernel_direct( - Queue, Props, nd_range<1>{1, 1}, - std::forward(KernelFunc), CodeLoc); + Queue, nd_range<1>{1, 1}, + std::forward(KernelFunc), Props, CodeLoc); } } // namespace detail @@ -3323,11 +3347,22 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return submit( - [&](handler &CGH) { - CGH.template 
parallel_for(Range, Properties, Rest...); - }, - TlsCodeLocCapture.query()); + using KernelType = std::tuple_element_t<0, std::tuple>; + + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(RestT) == 1 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, sycl::nd_item>::value)) { + + return detail::submit_kernel_direct_parallel_for( + *this, Range, Rest..., Properties, TlsCodeLocCapture.query()); + } else + return submit( + [&](handler &CGH) { + CGH.template parallel_for(Range, Properties, Rest...); + }, + TlsCodeLocCapture.query()); } /// parallel_for version with a kernel represented as a lambda + nd_range that @@ -3344,18 +3379,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); using KernelType = std::tuple_element_t<0, std::tuple>; - // TODO The handler-less path does not support reductions, kernel - // function properties and kernel functions with the kernel_handler - // type argument yet. + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. 
if constexpr (sizeof...(RestT) == 1 && - !(ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod< - const KernelType &>::value) && !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { return detail::submit_kernel_direct_parallel_for( - *this, ext::oneapi::experimental::empty_properties_t{}, Range, - Rest..., TlsCodeLocCapture.query()); + *this, Range, Rest..., + ext::oneapi::experimental::empty_properties_t{}, + TlsCodeLocCapture.query()); } else { return submit( [&](handler &CGH) { diff --git a/sycl/source/detail/kernel_data.hpp b/sycl/source/detail/kernel_data.hpp index 7ba849dc33f1f..9e838aaf10154 100644 --- a/sycl/source/detail/kernel_data.hpp +++ b/sycl/source/detail/kernel_data.hpp @@ -8,12 +8,14 @@ #pragma once +#include #include #include #include #include #include +#include #include @@ -27,6 +29,11 @@ class KernelData { using DynamicParametersVecT = std::vector>; using ArgsVecT = std::vector; + template + using MarshalledProperty = detail::MarshalledProperty; + template + using cluster_size_key = + sycl::ext::oneapi::experimental::cuda::cluster_size_key; KernelData() = default; ~KernelData() = default; @@ -107,6 +114,7 @@ class KernelData { void setDeviceKernelInfoPtr(DeviceKernelInfo *Ptr) { MDeviceKernelInfoPtr = Ptr; } + #ifndef __INTEL_PREVIEW_BREAKING_CHANGES void setKernelInfo(void *KernelFuncPtr, int KernelNumArgs, KernelParamDescGetterT KernelParamDescGetter, @@ -134,6 +142,7 @@ class KernelData { return MDeviceKernelInfoPtr->usesAssert(); } + // Kernel launch properties getter and setters. 
ur_kernel_cache_config_t getKernelCacheConfig() const { return MKernelCacheConfig; } @@ -163,6 +172,142 @@ class KernelData { MKernelWorkGroupMemorySize = Size; } + void parseAndSetCacheConfigProperty( + const sycl::ext::intel::experimental::cache_config_key &prop) { + using namespace sycl::ext::intel::experimental; + + ur_kernel_cache_config_t CacheConfig = + ur_kernel_cache_config_t::UR_KERNEL_CACHE_CONFIG_DEFAULT; + if (prop == large_slm) { + CacheConfig = ur_kernel_cache_config_t::UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + } else if (prop == large_data) { + CacheConfig = ur_kernel_cache_config_t::UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + } else + assert(false && "unknown cache property type"); + + MKernelCacheConfig = CacheConfig; + } + + template + void parseAndSetClusterDimProperty( + const std::optional> &prop) { + if (prop) { + static_assert(ClusterDims < 4 && ClusterDims > 0, + "Invalid cluster dimensions"); + + auto ClusterSize = prop->get_cluster_size(); + MKernelUsesClusterLaunch = true; + + if constexpr (ClusterDims == 1) + MNDRDesc.setClusterDimensions(sycl::range<1>{ClusterSize[0]}); + else if constexpr (ClusterDims == 2) + MNDRDesc.setClusterDimensions( + sycl::range<2>{ClusterSize[0], ClusterSize[1]}); + else if constexpr (ClusterDims == 3) + MNDRDesc.setClusterDimensions( + sycl::range<3>{ClusterSize[0], ClusterSize[1], ClusterSize[2]}); + } + } + + void validateAndSetKernelLaunchProperties( + const detail::KernelPropertyHolderStructTy Kprop, bool HasGraph, + const device_impl &dev) { + using execScope = ext::oneapi::experimental::execution_scope; + using namespace sycl::ext::oneapi::experimental; + using namespace sycl::ext::oneapi::experimental::detail; + + const auto *WorkGroupMemSizeProp = + static_cast *>( + &Kprop); + const auto *CacheConfigProp = static_cast *>(&Kprop); + const auto *UseRootSyncProp = + static_cast *>(&Kprop); + const auto *ForwardProgressProp = + static_cast *>( + &Kprop); + const auto *ClusterLaunchPropDim1 = + static_cast> *>( + 
&Kprop); + const auto *ClusterLaunchPropDim2 = + static_cast> *>( + &Kprop); + const auto *ClusterLaunchPropDim3 = + static_cast> *>( + &Kprop); + + const bool isClusterDimPropPresent = ClusterLaunchPropDim1->property || + ClusterLaunchPropDim2->property || + ClusterLaunchPropDim3->property; + + // Early validation for graph-incompatible properties + if (HasGraph) { + if (WorkGroupMemSizeProp->property) { + throw sycl::exception( + sycl::make_error_code(errc::invalid), + "Setting work group scratch memory size is not yet supported " + "for use with the SYCL Graph extension."); + } + + if (isClusterDimPropPresent) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Cluster launch is not yet supported " + "for use with the SYCL Graph extension."); + } + } + + // Validate and set forward progress guarantees. + for (int i = 0; i < 3; i++) { + if (ForwardProgressProp->MForwardProgressProperties[i].has_value()) { + + if (!dev.supportsForwardProgress( + ForwardProgressProp->MForwardProgressProperties[i]->Guarantee, + ForwardProgressProp->MForwardProgressProperties[i]->ExecScope, + ForwardProgressProp->MForwardProgressProperties[i] + ->CoordinationScope)) { + throw sycl::exception( + sycl::make_error_code(errc::feature_not_supported), + "The device associated with the queue does not support the " + "requested forward progress guarantee."); + } + + auto execScope = + ForwardProgressProp->MForwardProgressProperties[i]->ExecScope; + // If we are here, the device supports the guarantee required but + // there is a caveat in that if the guarantee required is a concurrent + // guarantee, then we most likely also need to enable cooperative + // launch of the kernel. That is, although the device supports the + // required guarantee, some setup work is needed to truly make the + // device provide that guarantee at runtime. Otherwise, we will get + // the default guarantee which is weaker than concurrent. 
Same + // reasoning applies for sub_group but not for work_item. + // TODO: Further design work is probably needed to reflect this + // behavior in Unified Runtime. + if ((execScope == execScope::work_group || + execScope == execScope::sub_group) && + (ForwardProgressProp->MForwardProgressProperties[i]->Guarantee == + forward_progress_guarantee::concurrent)) { + setCooperative(true); + } + } + } + + if (UseRootSyncProp->isRootSyncPropPresent) + setCooperative(true); + + if (CacheConfigProp->property) + parseAndSetCacheConfigProperty(*(CacheConfigProp->property)); + + if (WorkGroupMemSizeProp->property) + setKernelWorkGroupMemorySize((*WorkGroupMemSizeProp->property).size); + + if (isClusterDimPropPresent) { + parseAndSetClusterDimProperty(ClusterLaunchPropDim1->property); + parseAndSetClusterDimProperty(ClusterLaunchPropDim2->property); + parseAndSetClusterDimProperty(ClusterLaunchPropDim3->property); + } + } + KernelNameStrRefT getKernelName() const { assert(MDeviceKernelInfoPtr); return static_cast(MDeviceKernelInfoPtr->Name); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index d86f6a5c6aac2..3ec6cc2ec2fce 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -567,6 +567,7 @@ EventImplPtr queue_impl::submit_command_to_graph( EventImplPtr queue_impl::submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { KernelData KData; @@ -574,6 +575,12 @@ EventImplPtr queue_impl::submit_kernel_direct_impl( KData.setDeviceKernelInfoPtr(DeviceKernelInfo); KData.setNDRDesc(NDRDesc); + // Validate and set kernel launch properties. 
+ KData.validateAndSetKernelLaunchProperties( + Props, getCommandGraph() != nullptr, /*HasGraph?*/ + getDeviceImpl() /*device_impl*/ + ); + auto SubmitKernelFunc = [&](detail::CG::StorageInitHelper &CGData, bool SchedulerBypass) -> EventImplPtr { if (SchedulerBypass) { diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 49da7aee8c448..031b0a01f56bc 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -363,10 +363,11 @@ class queue_impl : public std::enable_shared_from_this { event submit_kernel_direct_with_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { detail::EventImplPtr EventImpl = submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, - true, CodeLoc, IsTopCodeLoc); + true, Props, CodeLoc, IsTopCodeLoc); return createSyclObjFromImpl(EventImpl); } @@ -374,9 +375,10 @@ class queue_impl : public std::enable_shared_from_this { void submit_kernel_direct_without_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, - false, CodeLoc, IsTopCodeLoc); + false, Props, CodeLoc, IsTopCodeLoc); } void submit_without_event(const detail::type_erased_cgfo_ty &CGF, @@ -929,6 +931,7 @@ class queue_impl : public std::enable_shared_from_this { EventImplPtr submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template diff --git a/sycl/source/handler.cpp 
b/sycl/source/handler.cpp index 26477c99be62c..e9f139126c4c6 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1752,10 +1752,12 @@ static bool checkContextSupports(detail::context_impl &ContextImpl, return SupportsOp; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope) { + using execution_scope = sycl::ext::oneapi::experimental::execution_scope; using forward_progress = sycl::ext::oneapi::experimental::forward_progress_guarantee; @@ -1797,6 +1799,7 @@ void handler::verifyDeviceHasProgressGuarantee( } } } +#endif bool handler::supportsUSMMemcpy2D() { if (impl->get_graph_or_null()) @@ -1910,6 +1913,13 @@ void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest, }); } +void handler::setKernelLaunchProperties( + const detail::KernelPropertyHolderStructTy &Kprop) { + impl->MKernelData.validateAndSetKernelLaunchProperties( + Kprop, getCommandGraph() != nullptr /*hasGraph?*/, + impl->get_device() /*device_impl*/); +} + #ifndef __INTEL_PREVIEW_BREAKING_CHANGES const std::shared_ptr & handler::getContextImplPtr() const { @@ -1927,6 +1937,7 @@ detail::context_impl &handler::getContextImpl() const { return impl->get_queue().getContextImpl(); } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::setKernelCacheConfig(handler::StableKernelCacheConfig Config) { switch (Config) { case handler::StableKernelCacheConfig::Default: @@ -1945,7 +1956,6 @@ void handler::setKernelIsCooperative(bool KernelIsCooperative) { impl->MKernelData.setCooperative(KernelIsCooperative); } -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: @@ -1961,7 +1971,6 @@ void 
handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { impl->MKernelData.setClusterDimensions(ClusterSize); } } -#endif void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize) { throwIfGraphAssociated< @@ -1989,6 +1998,7 @@ void handler::setKernelWorkGroupMem(size_t Size) { sycl_ext_oneapi_work_group_scratch_memory>(); impl->MKernelData.setKernelWorkGroupMemorySize(Size); } +#endif // __INTEL_PREVIEW_BREAKING_CHANGES void handler::ext_oneapi_graph( ext::oneapi::experimental::command_graph< diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index f34da47852266..7fe5649aecc2a 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -476,27 +476,31 @@ event submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { return getSyclObjImpl(Queue)->submit_kernel_direct_with_event( - Range, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc); + Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); } template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, 
+ const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template @@ -504,27 +508,31 @@ void submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { getSyclObjImpl(Queue)->submit_kernel_direct_without_event( - Range, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc); + Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); } template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); } // namespace _V1 diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 032b82ae74293..83292f20db599 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2985,12 +2985,12 @@ _ZN4sycl3_V121__isgreaterequal_implEdd _ZN4sycl3_V121__isgreaterequal_implEff _ZN4sycl3_V122accelerator_selector_vERKNS0_6deviceE 
_ZN4sycl3_V128verifyUSMAllocatorPropertiesERKNS0_13property_listE -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_4cuda12cluster_sizeILi1EEENSR_ILi2EEENSR_ILi3EEEEEERKNSA_13code_locationEb 
+_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_4cuda12cluster_sizeILi1EEENSR_ILi2EEENSR_ILi3EEEEEERKNSA_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_4cuda12cluster_sizeILi1EEENSR_ILi2EEENSR_ILi3EEEEEERKNSA_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_4cuda12cluster_sizeILi1EEENSQ_ILi2EEENSQ_ILi3EEEEEERKNS9_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_4cuda12cluster_sizeILi1EEENSQ_ILi2EEENSQ_ILi3EEEEEERKNS9_13code_locationEb 
+_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_4cuda12cluster_sizeILi1EEENSQ_ILi2EEENSQ_ILi3EEEEEERKNS9_13code_locationEb _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv @@ -3618,6 +3618,7 @@ _ZN4sycl3_V17handler24ext_oneapi_memset2d_implEPvmimm _ZN4sycl3_V17handler24registerDynamicParameterEPNS0_3ext6oneapi12experimental6detail22dynamic_parameter_implEi _ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6detail22dynamic_parameter_baseEi _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb +_ZN4sycl3_V17handler25setKernelLaunchPropertiesERKNS0_6detail27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENS5_5intel12experimental12cache_configENS7_17use_root_sync_keyENS7_23work_group_progress_keyENS7_4cuda12cluster_sizeILi1EEENSF_ILi2EEENSF_ILi3EEEEEE _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi _ZN4sycl3_V17handler26setKernelNameBasedCachePtrEPNS0_6detail21KernelNameBasedCacheTE _ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index d5f53a5bbb505..e11cca8be25cb 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -286,12 +286,12 @@ 
??$is_image_handle_supported@Usampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVdevice@34@AEBVcontext@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z -??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z 
+??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z 
+??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z ??$update_nd_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$00@45@@Z ??$update_nd_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$01@45@@Z ??$update_nd_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$02@45@@Z @@ -4428,6 +4428,7 @@ ?setKernelFunc@handler@_V1@sycl@@AEAAXPEAX@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z +?setKernelLaunchProperties@handler@_V1@sycl@@AEAAXAEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@detail@23@@Z ?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedCacheT@detail@23@@Z ?setKernelWorkGroupMem@handler@_V1@sycl@@AEAAX_K@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z diff --git 
a/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp b/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp index 46d11eccdfe54..e6910484bf52f 100644 --- a/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp +++ b/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp @@ -20,7 +20,7 @@ struct ESIMDKernel { int main(void) { queue q; - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} syclex::properties properties7{ intelex::fp_control}; @@ -28,7 +28,7 @@ int main(void) { cgh.single_task(properties7, [=]() {}); }); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} ESIMDKernel Kern; q.submit([&](handler &cgh) { cgh.parallel_for(range<1>(1), Kern); }); diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index cf98e8708254a..9f68a2bbe4d9d 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -138,6 +138,8 @@ // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: 
ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp @@ -149,9 +151,7 @@ // CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp // CHECK-NEXT: ext/oneapi/interop_common.hpp // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: feature_test.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp index 9d5f05f24d95d..cb34a26c6ef34 100644 --- a/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp @@ -116,10 +116,23 @@ // CHECK-NEXT: ext/oneapi/properties/property.hpp // CHECK-NEXT: ext/oneapi/properties/property_value.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/properties/properties.hpp // CHECK-NEXT: ext/oneapi/properties/property_utils.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def +// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp +// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp +// 
CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp // CHECK-NEXT: detail/sycl_local_mem_builtins.hpp // CHECK-NEXT: detail/kernel_name_str_t.hpp @@ -136,20 +149,7 @@ // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp // CHECK-NEXT: ext/oneapi/device_global/device_global.hpp // CHECK-NEXT: ext/oneapi/device_global/properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp index 3c05eb6715259..23e72bf910ad4 100644 --- a/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp @@ -124,21 +124,9 @@ // CHECK-NEXT: ext/oneapi/properties/property.hpp // CHECK-NEXT: ext/oneapi/properties/property_value.hpp // 
CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/properties.hpp -// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp -// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/kernel_name_str_t.hpp -// CHECK-NEXT: detail/reduction_forward.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp -// CHECK-NEXT: ext/oneapi/interop_common.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp -// CHECK-NEXT: ext/oneapi/device_global/properties.hpp // CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/properties/properties.hpp +// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp // CHECK-NEXT: ext/oneapi/experimental/graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp @@ -150,8 +138,20 @@ // CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp // CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp +// CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp +// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp +// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp +// CHECK-NEXT: detail/kernel_name_str_t.hpp +// CHECK-NEXT: detail/reduction_forward.hpp +// CHECK-NEXT: event.hpp +// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp +// CHECK-NEXT: ext/oneapi/interop_common.hpp +// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp +// CHECK-NEXT: 
ext/oneapi/device_global/device_global.hpp +// CHECK-NEXT: ext/oneapi/device_global/properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: sycl_span.hpp // CHECK-NEXT: ext/oneapi/experimental/free_function_traits.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp index a37e63c0cc7de..7a73642eda287 100644 --- a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp @@ -142,6 +142,8 @@ // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp @@ -153,9 +155,7 @@ // CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp // CHECK-NEXT: ext/oneapi/interop_common.hpp // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp index d233d264267ac..defd42a0bfd30 100644 --- a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp @@ -152,21 +152,9 @@ // CHECK-NEXT: ext/oneapi/properties/property.hpp // CHECK-NEXT: ext/oneapi/properties/property_value.hpp // CHECK-NEXT: 
ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/properties.hpp -// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp -// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/kernel_name_str_t.hpp -// CHECK-NEXT: detail/ur.hpp -// CHECK-NEXT: ur_api_funcs.def -// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp -// CHECK-NEXT: ext/oneapi/interop_common.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp -// CHECK-NEXT: ext/oneapi/device_global/properties.hpp // CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/properties/properties.hpp +// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp // CHECK-NEXT: ext/oneapi/experimental/graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp @@ -178,8 +166,20 @@ // CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp // CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp // CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp +// CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp +// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp +// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp +// CHECK-NEXT: detail/kernel_name_str_t.hpp +// CHECK-NEXT: detail/ur.hpp +// CHECK-NEXT: ur_api_funcs.def +// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp +// CHECK-NEXT: ext/oneapi/interop_common.hpp +// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp +// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp 
+// CHECK-NEXT: ext/oneapi/device_global/properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: queue.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp index 0be5245e52157..ef7d9acea668b 100644 --- a/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp @@ -135,10 +135,23 @@ // CHECK-NEXT: ext/oneapi/properties/property.hpp // CHECK-NEXT: ext/oneapi/properties/property_value.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/properties/properties.hpp // CHECK-NEXT: ext/oneapi/properties/property_utils.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def +// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def +// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp +// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp +// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp // CHECK-NEXT: detail/sycl_local_mem_builtins.hpp // CHECK-NEXT: 
detail/kernel_name_str_t.hpp @@ -155,20 +168,7 @@ // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp // CHECK-NEXT: ext/oneapi/device_global/device_global.hpp // CHECK-NEXT: ext/oneapi/device_global/properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp index 87de587378f40..99f87aee43e62 100644 --- a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp @@ -157,6 +157,8 @@ // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp @@ -168,9 +170,7 @@ 
// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp // CHECK-NEXT: ext/oneapi/interop_common.hpp // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: usm/usm_pointer_info.hpp diff --git a/sycl/test/virtual-functions/properties-negative.cpp b/sycl/test/virtual-functions/properties-negative.cpp index b8e1b75f1d9a9..0ef06b3652ad1 100644 --- a/sycl/test/virtual-functions/properties-negative.cpp +++ b/sycl/test/virtual-functions/properties-negative.cpp @@ -17,15 +17,15 @@ int main() { oneapi::properties props_int{oneapi::indirectly_callable_in}; oneapi::properties props_user{oneapi::indirectly_callable_in}; - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_empty, [=]() {}); // When both "props_empty" and "props_void" are in use, we won't see the // static assert firing for the second one, because there will be only one // instantiation of handler::processProperties. 
q.single_task(props_void, [=]() {}); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_int, [=]() {}); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_user, [=]() {}); return 0;