@@ -256,203 +256,179 @@ struct KernelWrapper<
256256 }
257257}; // KernelWrapper struct
258258
259- // This class encapsulates everything related to parsing kernel launch
259+ // This namespace encapsulates everything related to parsing kernel launch
260260// properties.
261- class KernelLaunchPropertyWrapper {
262- public:
263- // This struct is used to store kernel launch properties.
264- // std::optional is used to indicate that the property is not set.
265- // In some code paths, kernel launch properties are set multiple times
266- // for the same kernel, that is why using std::optional to avoid overriding
267- // previously set properties.
268- // This struct is used to pass kernel launch properties across the ABI
269- // boundary.
270- struct KernelLaunchPropertiesT {
271- struct ScopeForwardProgressProperty {
272- std::optional<sycl::ext::oneapi::experimental::forward_progress_guarantee>
273- Guarantee;
274- std::optional<sycl::ext::oneapi::experimental::execution_scope> ExecScope;
275- std::optional<sycl::ext::oneapi::experimental::execution_scope>
276- CoordinationScope;
277- };
278-
279- std::optional<ur_kernel_cache_config_t > MCacheConfig = std::nullopt ;
280- std::optional<bool > MIsCooperative = std::nullopt ;
281- std::optional<uint32_t > MWorkGroupMemorySize = std::nullopt ;
282- std::optional<bool > MUsesClusterLaunch = std::nullopt ;
283- size_t MClusterDims = 0 ;
284- std::array<size_t , 3 > MClusterSize = {0 , 0 , 0 };
261+ inline namespace kernel_launch_properties_v1 {
262+
263+ // This struct is used to store kernel launch properties.
264+ // std::optional is used to indicate that the property is not set.
265+ // This struct is used to pass kernel launch properties across the ABI
266+ // boundary.
267+ struct KernelLaunchPropertiesTy {
268+ // Modeled after ur_kernel_cache_config_t
269+ enum class StableKernelCacheConfig : int32_t {
270+ Default = 0 ,
271+ LargeSLM = 1 ,
272+ LargeData = 2
273+ };
285274
286- // Forward progress guarantee properties for work_item, sub_group and
287- // work_group scopes. We need to store them for validation later.
288- std::array<ScopeForwardProgressProperty, 3 > MForwardProgressProperties;
275+ struct ScopeForwardProgressProperty {
276+ std::optional<sycl::ext::oneapi::experimental::forward_progress_guarantee>
277+ Guarantee;
278+ std::optional<sycl::ext::oneapi::experimental::execution_scope> ExecScope;
279+ std::optional<sycl::ext::oneapi::experimental::execution_scope>
280+ CoordinationScope;
289281 };
290282
291- // / Process runtime kernel properties.
292- template <typename PropertiesT>
293- static KernelLaunchPropertiesT
294- processKernelLaunchProperties (PropertiesT Props) {
295- using namespace sycl ::ext::oneapi::experimental;
296- using namespace sycl ::ext::oneapi::experimental::detail;
297- KernelLaunchPropertiesT retval;
298-
299- // Process Kernel cache configuration property.
300- {
301- if constexpr (PropertiesT::template has_property<
302- sycl::ext::intel::experimental::cache_config_key>()) {
303- auto Config = Props.template get_property <
304- sycl::ext::intel::experimental::cache_config_key>();
305- if (Config == sycl::ext::intel::experimental::large_slm) {
306- retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM;
307- } else if (Config == sycl::ext::intel::experimental::large_data) {
308- retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA;
309- }
310- } else {
311- std::ignore = Props;
312- }
313- }
283+ std::optional<StableKernelCacheConfig> MCacheConfig = std::nullopt ;
284+ std::optional<bool > MIsCooperative = std::nullopt ;
285+ std::optional<uint32_t > MWorkGroupMemorySize = std::nullopt ;
286+ std::optional<bool > MUsesClusterLaunch = std::nullopt ;
287+ size_t MClusterDims = 0 ;
288+ std::array<size_t , 3 > MClusterSize = {0 , 0 , 0 };
314289
315- // Process Kernel cooperative property.
316- {
317- if constexpr (PropertiesT::template has_property<use_root_sync_key>())
318- retval.MIsCooperative = true ;
319- }
290+ // Forward progress guarantee properties for work_item, sub_group and
291+ // work_group scopes. We need to store them for validation later.
292+ std::array<ScopeForwardProgressProperty, 3 > MForwardProgressProperties;
293+ };
320294
321- // Process device progress properties.
322- {
323- using forward_progress =
324- sycl::ext::oneapi::experimental::forward_progress_guarantee;
325- if constexpr (PropertiesT::template has_property<
326- work_group_progress_key>()) {
327- auto prop = Props.template get_property <work_group_progress_key>();
328- retval.MForwardProgressProperties [0 ].Guarantee = prop.guarantee ;
329- retval.MForwardProgressProperties [0 ].ExecScope =
330- execution_scope::work_group;
331- retval.MForwardProgressProperties [0 ].CoordinationScope =
332- prop.coordinationScope ;
333-
334- // If we are here, the device supports the guarantee required but there
335- // is a caveat in that if the guarantee required is a concurrent
336- // guarantee, then we most likely also need to enable cooperative launch
337- // of the kernel. That is, although the device supports the required
338- // guarantee, some setup work is needed to truly make the device provide
339- // that guarantee at runtime. Otherwise, we will get the default
340- // guarantee which is weaker than concurrent. Same reasoning applies for
341- // sub_group but not for work_item.
342- // TODO: Further design work is probably needed to reflect this behavior
343- // in Unified Runtime.
344- if constexpr (prop.guarantee == forward_progress::concurrent)
345- retval.MIsCooperative = true ;
346- }
347- if constexpr (PropertiesT::template has_property<
348- sub_group_progress_key>()) {
349- auto prop = Props.template get_property <sub_group_progress_key>();
350- retval.MForwardProgressProperties [1 ].Guarantee = prop.guarantee ;
351- retval.MForwardProgressProperties [1 ].ExecScope =
352- execution_scope::sub_group;
353- retval.MForwardProgressProperties [1 ].CoordinationScope =
354- prop.coordinationScope ;
355-
356- // Same reasoning as above for work_group applies here.
357- if constexpr (prop.guarantee == forward_progress::concurrent)
358- retval.MIsCooperative = true ;
359- }
360- if constexpr (PropertiesT::template has_property<
361- work_item_progress_key>()) {
362- auto prop = Props.template get_property <work_item_progress_key>();
363- retval.MForwardProgressProperties [2 ].Guarantee = prop.guarantee ;
364- retval.MForwardProgressProperties [2 ].ExecScope =
365- execution_scope::work_item;
366- retval.MForwardProgressProperties [2 ].CoordinationScope =
367- prop.coordinationScope ;
295+ template <typename PropertiesT>
296+ constexpr KernelLaunchPropertiesTy
297+ processKernelLaunchProperties (PropertiesT Props) {
298+ using namespace sycl ::ext::oneapi::experimental;
299+ using namespace sycl ::ext::oneapi::experimental::detail;
300+ KernelLaunchPropertiesTy retval;
301+
302+ // Process Kernel cache configuration property.
303+ {
304+ if constexpr (PropertiesT::template has_property<
305+ sycl::ext::intel::experimental::cache_config_key>()) {
306+ auto Config = Props.template get_property <
307+ sycl::ext::intel::experimental::cache_config_key>();
308+ if (Config == sycl::ext::intel::experimental::large_slm) {
309+ retval.MCacheConfig =
310+ KernelLaunchPropertiesTy::StableKernelCacheConfig::LargeSLM;
311+ } else if (Config == sycl::ext::intel::experimental::large_data) {
312+ retval.MCacheConfig =
313+ KernelLaunchPropertiesTy::StableKernelCacheConfig::LargeData;
368314 }
315+ } else {
316+ std::ignore = Props;
369317 }
318+ }
370319
371- // Process work group scratch memory property.
372- {
373- if constexpr (PropertiesT::template has_property<
374- work_group_scratch_size>()) {
375- auto WorkGroupMemSize =
376- Props.template get_property <work_group_scratch_size>();
377- retval.MWorkGroupMemorySize = WorkGroupMemSize.size ;
378- }
379- }
320+ // Process Kernel cooperative property.
321+ {
322+ if constexpr (PropertiesT::template has_property<use_root_sync_key>())
323+ retval.MIsCooperative = true ;
324+ }
380325
381- // Parse cluster properties.
382- {
383- constexpr std::size_t ClusterDim = getClusterDim<PropertiesT>();
384- if constexpr (ClusterDim > 0 ) {
385-
386- auto ClusterSize =
387- Props.template get_property <cuda::cluster_size_key<ClusterDim>>()
388- .get_cluster_size ();
389- retval.MUsesClusterLaunch = true ;
390- retval.MClusterDims = ClusterDim;
391- if (ClusterDim == 1 ) {
392- retval.MClusterSize [0 ] = ClusterSize[0 ];
393- } else if (ClusterDim == 2 ) {
394- retval.MClusterSize [0 ] = ClusterSize[0 ];
395- retval.MClusterSize [1 ] = ClusterSize[1 ];
396- } else if (ClusterDim == 3 ) {
397- retval.MClusterSize [0 ] = ClusterSize[0 ];
398- retval.MClusterSize [1 ] = ClusterSize[1 ];
399- retval.MClusterSize [2 ] = ClusterSize[2 ];
400- } else {
401- assert (ClusterDim <= 3 &&
402- " Only 1D, 2D, and 3D cluster launch is supported." );
403- }
404- }
326+ // Process device progress properties.
327+ {
328+ if constexpr (PropertiesT::template has_property<
329+ work_group_progress_key>()) {
330+ auto prop = Props.template get_property <work_group_progress_key>();
331+ retval.MForwardProgressProperties [0 ].Guarantee = prop.guarantee ;
332+ retval.MForwardProgressProperties [0 ].ExecScope =
333+ execution_scope::work_group;
334+ retval.MForwardProgressProperties [0 ].CoordinationScope =
335+ prop.coordinationScope ;
405336 }
337+ if constexpr (PropertiesT::template has_property<
338+ sub_group_progress_key>()) {
339+ auto prop = Props.template get_property <sub_group_progress_key>();
340+ retval.MForwardProgressProperties [1 ].Guarantee = prop.guarantee ;
341+ retval.MForwardProgressProperties [1 ].ExecScope =
342+ execution_scope::sub_group;
343+ retval.MForwardProgressProperties [1 ].CoordinationScope =
344+ prop.coordinationScope ;
345+ }
346+ if constexpr (PropertiesT::template has_property<
347+ work_item_progress_key>()) {
348+ auto prop = Props.template get_property <work_item_progress_key>();
349+ retval.MForwardProgressProperties [2 ].Guarantee = prop.guarantee ;
350+ retval.MForwardProgressProperties [2 ].ExecScope =
351+ execution_scope::work_item;
352+ retval.MForwardProgressProperties [2 ].CoordinationScope =
353+ prop.coordinationScope ;
354+ }
355+ }
406356
407- return retval;
357+ // Process work group scratch memory property.
358+ {
359+ if constexpr (PropertiesT::template has_property<
360+ work_group_scratch_size>()) {
361+ auto WorkGroupMemSize =
362+ Props.template get_property <work_group_scratch_size>();
363+ retval.MWorkGroupMemorySize = WorkGroupMemSize.size ;
364+ }
408365 }
409366
410- // / Process kernel properties.
411- // / Note: it is important that this function *does not* depend on kernel
412- // / name or kernel type, because then it will be instantiated for every
413- // / kernel, even though body of those instantiated functions could be almost
414- // / the same, thus unnecessary increasing compilation time.
415- template <
416- bool IsESIMDKernel,
417- typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
418- static KernelLaunchPropertiesT processKernelProperties (PropertiesT Props) {
419- static_assert (
420- ext::oneapi::experimental::is_property_list<PropertiesT>::value,
421- " Template type is not a property list." );
422- static_assert (
423- !PropertiesT::template has_property<
424- sycl::ext::intel::experimental::fp_control_key>() ||
425- (PropertiesT::template has_property<
426- sycl::ext::intel::experimental::fp_control_key>() &&
427- IsESIMDKernel),
428- " Floating point control property is supported for ESIMD kernels only." );
429- static_assert (
430- !PropertiesT::template has_property<
431- sycl::ext::oneapi::experimental::indirectly_callable_key>(),
432- " indirectly_callable property cannot be applied to SYCL kernels" );
367+ // Parse cluster properties.
368+ {
369+ constexpr std::size_t ClusterDim = getClusterDim<PropertiesT>();
370+ if constexpr (ClusterDim > 0 ) {
371+ static_assert (ClusterDim <= 3 ,
372+ " Only 1D, 2D, and 3D cluster launch is supported." );
373+
374+ auto ClusterSize =
375+ Props.template get_property <cuda::cluster_size_key<ClusterDim>>()
376+ .get_cluster_size ();
433377
434- return processKernelLaunchProperties (Props);
378+ retval.MUsesClusterLaunch = true ;
379+ retval.MClusterDims = ClusterDim;
380+
381+ for (size_t dim = 0 ; dim < ClusterDim; dim++)
382+ retval.MClusterSize [dim] = ClusterSize[dim];
383+ }
435384 }
436385
437- // Returns KernelLaunchPropertiesT or std::nullopt based on whether the
438- // kernel functor has a get method that returns properties.
439- template <typename KernelName, bool isESIMD, typename KernelType>
440- static std::optional<KernelLaunchPropertiesT>
441- parseProperties ([[maybe_unused]] const KernelType &KernelFunc) {
386+ return retval;
387+ }
388+
389+ // / Note: it is important that this function *does not* depend on kernel
390+ // / name or kernel type, because then it will be instantiated for every
391+ // / kernel, even though body of those instantiated functions could be almost
392+ // / the same, thus unnecessary increasing compilation time.
393+ template <bool IsESIMDKernel,
394+ typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
395+ constexpr KernelLaunchPropertiesTy processKernelProperties (PropertiesT Props) {
396+ static_assert (ext::oneapi::experimental::is_property_list<PropertiesT>::value,
397+ " Template type is not a property list." );
398+ static_assert (
399+ !PropertiesT::template has_property<
400+ sycl::ext::intel::experimental::fp_control_key>() ||
401+ (PropertiesT::template has_property<
402+ sycl::ext::intel::experimental::fp_control_key>() &&
403+ IsESIMDKernel),
404+ " Floating point control property is supported for ESIMD kernels only." );
405+ static_assert (
406+ !PropertiesT::template has_property<
407+ sycl::ext::oneapi::experimental::indirectly_callable_key>(),
408+ " indirectly_callable property cannot be applied to SYCL kernels" );
409+
410+ return processKernelLaunchProperties (Props);
411+ }
412+
413+ // Returns KernelLaunchPropertiesTy or std::nullopt based on whether the
414+ // kernel functor has a get method that returns properties.
415+ template <typename KernelName, bool isESIMD, typename KernelType>
416+ constexpr std::optional<KernelLaunchPropertiesTy>
417+ parseProperties ([[maybe_unused]] const KernelType &KernelFunc) {
442418#ifndef __SYCL_DEVICE_ONLY__
443- // If there are properties provided by get method then process them.
444- if constexpr (ext::oneapi::experimental::detail::
445- HasKernelPropertiesGetMethod< const KernelType &>::value) {
419+ // If there are properties provided by get method then process them.
420+ if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
421+ const KernelType &>::value) {
446422
447- return processKernelProperties<isESIMD>(
448- KernelFunc.get (ext::oneapi::experimental::properties_tag{}));
449- }
450- #endif
451- // If there are no properties provided by get method then return empty
452- // optional.
453- return std::nullopt ;
423+ return processKernelProperties<isESIMD>(
424+ KernelFunc.get (ext::oneapi::experimental::properties_tag{}));
454425 }
455- }; // KernelLaunchPropertyWrapper struct
426+ #endif
427+ // If there are no properties provided by get method then return empty
428+ // optional.
429+ return std::nullopt ;
430+ }
431+ } // namespace kernel_launch_properties_v1
456432
457433} // namespace detail
458434} // namespace _V1
0 commit comments