@@ -260,17 +260,49 @@ struct KernelWrapper<
260260// properties.
261261inline namespace kernel_launch_properties_v1 {
262262
263- // This struct is used to store kernel launch properties.
264- // std::optional is used to indicate that the property is not set.
265- // This struct is used to pass kernel launch properties across the ABI
266- // boundary.
267- struct KernelLaunchPropertiesTy {
268- // Modeled after ur_kernel_cache_config_t
269- enum class StableKernelCacheConfig : int32_t {
270- Default = 0 ,
271- LargeSLM = 1 ,
272- LargeData = 2
273- };
263+ template <typename key, typename = void > struct MarshalledProperty ;
264+
265+ // Generic implementation for runtime properties.
266+ template <typename PropertyTy>
267+ struct MarshalledProperty <PropertyTy,
268+ std::enable_if_t <!std::is_empty_v<PropertyTy>>> {
269+ std::optional<PropertyTy> property;
270+
271+ template <typename InputPropertyTy>
272+ MarshalledProperty ([[maybe_unused]] const InputPropertyTy &InputProperties) {
273+ if constexpr (ext::oneapi::experimental::is_property_list_v<
274+ InputPropertyTy>)
275+ if constexpr (InputPropertyTy::template has_property<PropertyTy>()) {
276+ std::cout << " Got property: " << typeid (PropertyTy).name () << " \n " ;
277+ property = InputProperties.template get_property <PropertyTy>();
278+ }
279+ }
280+
281+ MarshalledProperty () = default ;
282+ };
283+
284+ // Specialization for use_root_sync_key property.
285+ template <>
286+ struct MarshalledProperty <sycl::ext::oneapi::experimental::use_root_sync_key> {
287+
288+ bool isRootSyncPropPresent = false ;
289+
290+ template <typename InputPropertyTy>
291+ MarshalledProperty ([[maybe_unused]] const InputPropertyTy &Props) {
292+ using namespace sycl ::ext::oneapi::experimental;
293+ if constexpr (ext::oneapi::experimental::is_property_list_v<
294+ InputPropertyTy>)
295+ if constexpr (InputPropertyTy::template has_property<use_root_sync_key>())
296+ isRootSyncPropPresent = true ;
297+ }
298+
299+ MarshalledProperty () = default ;
300+ };
301+
302+ // Specialization for work group progress property.
303+ template <>
304+ struct MarshalledProperty <
305+ sycl::ext::oneapi::experimental::work_group_progress_key> {
274306
275307 struct ScopeForwardProgressProperty {
276308 std::optional<sycl::ext::oneapi::experimental::forward_progress_guarantee>
@@ -280,110 +312,69 @@ struct KernelLaunchPropertiesTy {
280312 CoordinationScope;
281313 };
282314
283- std::optional<StableKernelCacheConfig> MCacheConfig = std::nullopt ;
284- std::optional<bool > MIsCooperative = std::nullopt ;
285- std::optional<uint32_t > MWorkGroupMemorySize = std::nullopt ;
286- std::optional<bool > MUsesClusterLaunch = std::nullopt ;
287- size_t MClusterDims = 0 ;
288- std::array<size_t , 3 > MClusterSize = {0 , 0 , 0 };
289-
290315 // Forward progress guarantee properties for work_item, sub_group and
291316 // work_group scopes. We need to store them for validation later.
292317 std::array<ScopeForwardProgressProperty, 3 > MForwardProgressProperties;
293- };
294318
295- template <typename PropertiesT>
296- constexpr KernelLaunchPropertiesTy
297- processKernelLaunchProperties (PropertiesT Props) {
298- using namespace sycl ::ext::oneapi::experimental;
299- using namespace sycl ::ext::oneapi::experimental::detail;
300- KernelLaunchPropertiesTy retval;
301-
302- // Process Kernel cache configuration property.
303- {
304- if constexpr (PropertiesT::template has_property<
305- sycl::ext::intel::experimental::cache_config_key>()) {
306- auto Config = Props.template get_property <
307- sycl::ext::intel::experimental::cache_config_key>();
308- if (Config == sycl::ext::intel::experimental::large_slm) {
309- retval.MCacheConfig =
310- KernelLaunchPropertiesTy::StableKernelCacheConfig::LargeSLM;
311- } else if (Config == sycl::ext::intel::experimental::large_data) {
312- retval.MCacheConfig =
313- KernelLaunchPropertiesTy::StableKernelCacheConfig::LargeData;
319+ template <typename InputPropertyTy,
320+ class = typename std::enable_if_t <
321+ ext::oneapi::experimental::is_property_list_v<InputPropertyTy>>>
322+ MarshalledProperty ([[maybe_unused]] const InputPropertyTy &Props) {
323+ using namespace sycl ::ext::oneapi::experimental;
324+
325+ if constexpr (ext::oneapi::experimental::is_property_list_v<
326+ InputPropertyTy>) {
327+ if constexpr (InputPropertyTy::template has_property<
328+ work_group_progress_key>()) {
329+ auto prop = Props.template get_property <work_group_progress_key>();
330+ MForwardProgressProperties[0 ].Guarantee = prop.guarantee ;
331+ MForwardProgressProperties[0 ].ExecScope = execution_scope::work_group;
332+ MForwardProgressProperties[0 ].CoordinationScope =
333+ prop.coordinationScope ;
334+ }
335+ if constexpr (InputPropertyTy::template has_property<
336+ sub_group_progress_key>()) {
337+ auto prop = Props.template get_property <sub_group_progress_key>();
338+ MForwardProgressProperties[1 ].Guarantee = prop.guarantee ;
339+ MForwardProgressProperties[1 ].ExecScope = execution_scope::sub_group;
340+ MForwardProgressProperties[1 ].CoordinationScope =
341+ prop.coordinationScope ;
342+ }
343+ if constexpr (InputPropertyTy::template has_property<
344+ work_item_progress_key>()) {
345+ auto prop = Props.template get_property <work_item_progress_key>();
346+ MForwardProgressProperties[2 ].Guarantee = prop.guarantee ;
347+ MForwardProgressProperties[2 ].ExecScope = execution_scope::work_item;
348+ MForwardProgressProperties[2 ].CoordinationScope =
349+ prop.coordinationScope ;
314350 }
315- } else {
316- std::ignore = Props;
317- }
318- }
319-
320- // Process Kernel cooperative property.
321- {
322- if constexpr (PropertiesT::template has_property<use_root_sync_key>())
323- retval.MIsCooperative = true ;
324- }
325-
326- // Process device progress properties.
327- {
328- if constexpr (PropertiesT::template has_property<
329- work_group_progress_key>()) {
330- auto prop = Props.template get_property <work_group_progress_key>();
331- retval.MForwardProgressProperties [0 ].Guarantee = prop.guarantee ;
332- retval.MForwardProgressProperties [0 ].ExecScope =
333- execution_scope::work_group;
334- retval.MForwardProgressProperties [0 ].CoordinationScope =
335- prop.coordinationScope ;
336- }
337- if constexpr (PropertiesT::template has_property<
338- sub_group_progress_key>()) {
339- auto prop = Props.template get_property <sub_group_progress_key>();
340- retval.MForwardProgressProperties [1 ].Guarantee = prop.guarantee ;
341- retval.MForwardProgressProperties [1 ].ExecScope =
342- execution_scope::sub_group;
343- retval.MForwardProgressProperties [1 ].CoordinationScope =
344- prop.coordinationScope ;
345- }
346- if constexpr (PropertiesT::template has_property<
347- work_item_progress_key>()) {
348- auto prop = Props.template get_property <work_item_progress_key>();
349- retval.MForwardProgressProperties [2 ].Guarantee = prop.guarantee ;
350- retval.MForwardProgressProperties [2 ].ExecScope =
351- execution_scope::work_item;
352- retval.MForwardProgressProperties [2 ].CoordinationScope =
353- prop.coordinationScope ;
354351 }
355352 }
356353
357- // Process work group scratch memory property.
358- {
359- if constexpr (PropertiesT::template has_property<
360- work_group_scratch_size>()) {
361- auto WorkGroupMemSize =
362- Props.template get_property <work_group_scratch_size>();
363- retval.MWorkGroupMemorySize = WorkGroupMemSize.size ;
364- }
365- }
354+ MarshalledProperty () = default ;
355+ };
366356
367- // Parse cluster properties.
368- {
369- constexpr std::size_t ClusterDim = getClusterDim<PropertiesT>();
370- if constexpr (ClusterDim > 0 ) {
371- static_assert (ClusterDim <= 3 ,
372- " Only 1D, 2D, and 3D cluster launch is supported." );
357+ template <typename ... keys> struct PropsHolder : MarshalledProperty<keys>... {
373358
374- auto ClusterSize =
375- Props.template get_property <cuda::cluster_size_key<ClusterDim>>()
376- .get_cluster_size ();
359+ template <typename PropertiesT>
360+ PropsHolder (PropertiesT Props) : MarshalledProperty<keys>(Props)... {}
377361
378- retval. MUsesClusterLaunch = true ;
379- retval. MClusterDims = ClusterDim ;
362+ PropsHolder () = default ;
363+ } ;
380364
381- for (size_t dim = 0 ; dim < ClusterDim; dim++)
382- retval.MClusterSize [dim] = ClusterSize[dim];
383- }
384- }
365+ using KernelPropertyHolderStructTy =
366+ PropsHolder<sycl::ext::oneapi::experimental::work_group_scratch_size,
367+ sycl::ext::intel::experimental::cache_config_key,
368+ sycl::ext::oneapi::experimental::use_root_sync_key,
369+ sycl::ext::oneapi::experimental::work_group_progress_key,
370+ sycl::ext::oneapi::experimental::cuda::cluster_size_key<1 >,
371+ sycl::ext::oneapi::experimental::cuda::cluster_size_key<2 >,
372+ sycl::ext::oneapi::experimental::cuda::cluster_size_key<3 >>;
385373
386- return retval;
374+ template <typename PropertiesT>
375+ constexpr auto processKernelLaunchProperties (PropertiesT Props) {
376+ KernelPropertyHolderStructTy prop (Props);
377+ return prop;
387378}
388379
389380// / Note: it is important that this function *does not* depend on kernel
@@ -392,7 +383,7 @@ processKernelLaunchProperties(PropertiesT Props) {
392383// / the same, thus unnecessary increasing compilation time.
393384template <bool IsESIMDKernel,
394385 typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
395- constexpr KernelLaunchPropertiesTy processKernelProperties (PropertiesT Props) {
386+ constexpr auto processKernelProperties (PropertiesT Props) {
396387 static_assert (ext::oneapi::experimental::is_property_list<PropertiesT>::value,
397388 " Template type is not a property list." );
398389 static_assert (
@@ -413,7 +404,7 @@ constexpr KernelLaunchPropertiesTy processKernelProperties(PropertiesT Props) {
413404// Returns KernelLaunchPropertiesTy or std::nullopt based on whether the
414405// kernel functor has a get method that returns properties.
415406template <typename KernelName, bool isESIMD, typename KernelType>
416- constexpr std::optional<KernelLaunchPropertiesTy >
407+ constexpr std::optional<KernelPropertyHolderStructTy >
417408parseProperties ([[maybe_unused]] const KernelType &KernelFunc) {
418409#ifndef __SYCL_DEVICE_ONLY__
419410 // If there are properties provided by get method then process them.
0 commit comments