Skip to content

Commit 926830a

Browse files
committed
Address feedback
1 parent 6f12dff commit 926830a

File tree

9 files changed

+247
-268
lines changed

9 files changed

+247
-268
lines changed

sycl/include/sycl/detail/kernel_launch_helper.hpp

Lines changed: 153 additions & 177 deletions
Original file line numberDiff line numberDiff line change
@@ -256,203 +256,179 @@ struct KernelWrapper<
256256
}
257257
}; // KernelWrapper struct
258258

259-
// This class encapsulates everything related to parsing kernel launch
259+
// This namespace encapsulates everything related to parsing kernel launch
260260
// properties.
261-
class KernelLaunchPropertyWrapper {
262-
public:
263-
// This struct is used to store kernel launch properties.
264-
// std::optional is used to indicate that the property is not set.
265-
// In some code paths, kernel launch properties are set multiple times
266-
// for the same kernel, that is why using std::optional to avoid overriding
267-
// previously set properties.
268-
// This struct is used to pass kernel launch properties across the ABI
269-
// boundary.
270-
struct KernelLaunchPropertiesT {
271-
struct ScopeForwardProgressProperty {
272-
std::optional<sycl::ext::oneapi::experimental::forward_progress_guarantee>
273-
Guarantee;
274-
std::optional<sycl::ext::oneapi::experimental::execution_scope> ExecScope;
275-
std::optional<sycl::ext::oneapi::experimental::execution_scope>
276-
CoordinationScope;
277-
};
278-
279-
std::optional<ur_kernel_cache_config_t> MCacheConfig = std::nullopt;
280-
std::optional<bool> MIsCooperative = std::nullopt;
281-
std::optional<uint32_t> MWorkGroupMemorySize = std::nullopt;
282-
std::optional<bool> MUsesClusterLaunch = std::nullopt;
283-
size_t MClusterDims = 0;
284-
std::array<size_t, 3> MClusterSize = {0, 0, 0};
261+
inline namespace kernel_launch_properties_v1 {
262+
263+
// This struct is used to store kernel launch properties.
264+
// std::optional is used to indicate that the property is not set.
265+
// This struct is used to pass kernel launch properties across the ABI
266+
// boundary.
267+
struct KernelLaunchPropertiesTy {
268+
// Modeled after ur_kernel_cache_config_t
269+
enum class StableKernelCacheConfig : int32_t {
270+
Default = 0,
271+
LargeSLM = 1,
272+
LargeData = 2
273+
};
285274

286-
// Forward progress guarantee properties for work_item, sub_group and
287-
// work_group scopes. We need to store them for validation later.
288-
std::array<ScopeForwardProgressProperty, 3> MForwardProgressProperties;
275+
struct ScopeForwardProgressProperty {
276+
std::optional<sycl::ext::oneapi::experimental::forward_progress_guarantee>
277+
Guarantee;
278+
std::optional<sycl::ext::oneapi::experimental::execution_scope> ExecScope;
279+
std::optional<sycl::ext::oneapi::experimental::execution_scope>
280+
CoordinationScope;
289281
};
290282

291-
/// Process runtime kernel properties.
292-
template <typename PropertiesT>
293-
static KernelLaunchPropertiesT
294-
processKernelLaunchProperties(PropertiesT Props) {
295-
using namespace sycl::ext::oneapi::experimental;
296-
using namespace sycl::ext::oneapi::experimental::detail;
297-
KernelLaunchPropertiesT retval;
298-
299-
// Process Kernel cache configuration property.
300-
{
301-
if constexpr (PropertiesT::template has_property<
302-
sycl::ext::intel::experimental::cache_config_key>()) {
303-
auto Config = Props.template get_property<
304-
sycl::ext::intel::experimental::cache_config_key>();
305-
if (Config == sycl::ext::intel::experimental::large_slm) {
306-
retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM;
307-
} else if (Config == sycl::ext::intel::experimental::large_data) {
308-
retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA;
309-
}
310-
} else {
311-
std::ignore = Props;
312-
}
313-
}
283+
std::optional<StableKernelCacheConfig> MCacheConfig = std::nullopt;
284+
std::optional<bool> MIsCooperative = std::nullopt;
285+
std::optional<uint32_t> MWorkGroupMemorySize = std::nullopt;
286+
std::optional<bool> MUsesClusterLaunch = std::nullopt;
287+
size_t MClusterDims = 0;
288+
std::array<size_t, 3> MClusterSize = {0, 0, 0};
314289

315-
// Process Kernel cooperative property.
316-
{
317-
if constexpr (PropertiesT::template has_property<use_root_sync_key>())
318-
retval.MIsCooperative = true;
319-
}
290+
// Forward progress guarantee properties for work_item, sub_group and
291+
// work_group scopes. We need to store them for validation later.
292+
std::array<ScopeForwardProgressProperty, 3> MForwardProgressProperties;
293+
};
320294

321-
// Process device progress properties.
322-
{
323-
using forward_progress =
324-
sycl::ext::oneapi::experimental::forward_progress_guarantee;
325-
if constexpr (PropertiesT::template has_property<
326-
work_group_progress_key>()) {
327-
auto prop = Props.template get_property<work_group_progress_key>();
328-
retval.MForwardProgressProperties[0].Guarantee = prop.guarantee;
329-
retval.MForwardProgressProperties[0].ExecScope =
330-
execution_scope::work_group;
331-
retval.MForwardProgressProperties[0].CoordinationScope =
332-
prop.coordinationScope;
333-
334-
// If we are here, the device supports the guarantee required but there
335-
// is a caveat in that if the guarantee required is a concurrent
336-
// guarantee, then we most likely also need to enable cooperative launch
337-
// of the kernel. That is, although the device supports the required
338-
// guarantee, some setup work is needed to truly make the device provide
339-
// that guarantee at runtime. Otherwise, we will get the default
340-
// guarantee which is weaker than concurrent. Same reasoning applies for
341-
// sub_group but not for work_item.
342-
// TODO: Further design work is probably needed to reflect this behavior
343-
// in Unified Runtime.
344-
if constexpr (prop.guarantee == forward_progress::concurrent)
345-
retval.MIsCooperative = true;
346-
}
347-
if constexpr (PropertiesT::template has_property<
348-
sub_group_progress_key>()) {
349-
auto prop = Props.template get_property<sub_group_progress_key>();
350-
retval.MForwardProgressProperties[1].Guarantee = prop.guarantee;
351-
retval.MForwardProgressProperties[1].ExecScope =
352-
execution_scope::sub_group;
353-
retval.MForwardProgressProperties[1].CoordinationScope =
354-
prop.coordinationScope;
355-
356-
// Same reasoning as above for work_group applies here.
357-
if constexpr (prop.guarantee == forward_progress::concurrent)
358-
retval.MIsCooperative = true;
359-
}
360-
if constexpr (PropertiesT::template has_property<
361-
work_item_progress_key>()) {
362-
auto prop = Props.template get_property<work_item_progress_key>();
363-
retval.MForwardProgressProperties[2].Guarantee = prop.guarantee;
364-
retval.MForwardProgressProperties[2].ExecScope =
365-
execution_scope::work_item;
366-
retval.MForwardProgressProperties[2].CoordinationScope =
367-
prop.coordinationScope;
295+
template <typename PropertiesT>
296+
constexpr KernelLaunchPropertiesTy
297+
processKernelLaunchProperties(PropertiesT Props) {
298+
using namespace sycl::ext::oneapi::experimental;
299+
using namespace sycl::ext::oneapi::experimental::detail;
300+
KernelLaunchPropertiesTy retval;
301+
302+
// Process Kernel cache configuration property.
303+
{
304+
if constexpr (PropertiesT::template has_property<
305+
sycl::ext::intel::experimental::cache_config_key>()) {
306+
auto Config = Props.template get_property<
307+
sycl::ext::intel::experimental::cache_config_key>();
308+
if (Config == sycl::ext::intel::experimental::large_slm) {
309+
retval.MCacheConfig =
310+
KernelLaunchPropertiesTy::StableKernelCacheConfig::LargeSLM;
311+
} else if (Config == sycl::ext::intel::experimental::large_data) {
312+
retval.MCacheConfig =
313+
KernelLaunchPropertiesTy::StableKernelCacheConfig::LargeData;
368314
}
315+
} else {
316+
std::ignore = Props;
369317
}
318+
}
370319

371-
// Process work group scratch memory property.
372-
{
373-
if constexpr (PropertiesT::template has_property<
374-
work_group_scratch_size>()) {
375-
auto WorkGroupMemSize =
376-
Props.template get_property<work_group_scratch_size>();
377-
retval.MWorkGroupMemorySize = WorkGroupMemSize.size;
378-
}
379-
}
320+
// Process Kernel cooperative property.
321+
{
322+
if constexpr (PropertiesT::template has_property<use_root_sync_key>())
323+
retval.MIsCooperative = true;
324+
}
380325

381-
// Parse cluster properties.
382-
{
383-
constexpr std::size_t ClusterDim = getClusterDim<PropertiesT>();
384-
if constexpr (ClusterDim > 0) {
385-
386-
auto ClusterSize =
387-
Props.template get_property<cuda::cluster_size_key<ClusterDim>>()
388-
.get_cluster_size();
389-
retval.MUsesClusterLaunch = true;
390-
retval.MClusterDims = ClusterDim;
391-
if (ClusterDim == 1) {
392-
retval.MClusterSize[0] = ClusterSize[0];
393-
} else if (ClusterDim == 2) {
394-
retval.MClusterSize[0] = ClusterSize[0];
395-
retval.MClusterSize[1] = ClusterSize[1];
396-
} else if (ClusterDim == 3) {
397-
retval.MClusterSize[0] = ClusterSize[0];
398-
retval.MClusterSize[1] = ClusterSize[1];
399-
retval.MClusterSize[2] = ClusterSize[2];
400-
} else {
401-
assert(ClusterDim <= 3 &&
402-
"Only 1D, 2D, and 3D cluster launch is supported.");
403-
}
404-
}
326+
// Process device progress properties.
327+
{
328+
if constexpr (PropertiesT::template has_property<
329+
work_group_progress_key>()) {
330+
auto prop = Props.template get_property<work_group_progress_key>();
331+
retval.MForwardProgressProperties[0].Guarantee = prop.guarantee;
332+
retval.MForwardProgressProperties[0].ExecScope =
333+
execution_scope::work_group;
334+
retval.MForwardProgressProperties[0].CoordinationScope =
335+
prop.coordinationScope;
405336
}
337+
if constexpr (PropertiesT::template has_property<
338+
sub_group_progress_key>()) {
339+
auto prop = Props.template get_property<sub_group_progress_key>();
340+
retval.MForwardProgressProperties[1].Guarantee = prop.guarantee;
341+
retval.MForwardProgressProperties[1].ExecScope =
342+
execution_scope::sub_group;
343+
retval.MForwardProgressProperties[1].CoordinationScope =
344+
prop.coordinationScope;
345+
}
346+
if constexpr (PropertiesT::template has_property<
347+
work_item_progress_key>()) {
348+
auto prop = Props.template get_property<work_item_progress_key>();
349+
retval.MForwardProgressProperties[2].Guarantee = prop.guarantee;
350+
retval.MForwardProgressProperties[2].ExecScope =
351+
execution_scope::work_item;
352+
retval.MForwardProgressProperties[2].CoordinationScope =
353+
prop.coordinationScope;
354+
}
355+
}
406356

407-
return retval;
357+
// Process work group scratch memory property.
358+
{
359+
if constexpr (PropertiesT::template has_property<
360+
work_group_scratch_size>()) {
361+
auto WorkGroupMemSize =
362+
Props.template get_property<work_group_scratch_size>();
363+
retval.MWorkGroupMemorySize = WorkGroupMemSize.size;
364+
}
408365
}
409366

410-
/// Process kernel properties.
411-
/// Note: it is important that this function *does not* depend on kernel
412-
/// name or kernel type, because then it will be instantiated for every
413-
/// kernel, even though body of those instantiated functions could be almost
414-
/// the same, thus unnecessary increasing compilation time.
415-
template <
416-
bool IsESIMDKernel,
417-
typename PropertiesT = ext::oneapi::experimental::empty_properties_t>
418-
static KernelLaunchPropertiesT processKernelProperties(PropertiesT Props) {
419-
static_assert(
420-
ext::oneapi::experimental::is_property_list<PropertiesT>::value,
421-
"Template type is not a property list.");
422-
static_assert(
423-
!PropertiesT::template has_property<
424-
sycl::ext::intel::experimental::fp_control_key>() ||
425-
(PropertiesT::template has_property<
426-
sycl::ext::intel::experimental::fp_control_key>() &&
427-
IsESIMDKernel),
428-
"Floating point control property is supported for ESIMD kernels only.");
429-
static_assert(
430-
!PropertiesT::template has_property<
431-
sycl::ext::oneapi::experimental::indirectly_callable_key>(),
432-
"indirectly_callable property cannot be applied to SYCL kernels");
367+
// Parse cluster properties.
368+
{
369+
constexpr std::size_t ClusterDim = getClusterDim<PropertiesT>();
370+
if constexpr (ClusterDim > 0) {
371+
static_assert(ClusterDim <= 3,
372+
"Only 1D, 2D, and 3D cluster launch is supported.");
373+
374+
auto ClusterSize =
375+
Props.template get_property<cuda::cluster_size_key<ClusterDim>>()
376+
.get_cluster_size();
433377

434-
return processKernelLaunchProperties(Props);
378+
retval.MUsesClusterLaunch = true;
379+
retval.MClusterDims = ClusterDim;
380+
381+
for (size_t dim = 0; dim < ClusterDim; dim++)
382+
retval.MClusterSize[dim] = ClusterSize[dim];
383+
}
435384
}
436385

437-
// Returns KernelLaunchPropertiesT or std::nullopt based on whether the
438-
// kernel functor has a get method that returns properties.
439-
template <typename KernelName, bool isESIMD, typename KernelType>
440-
static std::optional<KernelLaunchPropertiesT>
441-
parseProperties([[maybe_unused]] const KernelType &KernelFunc) {
386+
return retval;
387+
}
388+
389+
/// Note: it is important that this function *does not* depend on kernel
390+
/// name or kernel type, because then it will be instantiated for every
391+
/// kernel, even though body of those instantiated functions could be almost
392+
/// the same, thus unnecessary increasing compilation time.
393+
template <bool IsESIMDKernel,
394+
typename PropertiesT = ext::oneapi::experimental::empty_properties_t>
395+
constexpr KernelLaunchPropertiesTy processKernelProperties(PropertiesT Props) {
396+
static_assert(ext::oneapi::experimental::is_property_list<PropertiesT>::value,
397+
"Template type is not a property list.");
398+
static_assert(
399+
!PropertiesT::template has_property<
400+
sycl::ext::intel::experimental::fp_control_key>() ||
401+
(PropertiesT::template has_property<
402+
sycl::ext::intel::experimental::fp_control_key>() &&
403+
IsESIMDKernel),
404+
"Floating point control property is supported for ESIMD kernels only.");
405+
static_assert(
406+
!PropertiesT::template has_property<
407+
sycl::ext::oneapi::experimental::indirectly_callable_key>(),
408+
"indirectly_callable property cannot be applied to SYCL kernels");
409+
410+
return processKernelLaunchProperties(Props);
411+
}
412+
413+
// Returns KernelLaunchPropertiesTy or std::nullopt based on whether the
414+
// kernel functor has a get method that returns properties.
415+
template <typename KernelName, bool isESIMD, typename KernelType>
416+
constexpr std::optional<KernelLaunchPropertiesTy>
417+
parseProperties([[maybe_unused]] const KernelType &KernelFunc) {
442418
#ifndef __SYCL_DEVICE_ONLY__
443-
// If there are properties provided by get method then process them.
444-
if constexpr (ext::oneapi::experimental::detail::
445-
HasKernelPropertiesGetMethod<const KernelType &>::value) {
419+
// If there are properties provided by get method then process them.
420+
if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
421+
const KernelType &>::value) {
446422

447-
return processKernelProperties<isESIMD>(
448-
KernelFunc.get(ext::oneapi::experimental::properties_tag{}));
449-
}
450-
#endif
451-
// If there are no properties provided by get method then return empty
452-
// optional.
453-
return std::nullopt;
423+
return processKernelProperties<isESIMD>(
424+
KernelFunc.get(ext::oneapi::experimental::properties_tag{}));
454425
}
455-
}; // KernelLaunchPropertyWrapper struct
426+
#endif
427+
// If there are no properties provided by get method then return empty
428+
// optional.
429+
return std::nullopt;
430+
}
431+
} // namespace kernel_launch_properties_v1
456432

457433
} // namespace detail
458434
} // namespace _V1

sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -299,11 +299,11 @@ void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
299299

300300
ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
301301
Properties>
302-
ConfigAccess(Config);
302+
LaunchConfigAccess(Config);
303303

304-
detail::submit_kernel_direct<KernelName>(std::move(Q),
305-
ConfigAccess.getRange(), KernelObj,
306-
Config.getProperties());
304+
detail::submit_kernel_direct<KernelName>(
305+
std::move(Q), LaunchConfigAccess.getRange(), KernelObj,
306+
LaunchConfigAccess.getProperties());
307307
} else {
308308
submit(std::move(Q), [&](handler &CGH) {
309309
nd_launch<KernelName>(CGH, Config, KernelObj,

0 commit comments

Comments
 (0)