Skip to content
162 changes: 148 additions & 14 deletions sycl/include/sycl/detail/kernel_launch_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
#include <sycl/detail/is_device_copyable.hpp>
#include <sycl/ext/intel/experimental/fp_control_kernel_properties.hpp>
#include <sycl/ext/intel/experimental/kernel_execution_properties.hpp>
#include <sycl/ext/oneapi/experimental/cluster_group_prop.hpp>
#include <sycl/ext/oneapi/experimental/graph.hpp>
#include <sycl/ext/oneapi/experimental/use_root_sync_prop.hpp>
#include <sycl/ext/oneapi/experimental/virtual_functions.hpp>
#include <sycl/ext/oneapi/kernel_properties/properties.hpp>
#include <sycl/ext/oneapi/work_group_scratch_memory.hpp>
Expand Down Expand Up @@ -253,23 +256,154 @@ struct KernelWrapper<
}
}; // KernelWrapper struct

struct KernelLaunchPropertyWrapper {
template <typename KernelName, typename PropertyProcessor,
typename KernelType>
static void parseProperties([[maybe_unused]] PropertyProcessor h,
[[maybe_unused]] const KernelType &KernelFunc) {
#ifndef __SYCL_DEVICE_ONLY__
// If there are properties provided by get method then process them.
if constexpr (ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<const KernelType &>::value) {
// This namespace encapsulates everything related to parsing kernel launch
// properties.
inline namespace kernel_launch_properties_v1 {

template <typename key, typename = void> struct MarshalledProperty;

// Generic implementation for runtime properties.
template <typename PropertyTy>
struct MarshalledProperty<
PropertyTy,
std::enable_if_t<!std::is_empty_v<PropertyTy> &&
std::is_same_v<PropertyTy, typename PropertyTy::key_t>>> {
std::optional<PropertyTy> property;

template <typename InputPropertyTy>
MarshalledProperty(const InputPropertyTy &Props) {
(void)Props;
if constexpr (InputPropertyTy::template has_property<PropertyTy>())
property = Props.template get_property<PropertyTy>();
}

MarshalledProperty() = default;
};

// Specialization for use_root_sync_key property.
template <>
struct MarshalledProperty<sycl::ext::oneapi::experimental::use_root_sync_key> {

bool isRootSyncPropPresent = false;

template <typename InputPropertyTy>
MarshalledProperty(const InputPropertyTy &Props) {
using namespace sycl::ext::oneapi::experimental;

h->template processProperties<
detail::CompileTimeKernelInfo<KernelName>.IsESIMD>(
KernelFunc.get(ext::oneapi::experimental::properties_tag{}));
(void)Props;
isRootSyncPropPresent =
InputPropertyTy::template has_property<use_root_sync_key>();
}

MarshalledProperty() = default;
};

// Specialization for work group progress property.
template <>
struct MarshalledProperty<
sycl::ext::oneapi::experimental::work_group_progress_key> {

struct ScopeForwardProgressProperty {
sycl::ext::oneapi::experimental::forward_progress_guarantee Guarantee;
sycl::ext::oneapi::experimental::execution_scope ExecScope;
sycl::ext::oneapi::experimental::execution_scope CoordinationScope;
};

// Forward progress guarantee properties for work_item, sub_group and
// work_group scopes. We need to store them for validation later.
std::array<std::optional<ScopeForwardProgressProperty>, 3>
MForwardProgressProperties;

template <typename InputPropertyTy,
class = typename std::enable_if_t<
ext::oneapi::experimental::is_property_list_v<InputPropertyTy>>>
MarshalledProperty(const InputPropertyTy &Props) {
using namespace sycl::ext::oneapi::experimental;
(void)Props;

if constexpr (InputPropertyTy::template has_property<
work_group_progress_key>()) {
auto prop = Props.template get_property<work_group_progress_key>();
MForwardProgressProperties[0] = {
prop.guarantee, execution_scope::work_group, prop.coordinationScope};
}
#endif
if constexpr (InputPropertyTy::template has_property<
sub_group_progress_key>()) {
auto prop = Props.template get_property<sub_group_progress_key>();
MForwardProgressProperties[1] = {
prop.guarantee, execution_scope::sub_group, prop.coordinationScope};
}
if constexpr (InputPropertyTy::template has_property<
work_item_progress_key>()) {
auto prop = Props.template get_property<work_item_progress_key>();
MForwardProgressProperties[2] = {
prop.guarantee, execution_scope::work_item, prop.coordinationScope};
}
}

MarshalledProperty() = default;
};

template <typename... keys> struct PropsHolder : MarshalledProperty<keys>... {

template <typename PropertiesT>
PropsHolder(PropertiesT Props) : MarshalledProperty<keys>(Props)... {}

PropsHolder() = default;
};

using KernelPropertyHolderStructTy =
PropsHolder<sycl::ext::oneapi::experimental::work_group_scratch_size,
sycl::ext::intel::experimental::cache_config_key,
sycl::ext::oneapi::experimental::use_root_sync_key,
sycl::ext::oneapi::experimental::work_group_progress_key,
sycl::ext::oneapi::experimental::cuda::cluster_size_key<1>,
sycl::ext::oneapi::experimental::cuda::cluster_size_key<2>,
sycl::ext::oneapi::experimental::cuda::cluster_size_key<3>>;

/// Note: it is important that this function *does not* depend on kernel
/// name or kernel type, because then it will be instantiated for every
/// kernel, even though body of those instantiated functions could be almost
/// the same, thus unnecessary increasing compilation time.
template <bool IsESIMDKernel = false, typename PropertiesT,
class = typename std::enable_if_t<
ext::oneapi::experimental::is_property_list_v<PropertiesT>>>
constexpr auto processKernelProperties(PropertiesT Props) {
static_assert(
!PropertiesT::template has_property<
sycl::ext::intel::experimental::fp_control_key>() ||
(PropertiesT::template has_property<
sycl::ext::intel::experimental::fp_control_key>() &&
IsESIMDKernel),
"Floating point control property is supported for ESIMD kernels only.");
static_assert(
!PropertiesT::template has_property<
sycl::ext::oneapi::experimental::indirectly_callable_key>(),
"indirectly_callable property cannot be applied to SYCL kernels");

KernelPropertyHolderStructTy prop(Props);
return prop;
}

// Returns KernelLaunchPropertiesTy or std::nullopt based on whether the
// kernel functor has a get method that returns properties.
template <typename KernelName, bool isESIMD, typename KernelType>
constexpr std::optional<KernelPropertyHolderStructTy>
parseProperties([[maybe_unused]] const KernelType &KernelFunc) {
#ifndef __SYCL_DEVICE_ONLY__
// If there are properties provided by get method then process them.
if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
const KernelType &>::value) {

return processKernelProperties<isESIMD>(
KernelFunc.get(ext::oneapi::experimental::properties_tag{}));
}
}; // KernelLaunchPropertyWrapper struct
#endif
// If there are no properties provided by get method then return empty
// optional.
return std::nullopt;
}
} // namespace kernel_launch_properties_v1

} // namespace detail
} // namespace _V1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ struct cluster_size
cluster_size<Dim>,
::sycl::ext::oneapi::experimental::detail::ClusterLaunch> {
cluster_size(const range<Dim> &size) : size(size) {}
sycl::range<Dim> get_cluster_size() { return size; }
sycl::range<Dim> get_cluster_size() const { return size; }

private:
range<Dim> size;
Expand Down
38 changes: 23 additions & 15 deletions sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,17 +271,13 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
typename KernelType, typename... ReductionsT>
void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
ReductionsT &&...Reductions) {
// TODO The handler-less path does not support reductions, kernel
// function properties and kernel functions with the kernel_handler
// type argument yet.
// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(ReductionsT) == 0 &&
!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dimensions>>::value)) {
detail::submit_kernel_direct_parallel_for<KernelName>(
std::move(Q), empty_properties_t{}, Range, KernelObj);
detail::submit_kernel_direct_parallel_for<KernelName>(std::move(Q), Range,
KernelObj);
} else {
submit(std::move(Q), [&](handler &CGH) {
nd_launch<KernelName>(CGH, Range, KernelObj,
Expand All @@ -308,13 +304,25 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
typename Properties, typename KernelType, typename... ReductionsT>
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
const KernelType &KernelObj, ReductionsT &&...Reductions) {
// TODO This overload of the nd_launch function takes the kernel function
// properties, which are not yet supported for the handler-less path,
// so it only supports handler based submission for now
submit(std::move(Q), [&](handler &CGH) {
nd_launch<KernelName>(CGH, Config, KernelObj,
std::forward<ReductionsT>(Reductions)...);
});
// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(ReductionsT) == 0 &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dimensions>>::value)) {

ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
Properties>
LaunchConfigAccess(Config);

detail::submit_kernel_direct_parallel_for<KernelName>(
std::move(Q), LaunchConfigAccess.getRange(), KernelObj,
LaunchConfigAccess.getProperties());
} else {
submit(std::move(Q), [&](handler &CGH) {
nd_launch<KernelName>(CGH, Config, KernelObj,
std::forward<ReductionsT>(Reductions)...);
});
}
}

template <int Dimensions, typename... ArgsT>
Expand Down
Loading
Loading