Skip to content

Commit

Permalink
[SYCL] Implement device_has kernel property and macro (intel#7159)
Browse files Browse the repository at this point in the history
This commit implements the `device_has` kernel property and the
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY macro from the

[sycl_ext_oneapi_kernel_properties](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_properties.asciidoc)
extension.

Known current limitations:
- The LLVM IR attributes from add_ir_attributes_function are not
correctly generated on SYCL_EXTERNAL functions.
- The SYCL_EXT_ONEAPI_FUNCTION_PROPERTY cannot currently be placed after
SYCL_EXTERNAL.

Signed-off-by: Larsen, Steffen <[email protected]>
  • Loading branch information
steffenlarsen authored Oct 26, 2022
1 parent efa7b0d commit 430c722
Show file tree
Hide file tree
Showing 7 changed files with 453 additions and 19 deletions.
49 changes: 49 additions & 0 deletions sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#pragma once

#include <sycl/aspects.hpp>
#include <sycl/ext/oneapi/properties/property.hpp>
#include <sycl/ext/oneapi/properties/property_value.hpp>

Expand Down Expand Up @@ -53,9 +54,18 @@ struct SizeListToStrHelper<SizeList<0, Values...>, CharList<ParsedChars...>,
Chars...>
: SizeListToStrHelper<SizeList<Values...>,
CharList<ParsedChars..., Chars..., ','>> {};
template <size_t... Values, char... ParsedChars>
struct SizeListToStrHelper<SizeList<0, Values...>, CharList<ParsedChars...>>
: SizeListToStrHelper<SizeList<Values...>,
CharList<ParsedChars..., '0', ','>> {};
template <char... ParsedChars, char... Chars>
struct SizeListToStrHelper<SizeList<0>, CharList<ParsedChars...>, Chars...>
: CharsToStr<ParsedChars..., Chars...> {};
template <char... ParsedChars>
struct SizeListToStrHelper<SizeList<0>, CharList<ParsedChars...>>
: CharsToStr<ParsedChars..., '0'> {};
template <>
struct SizeListToStrHelper<SizeList<>, CharList<>> : CharsToStr<> {};

// Converts size_t values to a comma-separated string representation.
template <size_t... Sizes>
Expand All @@ -82,6 +92,12 @@ struct sub_group_size_key {
std::integral_constant<uint32_t, Size>>;
};

struct device_has_key {
template <aspect... Aspects>
using value_t = property_value<device_has_key,
std::integral_constant<aspect, Aspects>...>;
};

template <size_t Dim0, size_t... Dims>
struct property_value<work_group_size_key, std::integral_constant<size_t, Dim0>,
std::integral_constant<size_t, Dims>...> {
Expand Down Expand Up @@ -127,6 +143,13 @@ struct property_value<sub_group_size_key,
static constexpr uint32_t value = Size;
};

template <aspect... Aspects>
struct property_value<device_has_key,
std::integral_constant<aspect, Aspects>...> {
using key_t = device_has_key;
static constexpr std::array<aspect, sizeof...(Aspects)> value{Aspects...};
};

template <size_t Dim0, size_t... Dims>
inline constexpr work_group_size_key::value_t<Dim0, Dims...> work_group_size;

Expand All @@ -137,10 +160,14 @@ inline constexpr work_group_size_hint_key::value_t<Dim0, Dims...>
template <uint32_t Size>
inline constexpr sub_group_size_key::value_t<Size> sub_group_size;

template <aspect... Aspects>
inline constexpr device_has_key::value_t<Aspects...> device_has;

template <> struct is_property_key<work_group_size_key> : std::true_type {};
template <>
struct is_property_key<work_group_size_hint_key> : std::true_type {};
template <> struct is_property_key<sub_group_size_key> : std::true_type {};
template <> struct is_property_key<device_has_key> : std::true_type {};

namespace detail {
template <> struct PropertyToKind<work_group_size_key> {
Expand All @@ -152,13 +179,17 @@ template <> struct PropertyToKind<work_group_size_hint_key> {
template <> struct PropertyToKind<sub_group_size_key> {
static constexpr PropKind Kind = PropKind::SubGroupSize;
};
template <> struct PropertyToKind<device_has_key> {
static constexpr PropKind Kind = PropKind::DeviceHas;
};

template <>
struct IsCompileTimeProperty<work_group_size_key> : std::true_type {};
template <>
struct IsCompileTimeProperty<work_group_size_hint_key> : std::true_type {};
template <>
struct IsCompileTimeProperty<sub_group_size_key> : std::true_type {};
template <> struct IsCompileTimeProperty<device_has_key> : std::true_type {};

template <size_t Dim0, size_t... Dims>
struct PropertyMetaInfo<work_group_size_key::value_t<Dim0, Dims...>> {
Expand All @@ -175,6 +206,12 @@ struct PropertyMetaInfo<sub_group_size_key::value_t<Size>> {
static constexpr const char *name = "sycl-sub-group-size";
static constexpr uint32_t value = Size;
};
template <aspect... Aspects>
struct PropertyMetaInfo<device_has_key::value_t<Aspects...>> {
static constexpr const char *name = "sycl-device-has";
static constexpr const char *value =
SizeListToStr<static_cast<size_t>(Aspects)...>::value;
};

template <typename T, typename = void>
struct HasKernelPropertiesGetMethod : std::false_type {};
Expand All @@ -193,3 +230,15 @@ struct HasKernelPropertiesGetMethod<
} // namespace ext
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl

#ifdef __SYCL_DEVICE_ONLY__
#define SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(PROP) \
[[__sycl_detail__::add_ir_attributes_function( \
{"sycl-device-has"}, \
sycl::ext::oneapi::experimental::detail::PropertyMetaInfo< \
std::remove_cv_t<std::remove_reference_t<decltype(PROP)>>>::name, \
sycl::ext::oneapi::experimental::detail::PropertyMetaInfo< \
std::remove_cv_t<std::remove_reference_t<decltype(PROP)>>>::value)]]
#else
#define SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(PROP)
#endif
3 changes: 2 additions & 1 deletion sycl/include/sycl/ext/oneapi/properties/property.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,9 @@ enum PropKind : uint32_t {
WorkGroupSize = 6,
WorkGroupSizeHint = 7,
SubGroupSize = 8,
DeviceHas = 9,
// PropKindSize must always be the last value.
PropKindSize = 9,
PropKindSize = 10,
};

// This trait must be specialized for all properties and must have a unique
Expand Down
2 changes: 1 addition & 1 deletion sycl/include/sycl/ext/oneapi/properties/property_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace oneapi {
namespace experimental {

// Forward declaration
template <typename PropertyT, typename T, typename... Ts> struct property_value;
template <typename PropertyT, typename... Ts> struct property_value;

namespace detail {

Expand Down
27 changes: 10 additions & 17 deletions sycl/include/sycl/ext/oneapi/properties/property_value.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,26 @@ namespace oneapi {
namespace experimental {
namespace detail {

// Base class for property values with a single type value.
struct SingleTypePropertyValueBase {};

// Base class for properties with 0 or more than 1 values.
struct EmptyPropertyValueBase {};

// Base class for property values with a single non-type value
template <typename T> struct SingleNontypePropertyValueBase {
template <typename T, typename = void> struct SingleNontypePropertyValueBase {};

template <typename T>
struct SingleNontypePropertyValueBase<T, std::enable_if_t<HasValue<T>::value>> {
static constexpr auto value = T::value;
};

// Helper class for property values with a single value
// Helper base class for property_value.
template <typename... Ts> struct PropertyValueBase {};

template <typename T>
struct SinglePropertyValue
: public sycl::detail::conditional_t<HasValue<T>::value,
SingleNontypePropertyValueBase<T>,
SingleTypePropertyValueBase> {
struct PropertyValueBase<T> : public detail::SingleNontypePropertyValueBase<T> {
using value_t = T;
};

} // namespace detail

template <typename PropertyT, typename T = void, typename... Ts>
struct property_value
: public sycl::detail::conditional_t<
sizeof...(Ts) == 0 && !std::is_same<T, void>::value,
detail::SinglePropertyValue<T>, detail::EmptyPropertyValueBase> {
template <typename PropertyT, typename... Ts>
struct property_value : public detail::PropertyValueBase<Ts...> {
using key_t = PropertyT;
};

Expand Down
123 changes: 123 additions & 0 deletions sycl/test/extensions/properties/properties_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,44 @@

#include <sycl/sycl.hpp>

using namespace sycl;
using namespace sycl::ext::oneapi::experimental;

using device_has_all =
decltype(device_has<
aspect::host, aspect::cpu, aspect::gpu, aspect::accelerator,
aspect::custom, aspect::fp16, aspect::fp64, aspect::image,
aspect::online_compiler, aspect::online_linker,
aspect::queue_profiling, aspect::usm_device_allocations,
aspect::usm_host_allocations, aspect::usm_shared_allocations,
aspect::usm_restricted_shared_allocations,
aspect::usm_system_allocations, aspect::ext_intel_pci_address,
aspect::ext_intel_gpu_eu_count,
aspect::ext_intel_gpu_eu_simd_width, aspect::ext_intel_gpu_slices,
aspect::ext_intel_gpu_subslices_per_slice,
aspect::ext_intel_gpu_eu_count_per_subslice,
aspect::ext_intel_max_mem_bandwidth, aspect::ext_intel_mem_channel,
aspect::usm_atomic_host_allocations,
aspect::usm_atomic_shared_allocations, aspect::atomic64,
aspect::ext_intel_device_info_uuid, aspect::ext_oneapi_srgb,
aspect::ext_oneapi_native_assert, aspect::host_debuggable,
aspect::ext_intel_gpu_hw_threads_per_eu,
aspect::ext_oneapi_cuda_async_barrier, aspect::ext_oneapi_bfloat16,
aspect::ext_intel_free_memory, aspect::ext_intel_device_id>);

template <aspect Aspect> inline void singleAspectDeviceHasChecks() {
static_assert(is_property_value<decltype(device_has<Aspect>)>::value);
static_assert(std::is_same_v<device_has_key,
typename decltype(device_has<Aspect>)::key_t>);
static_assert(decltype(device_has<Aspect>)::value.size() == 1);
static_assert(decltype(device_has<Aspect>)::value[0] == Aspect);
}

int main() {
static_assert(is_property_key<work_group_size_key>::value);
static_assert(is_property_key<work_group_size_hint_key>::value);
static_assert(is_property_key<sub_group_size_key>::value);
static_assert(is_property_key<device_has_key>::value);

static_assert(is_property_value<decltype(work_group_size<1>)>::value);
static_assert(is_property_value<decltype(work_group_size<2, 2>)>::value);
Expand Down Expand Up @@ -52,5 +84,96 @@ int main() {
static_assert(std::is_same_v<decltype(sub_group_size<28>)::value_t,
std::integral_constant<uint32_t, 28>>);

singleAspectDeviceHasChecks<aspect::host>();
singleAspectDeviceHasChecks<aspect::cpu>();
singleAspectDeviceHasChecks<aspect::gpu>();
singleAspectDeviceHasChecks<aspect::accelerator>();
singleAspectDeviceHasChecks<aspect::custom>();
singleAspectDeviceHasChecks<aspect::fp16>();
singleAspectDeviceHasChecks<aspect::fp64>();
singleAspectDeviceHasChecks<aspect::image>();
singleAspectDeviceHasChecks<aspect::online_compiler>();
singleAspectDeviceHasChecks<aspect::online_linker>();
singleAspectDeviceHasChecks<aspect::queue_profiling>();
singleAspectDeviceHasChecks<aspect::usm_device_allocations>();
singleAspectDeviceHasChecks<aspect::usm_host_allocations>();
singleAspectDeviceHasChecks<aspect::usm_shared_allocations>();
singleAspectDeviceHasChecks<aspect::usm_restricted_shared_allocations>();
singleAspectDeviceHasChecks<aspect::usm_system_allocations>();
singleAspectDeviceHasChecks<aspect::ext_intel_pci_address>();
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_eu_count>();
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_eu_simd_width>();
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_slices>();
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_subslices_per_slice>();
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_eu_count_per_subslice>();
singleAspectDeviceHasChecks<aspect::ext_intel_max_mem_bandwidth>();
singleAspectDeviceHasChecks<aspect::ext_intel_mem_channel>();
singleAspectDeviceHasChecks<aspect::usm_atomic_host_allocations>();
singleAspectDeviceHasChecks<aspect::usm_atomic_shared_allocations>();
singleAspectDeviceHasChecks<aspect::atomic64>();
singleAspectDeviceHasChecks<aspect::ext_intel_device_info_uuid>();
singleAspectDeviceHasChecks<aspect::ext_oneapi_srgb>();
singleAspectDeviceHasChecks<aspect::ext_oneapi_native_assert>();
singleAspectDeviceHasChecks<aspect::host_debuggable>();
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_hw_threads_per_eu>();
singleAspectDeviceHasChecks<aspect::ext_oneapi_cuda_async_barrier>();
singleAspectDeviceHasChecks<aspect::ext_oneapi_bfloat16>();
singleAspectDeviceHasChecks<aspect::ext_intel_free_memory>();
singleAspectDeviceHasChecks<aspect::ext_intel_device_id>();

static_assert(is_property_value<decltype(device_has<>)>::value);
static_assert(std::is_same_v<device_has_key, decltype(device_has<>)::key_t>);
static_assert(decltype(device_has<>)::value.size() == 0);

static_assert(is_property_value<device_has_all>::value);
static_assert(std::is_same_v<device_has_key, device_has_all::key_t>);
static_assert(device_has_all::value.size() == 36);
static_assert(device_has_all::value[0] == aspect::host);
static_assert(device_has_all::value[1] == aspect::cpu);
static_assert(device_has_all::value[2] == aspect::gpu);
static_assert(device_has_all::value[3] == aspect::accelerator);
static_assert(device_has_all::value[4] == aspect::custom);
static_assert(device_has_all::value[5] == aspect::fp16);
static_assert(device_has_all::value[6] == aspect::fp64);
static_assert(device_has_all::value[7] == aspect::image);
static_assert(device_has_all::value[8] == aspect::online_compiler);
static_assert(device_has_all::value[9] == aspect::online_linker);
static_assert(device_has_all::value[10] == aspect::queue_profiling);
static_assert(device_has_all::value[11] == aspect::usm_device_allocations);
static_assert(device_has_all::value[12] == aspect::usm_host_allocations);
static_assert(device_has_all::value[13] == aspect::usm_shared_allocations);
static_assert(device_has_all::value[14] ==
aspect::usm_restricted_shared_allocations);
static_assert(device_has_all::value[15] == aspect::usm_system_allocations);
static_assert(device_has_all::value[16] == aspect::ext_intel_pci_address);
static_assert(device_has_all::value[17] == aspect::ext_intel_gpu_eu_count);
static_assert(device_has_all::value[18] ==
aspect::ext_intel_gpu_eu_simd_width);
static_assert(device_has_all::value[19] == aspect::ext_intel_gpu_slices);
static_assert(device_has_all::value[20] ==
aspect::ext_intel_gpu_subslices_per_slice);
static_assert(device_has_all::value[21] ==
aspect::ext_intel_gpu_eu_count_per_subslice);
static_assert(device_has_all::value[22] ==
aspect::ext_intel_max_mem_bandwidth);
static_assert(device_has_all::value[23] == aspect::ext_intel_mem_channel);
static_assert(device_has_all::value[24] ==
aspect::usm_atomic_host_allocations);
static_assert(device_has_all::value[25] ==
aspect::usm_atomic_shared_allocations);
static_assert(device_has_all::value[26] == aspect::atomic64);
static_assert(device_has_all::value[27] ==
aspect::ext_intel_device_info_uuid);
static_assert(device_has_all::value[28] == aspect::ext_oneapi_srgb);
static_assert(device_has_all::value[29] == aspect::ext_oneapi_native_assert);
static_assert(device_has_all::value[30] == aspect::host_debuggable);
static_assert(device_has_all::value[31] ==
aspect::ext_intel_gpu_hw_threads_per_eu);
static_assert(device_has_all::value[32] ==
aspect::ext_oneapi_cuda_async_barrier);
static_assert(device_has_all::value[33] == aspect::ext_oneapi_bfloat16);
static_assert(device_has_all::value[34] == aspect::ext_intel_free_memory);
static_assert(device_has_all::value[35] == aspect::ext_intel_device_id);

return 0;
}
Loading

0 comments on commit 430c722

Please sign in to comment.