Skip to content

Commit

Permalink
Fix incdec/byteshifts
Browse files Browse the repository at this point in the history
  • Loading branch information
aelovikov-intel committed Jul 29, 2024
1 parent 2f566b8 commit 60dc765
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 72 deletions.
70 changes: 39 additions & 31 deletions sycl/include/sycl/vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,32 +114,33 @@ struct ScalarConversionOperatorMixIn<Self, T, N, std::enable_if_t<N == 1>> {
// a separate mixin for each overload/narrow set of overloads and just "merge"
// them all back later.

template <typename Self, typename DataT, bool EnablePostfix, typename = void>
template <typename SelfOperandTy, typename DataT, bool EnablePostfix,
typename = void>
struct IncDecMixin {};

template <typename Self, typename DataT>
struct IncDecMixin<Self, DataT, false,
template <typename SelfOperandTy, typename DataT>
struct IncDecMixin<SelfOperandTy, DataT, false,
std::enable_if_t<!std::is_same_v<bool, DataT>>> {
friend const Self &operator++(const Self &x) {
friend SelfOperandTy &operator++(SelfOperandTy &x) {
x += DataT{1};
return x;
}
friend const Self &operator--(const Self &x) {
friend SelfOperandTy &operator--(SelfOperandTy &x) {
x -= DataT{1};
return x;
}
};

template <typename Self, typename DataT>
struct IncDecMixin<Self, DataT, true,
template <typename SelfOperandTy, typename DataT>
struct IncDecMixin<SelfOperandTy, DataT, true,
std::enable_if_t<!std::is_same_v<bool, DataT>>>
: public IncDecMixin<Self, DataT, false> {
friend auto operator++(const Self &x, int) {
: public IncDecMixin<SelfOperandTy, DataT, false> {
friend auto operator++(SelfOperandTy &x, int) {
auto tmp = +x;
x += DataT{1};
return tmp;
}
friend auto operator--(const Self &x, int) {
friend auto operator--(SelfOperandTy &x, int) {
auto tmp = +x;
x -= DataT{1};
return tmp;
Expand All @@ -150,13 +151,13 @@ struct IncDecMixin<Self, DataT, true,
// the implementation has been doing and it seems to be a reasonable thing to
// do. Otherwise shift operators for byte element type would have to be disabled
// completely to follow C++ standard approach.
template <typename Self, typename DataT, int N, bool EnableOpAssign,
typename = void>
template <typename Self, typename OpAssignSelfOperandTy, typename DataT, int N,
bool EnableOpAssign, typename = void>
struct ByteShiftsMixin {};

#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
template <typename Self, typename DataT, int N>
struct ByteShiftsMixin<Self, DataT, N, false,
template <typename Self, typename OpAssignSelfOperandTy, typename DataT, int N>
struct ByteShiftsMixin<Self, OpAssignSelfOperandTy, DataT, N, false,
std::enable_if_t<std::is_same_v<std::byte, DataT>>> {
friend auto operator<<(const Self &lhs, int shift) {
vec<DataT, N> tmp;
Expand All @@ -172,15 +173,17 @@ struct ByteShiftsMixin<Self, DataT, N, false,
}
};

template <typename Self, typename DataT, int N>
struct ByteShiftsMixin<Self, DataT, N, true,
template <typename Self, typename OpAssignSelfOperandTy, typename DataT, int N>
struct ByteShiftsMixin<Self, OpAssignSelfOperandTy, DataT, N, true,
std::enable_if_t<std::is_same_v<std::byte, DataT>>>
: public ByteShiftsMixin<Self, DataT, N, false> {
friend const Self &operator<<=(const Self &lhs, int shift) {
: public ByteShiftsMixin<Self, OpAssignSelfOperandTy, DataT, N, false> {
friend OpAssignSelfOperandTy &operator<<=(OpAssignSelfOperandTy &lhs,
int shift) {
lhs = lhs << shift;
return lhs;
}
friend const Self &operator>>=(const Self &lhs, int shift) {
friend OpAssignSelfOperandTy &operator>>=(OpAssignSelfOperandTy &lhs,
int shift) {
lhs = lhs >> shift;
return lhs;
}
Expand Down Expand Up @@ -593,12 +596,6 @@ struct VectorImpl {
}
};

template <typename Self, typename DataT, int N, bool AllowAssignOps>
struct __SYCL_EBO CommonVecSwizzleMixins
: public ScalarConversionOperatorMixIn<Self, DataT, N>,
public IncDecMixin<Self, DataT, AllowAssignOps>,
public ByteShiftsMixin<Self, DataT, N, AllowAssignOps> {};

template <typename Self, typename VecT, typename DataT, int N,
bool AllowAssignOps>
struct __SYCL_EBO SwizzleMixins
Expand Down Expand Up @@ -677,9 +674,16 @@ class __SYCL_EBO Swizzle
: public SwizzleBase<Swizzle<VecT, Indexes...>, typename VecT::element_type,
sizeof...(Indexes),
is_assignable_swizzle<VecT, Indexes...>>,
public CommonVecSwizzleMixins<
Swizzle<VecT, Indexes...>, typename VecT::element_type,
sizeof...(Indexes), is_assignable_swizzle<VecT, Indexes...>>,
public ScalarConversionOperatorMixIn<Swizzle<VecT, Indexes...>,
typename VecT::element_type,
sizeof...(Indexes)>,
public IncDecMixin<const Swizzle<VecT, Indexes...>,
typename VecT::element_type,
is_assignable_swizzle<VecT, Indexes...>>,
public ByteShiftsMixin<Swizzle<VecT, Indexes...>,
const Swizzle<VecT, Indexes...>,
typename VecT::element_type, sizeof...(Indexes),
is_assignable_swizzle<VecT, Indexes...>>,
public SwizzleMixins<Swizzle<VecT, Indexes...>, VecT,
typename VecT::element_type, sizeof...(Indexes),
is_assignable_swizzle<VecT, Indexes...>> {
Expand Down Expand Up @@ -767,9 +771,13 @@ class __SYCL_EBO Swizzle
// SYCL devices as well as in host C++ code.
template <typename DataT, int NumElements>
class __SYCL_EBO vec
: public detail::CommonVecSwizzleMixins<vec<DataT, NumElements>, DataT,
NumElements,
/* AllowAssignOps = */ true>,
: public detail::ScalarConversionOperatorMixIn<vec<DataT, NumElements>,
DataT, NumElements>,
public detail::IncDecMixin<vec<DataT, NumElements>, DataT,
/* AllowAssignOps = */ true>,
public detail::ByteShiftsMixin<
vec<DataT, NumElements>, vec<DataT, NumElements>, DataT, NumElements,
/* AllowAssignOps = */ true>,
public detail::NamedSwizzlesMixinBoth<vec<DataT, NumElements>,
NumElements>,
public detail::NonTemplateBinaryOpsMixin<vec<DataT, NumElements>,
Expand Down
10 changes: 5 additions & 5 deletions sycl/test/check_device_code/vector/vector_bf16_builtins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.171") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.171") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.171") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.170") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.170") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.170") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16
// CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -122,7 +122,7 @@ SYCL_EXTERNAL auto TestFMax(vec<bfloat16, 3> a, vec<bfloat16, 3> b) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.355") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.447") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.352") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.443") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8
// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4
Expand All @@ -148,7 +148,7 @@ SYCL_EXTERNAL auto TestIsNan(vec<bfloat16, 4> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.813") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.813") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.805") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.805") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -184,7 +184,7 @@ SYCL_EXTERNAL auto TestFabs(vec<bfloat16, 8> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.813") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.813") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.805") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.805") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -220,7 +220,7 @@ SYCL_EXTERNAL auto TestCeil(vec<bfloat16, 8> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.997") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.997") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.997") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.997") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.987") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.987") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.987") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.987") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64
// CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down
8 changes: 4 additions & 4 deletions sycl/test/check_device_code/vector/vector_convert_bfloat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec<bfloat16, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.171") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.170") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]])
// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]]
Expand Down Expand Up @@ -92,7 +92,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec<bfloat16, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.263") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.261") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]]
Expand Down Expand Up @@ -190,7 +190,7 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec<int, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.355") align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.352") align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]]
Expand All @@ -203,7 +203,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec<long long, 1> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.539") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.534") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]]
Expand Down
Loading

0 comments on commit 60dc765

Please sign in to comment.