From 442f0d22b6fc9d7bb8a6d8bd7996a150e8118f9e Mon Sep 17 00:00:00 2001 From: Isaac Ault Date: Tue, 23 Jan 2024 12:28:53 +0000 Subject: [PATCH 1/7] [SYCL][Bindless][Exp] Add Support For Unsampled Image Arrays - Creation / destruction of unsampled image arrays - Reading / writing of unsampled image arrays - sycl::ext::oneapi::experimental::image_type::array enum value added - sycl::ext::oneapi::experimental::image_descriptor::array_size member added - sycl::ext::oneapi::experimental::image_descriptor::verify() member function added --- libclc/ptx-nvidiacl/libspirv/images/image.cl | 375 +++++++++++++- .../libspirv/images/image_helpers.ll | 187 +++++++ .../sycl_ext_oneapi_bindless_images.asciidoc | 253 +++++++++- sycl/include/CL/__spirv/spirv_ops.hpp | 7 + sycl/include/sycl/detail/image_ocl_types.hpp | 29 ++ .../sycl/ext/oneapi/bindless_images.hpp | 75 +++ .../ext/oneapi/bindless_images_descriptor.hpp | 86 +++- sycl/plugins/unified_runtime/CMakeLists.txt | 15 +- sycl/source/detail/bindless_images.cpp | 41 +- sycl/source/handler.cpp | 129 ++++- .../array/read_write_unsampled_array.cpp | 467 ++++++++++++++++++ .../bindless_images/bindless_helpers.hpp | 59 +++ .../test-e2e/bindless_images/read_sampled.cpp | 27 +- .../bindless_images/read_write_unsampled.cpp | 91 +--- 14 files changed, 1666 insertions(+), 175 deletions(-) create mode 100644 sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp create mode 100644 sycl/test-e2e/bindless_images/bindless_helpers.hpp diff --git a/libclc/ptx-nvidiacl/libspirv/images/image.cl b/libclc/ptx-nvidiacl/libspirv/images/image.cl index f55f0c435cf35..c489a18bc0fc9 100644 --- a/libclc/ptx-nvidiacl/libspirv/images/image.cl +++ b/libclc/ptx-nvidiacl/libspirv/images/image.cl @@ -256,36 +256,47 @@ pixelf32 as_pixelf32(int4 v) { return as_float4(v); } image, x * sizeof(pixelf##pixelf_size), y, z)); \ } -_DEFINE_VEC4_CAST(float, int) -_DEFINE_VEC4_CAST(int, float) -_DEFINE_VEC4_CAST(float, uint) -_DEFINE_VEC4_CAST(uint, float) -_DEFINE_VEC4_CAST(uint, int) +_DEFINE_VEC4_CAST(int, int) _DEFINE_VEC4_CAST(int, uint) _DEFINE_VEC4_CAST(int, short) _DEFINE_VEC4_CAST(int, char) +_DEFINE_VEC4_CAST(int, float) _DEFINE_VEC4_CAST(uint, ushort) _DEFINE_VEC4_CAST(uint, uchar) +_DEFINE_VEC4_CAST(short, short) +_DEFINE_VEC4_CAST(short, ushort) _DEFINE_VEC4_CAST(short, char) _DEFINE_VEC4_CAST(short, uchar) +_DEFINE_VEC4_CAST(float, int) +_DEFINE_VEC4_CAST(float, uint) _DEFINE_VEC4_CAST(float, half) _DEFINE_VEC4_TO_VEC2_CAST(int, int) -_DEFINE_VEC4_TO_VEC2_CAST(uint, uint) -_DEFINE_VEC4_TO_VEC2_CAST(float, float) -_DEFINE_VEC4_TO_VEC2_CAST(short, short) -_DEFINE_VEC4_TO_VEC2_CAST(short, char) +_DEFINE_VEC4_TO_VEC2_CAST(int, uint) _DEFINE_VEC4_TO_VEC2_CAST(int, short) _DEFINE_VEC4_TO_VEC2_CAST(int, char) +_DEFINE_VEC4_TO_VEC2_CAST(uint, uint) _DEFINE_VEC4_TO_VEC2_CAST(uint, ushort) _DEFINE_VEC4_TO_VEC2_CAST(uint, uchar) +_DEFINE_VEC4_TO_VEC2_CAST(short, short) +_DEFINE_VEC4_TO_VEC2_CAST(short, ushort) +_DEFINE_VEC4_TO_VEC2_CAST(short, char) +_DEFINE_VEC4_TO_VEC2_CAST(float, float) _DEFINE_VEC4_TO_VEC2_CAST(float, half) +_DEFINE_VEC2_CAST(int, int) _DEFINE_VEC2_CAST(int, float) +_DEFINE_VEC2_CAST(short, short) _DEFINE_VEC2_CAST(short, char) _DEFINE_VEC2_CAST(short, uchar) +_DEFINE_CAST(int, int) +_DEFINE_CAST(int, uint) _DEFINE_CAST(int, float) +_DEFINE_CAST(short, short) +_DEFINE_CAST(short, ushort) +_DEFINE_CAST(short, char) +_DEFINE_CAST(short, uchar) _DEFINE_CAST(float, float) _DEFINE_CAST(float2, float2) _DEFINE_CAST(float4, float4) @@ -2588,3 +2599,349 @@ _CLC_DEFINE_MIPMAP_BINDLESS_READS_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv4_f, flo #undef COORD_PARAMS_3D #undef _CLC_DEFINE_MIPMAP_BINDLESS_READS_BUILTIN + +// ------- Image Arrays / Layered Images ------- + +// // --- THUNKS: Surface Array Reads --- +// int +int __nvvm_suld_1d_array_i32_clamp_s(long, int, int) __asm( + "llvm.nvvm.suld.1d.array.i32.clamp"); +int __nvvm_suld_2d_array_i32_clamp_s(long, int, int, int) __asm( + "llvm.nvvm.suld.2d.array.i32.clamp"); +int2 __nvvm_suld_1d_array_v2i32_clamp_s(long, int, int) __asm( + "__clc_llvm_nvvm_suld_1d_array_v2i32_clamp"); +int2 __nvvm_suld_2d_array_v2i32_clamp_s(long, int, int, int) __asm( + "__clc_llvm_nvvm_suld_2d_array_v2i32_clamp"); +int4 __nvvm_suld_1d_array_v4i32_clamp_s(long, int, int) __asm( + "__clc_llvm_nvvm_suld_1d_array_v4i32_clamp"); +int4 __nvvm_suld_2d_array_v4i32_clamp_s(long, int, int, int) __asm( + "__clc_llvm_nvvm_suld_2d_array_v4i32_clamp"); + +// short +short __nvvm_suld_1d_array_i16_clamp_s(long, int, int) __asm( + "llvm.nvvm.suld.1d.array.i16.clamp"); +short __nvvm_suld_2d_array_i16_clamp_s(long, int, int, int) __asm( + "llvm.nvvm.suld.2d.array.i16.clamp"); +short2 __nvvm_suld_1d_array_v2i16_clamp_s(long, int, int) __asm( + "__clc_llvm_nvvm_suld_1d_array_v2i16_clamp"); +short2 __nvvm_suld_2d_array_v2i16_clamp_s(long, int, int, int) __asm( + "__clc_llvm_nvvm_suld_2d_array_v2i16_clamp"); +short4 __nvvm_suld_1d_array_v4i16_clamp_s(long, int, int) __asm( + "__clc_llvm_nvvm_suld_1d_array_v4i16_clamp"); +short4 __nvvm_suld_2d_array_v4i16_clamp_s(long, int, int, int) __asm( + "__clc_llvm_nvvm_suld_2d_array_v4i16_clamp"); + +// char helper -- i8 intrinsic returns i16, requires helper +short __nvvm_suld_1d_array_i8_clamp_s_helper(long, int, int) __asm( + "llvm.nvvm.suld.1d.array.i8.clamp"); +short __nvvm_suld_2d_array_i8_clamp_s_helper(long, int, int, int) __asm( + "llvm.nvvm.suld.2d.array.i8.clamp"); +short2 __nvvm_suld_1d_array_v2i8_clamp_s_helper(long, int, int) __asm( + "__clc_llvm_nvvm_suld_1d_array_v2i8_clamp"); +short2 __nvvm_suld_2d_array_v2i8_clamp_s_helper(long, int, int, int) __asm( + "__clc_llvm_nvvm_suld_2d_array_v2i8_clamp"); +short4 __nvvm_suld_1d_array_v4i8_clamp_s_helper(long, int, int) __asm( + "__clc_llvm_nvvm_suld_1d_array_v4i8_clamp"); +short4 __nvvm_suld_2d_array_v4i8_clamp_s_helper(long, int, int, int) __asm( + "__clc_llvm_nvvm_suld_2d_array_v4i8_clamp"); + +// Macro to generate surface array fetches +#define _CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN( \ + elem_t, fetch_elem_t, cast_to_elem_t, vec_size, fetch_vec_size, helper) \ + elem_t __nvvm_suld_1d_array_##vec_size##_clamp_s(unsigned long imageHandle, \ + int idx, int x) { \ + fetch_elem_t a = __nvvm_suld_1d_array_##fetch_vec_size##_clamp_s##helper( \ + imageHandle, idx, x); \ + return as_##elem_t(cast_##fetch_elem_t##_to_##cast_to_elem_t(a)); \ + } \ + elem_t __nvvm_suld_2d_array_##vec_size##_clamp_s(unsigned long imageHandle, \ + int idx, int x, int y) { \ + fetch_elem_t a = __nvvm_suld_2d_array_##fetch_vec_size##_clamp_s##helper( \ + imageHandle, idx, x, y); \ + return as_##elem_t(cast_##fetch_elem_t##_to_##cast_to_elem_t(a)); \ + } + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(uint, int, uint, j32, i32, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(ushort, short, ushort, t16, i16, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(char, short, char, i8, i8, _helper) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(uchar, short, uchar, h8, i8, _helper) + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(uint2, int4, uint2, v2j32, v4i32, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(ushort2, short4, ushort2, v2t16, v4i16, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(char2, short2, char2, v2i8, v2i8, _helper) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(uchar2, short2, uchar2, v2h8, v2i8, _helper) + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(uint4, int4, uint4, v4j32, v4i32, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(ushort4, short4, ushort4, v4t16, v4i16, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(char4, short4, char4, v4i8, v4i8, _helper) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(uchar4, short4, uchar4, v4h8, v4i8, _helper) + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(float, int, int, f32, i32, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(half, short, short, f16, i16, ) + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(float2, int2, int2, v2f32, v2i32, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(half2, short2, short2, v2f16, v2i16, ) + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(float4, int4, int4, v4f32, v4i32, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(half4, short4, short4, v4f16, v4i16, ) + +#undef _CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN + +// // --- THUNKS: Surface Array Writes --- +// int +void __nvvm_sust_1d_array_v4i32_clamp_s( + unsigned long, int, int, int, int, int, + int) __asm("llvm.nvvm.sust.b.1d.array.v4i32.clamp"); +void __nvvm_sust_2d_array_v4i32_clamp_s( + unsigned long, int, int, int, int, int, int, + int) __asm("llvm.nvvm.sust.b.2d.array.v4i32.clamp"); +void __nvvm_sust_1d_array_v2i32_clamp_s( + unsigned long, int, int, int, + int) __asm("llvm.nvvm.sust.b.1d.array.v2i32.clamp"); +void __nvvm_sust_2d_array_v2i32_clamp_s( + unsigned long, int, int, int, int, + int) __asm("llvm.nvvm.sust.b.2d.array.v2i32.clamp"); +void __nvvm_sust_1d_array_i32_clamp_s(unsigned long, int, int, int) __asm( + "llvm.nvvm.sust.b.1d.array.i32.clamp"); +void __nvvm_sust_2d_array_i32_clamp_s(unsigned long, int, int, int, int) __asm( + "llvm.nvvm.sust.b.2d.array.i32.clamp"); + +// short +void __nvvm_sust_1d_array_v4i16_clamp_s( + unsigned long, int, int, short, short, short, + short) __asm("llvm.nvvm.sust.b.1d.array.v4i16.clamp"); +void __nvvm_sust_2d_array_v4i16_clamp_s( + unsigned long, int, int, int, short, short, short, + short) __asm("llvm.nvvm.sust.b.2d.array.v4i16.clamp"); +void __nvvm_sust_1d_array_v2i16_clamp_s( + unsigned long, int, int, short, + short) __asm("llvm.nvvm.sust.b.1d.array.v2i16.clamp"); +void __nvvm_sust_2d_array_v2i16_clamp_s( + unsigned long, int, int, int, short, + short) __asm("llvm.nvvm.sust.b.2d.array.v2i16.clamp"); +void __nvvm_sust_1d_array_i16_clamp_s(unsigned long, int, int, short) __asm( + "llvm.nvvm.sust.b.1d.array.i16.clamp"); +void __nvvm_sust_2d_array_i16_clamp_s( + unsigned long, int, int, int, + short) __asm("llvm.nvvm.sust.b.2d.array.i16.clamp"); + +// char helper -- i8 intrinsic takes i16, requires helper +void __nvvm_sust_1d_array_v4i8_clamp_s_helper( + unsigned long, int, int, short, short, short, + short) __asm("llvm.nvvm.sust.b.1d.array.v4i8.clamp"); +void __nvvm_sust_2d_array_v4i8_clamp_s_helper( + unsigned long, int, int, int, short, short, short, + short) __asm("llvm.nvvm.sust.b.2d.array.v4i8.clamp"); +void __nvvm_sust_1d_array_v2i8_clamp_s_helper( + unsigned long, int, int, short, + short) __asm("llvm.nvvm.sust.b.1d.array.v2i8.clamp"); +void __nvvm_sust_2d_array_v2i8_clamp_s_helper( + unsigned long, int, int, int, short, + short) __asm("llvm.nvvm.sust.b.2d.array.v2i8.clamp"); +void __nvvm_sust_1d_array_i8_clamp_s_helper( + unsigned long, int, int, short) __asm("llvm.nvvm.sust.b.1d.array.i8.clamp"); +void __nvvm_sust_2d_array_i8_clamp_s_helper( + unsigned long, int, int, int, + short) __asm("llvm.nvvm.sust.b.2d.array.i8.clamp"); + +#define COLOR_INPUT_1_CHANNEL(elem_t) elem_t a +#define COLOR_INPUT_2_CHANNEL(elem_t) elem_t a, elem_t b +#define COLOR_INPUT_4_CHANNEL(elem_t) elem_t a, elem_t b, elem_t c, elem_t d + +#define COLOR_PARAMS_1_CHANNEL_AS_TYPE(elem_t) as_##elem_t(a) +#define COLOR_PARAMS_2_CHANNEL_AS_TYPE(elem_t) as_##elem_t(a), as_##elem_t(b) +#define COLOR_PARAMS_4_CHANNEL_AS_TYPE(elem_t) \ + as_##elem_t(a), as_##elem_t(b), as_##elem_t(c), as_##elem_t(d) + +#define COLOR_PARAMS_1_CHANNEL_C_CAST(elem_t) (elem_t) a +#define COLOR_PARAMS_2_CHANNEL_C_CAST(elem_t) (elem_t) a, (elem_t)b +#define COLOR_PARAMS_4_CHANNEL_C_CAST(elem_t) \ + (elem_t) a, (elem_t)b, (elem_t)c, (elem_t)d + +// Macro to generate surface array writes +#define _CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN( \ + elem_t, write_elem_t, vec_size, write_vec_size, num_channels, \ + type_conversion, helper) \ + void __nvvm_sust_1d_array_##vec_size##_clamp_s( \ + unsigned long imageHandle, int idx, int x, \ + COLOR_INPUT_##num_channels##_CHANNEL(elem_t)) { \ + return __nvvm_sust_1d_array_##write_vec_size##_clamp_s##helper( \ + imageHandle, idx, x, \ + COLOR_PARAMS_##num_channels##_CHANNEL_##type_conversion( \ + write_elem_t)); \ + } \ + void __nvvm_sust_2d_array_##vec_size##_clamp_s( \ + unsigned long imageHandle, int idx, int x, int y, \ + COLOR_INPUT_##num_channels##_CHANNEL(elem_t)) { \ + return __nvvm_sust_2d_array_##write_vec_size##_clamp_s##helper( \ + imageHandle, idx, x, y, \ + COLOR_PARAMS_##num_channels##_CHANNEL_##type_conversion( \ + write_elem_t)); \ + } + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(uint, int, j32, i32, 1, AS_TYPE, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(ushort, short, t16, i16, 1, AS_TYPE, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(char, short, i8, i8, 1, C_CAST, _helper) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(uchar, short, h8, i8, 1, C_CAST, _helper) + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(uint, int, v2j32, v2i32, 2, AS_TYPE, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(ushort, short, v2t16, v2i16, 2, AS_TYPE, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(char, short, v2i8, v2i8, 2, C_CAST, _helper) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(uchar, short, v2h8, v2i8, 2, C_CAST, _helper) + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(uint, int, v4j32, v4i32, 4, AS_TYPE, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(ushort, short, v4t16, v4i16, 4, AS_TYPE, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(char, short, v4i8, v4i8, 4, C_CAST, _helper) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(uchar, short, v4h8, v4i8, 4, C_CAST, _helper) + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(float, int, f32, i32, 1, AS_TYPE, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(half, short, f16, i16, 1, AS_TYPE, ) + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(float, int, v2f32, v2i32, 2, AS_TYPE, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(half, short, v2f16, v2i16, 2, AS_TYPE, ) + +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(float, int, v4f32, v4i32, 4, AS_TYPE, ) +_CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(half, short, v4f16, v4i16, 4, AS_TYPE, ) + +#undef _CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN + +#undef COLOR_INPUT_1_CHANNEL +#undef COLOR_INPUT_2_CHANNEL +#undef COLOR_INPUT_4_CHANNEL + +#undef COLOR_PARAMS_1_CHANNEL_AS_TYPE +#undef COLOR_PARAMS_2_CHANNEL_AS_TYPE +#undef COLOR_PARAMS_4_CHANNEL_AS_TYPE + +#undef COLOR_PARAMS_1_CHANNEL_C_CAST +#undef COLOR_PARAMS_2_CHANNEL_C_CAST +#undef COLOR_PARAMS_4_CHANNEL_C_CAST + +// GENERATED FUNCS: SURFACE ARRAY READ/WRITES + +// Vector of size 1 is scalar +#define ELEM_VEC_1(elem_t) elem_t +#define ELEM_VEC_2(elem_t) elem_t##2 +#define ELEM_VEC_4(elem_t) elem_t##4 + +#define COORD_INPUT_1D(elem_t) ELEM_VEC_1(elem_t) coord +#define COORD_INPUT_2D(elem_t) ELEM_VEC_2(elem_t) coord + +#define COORD_PARAMS_1D(elem_t) coord * sizeof(elem_t) +#define COORD_PARAMS_2D(elem_t) coord.x * sizeof(elem_t), coord.y + +#define COLOR_PARAMS_1_CHANNEL c +#define COLOR_PARAMS_2_CHANNEL c.x, c.y +#define COLOR_PARAMS_4_CHANNEL c.x, c.y, c.z, c.w + +#define VEC_SIZE_1(elem_t, size) elem_t##size +#define VEC_SIZE_2(elem_t, size) v2##elem_t##size +#define VEC_SIZE_4(elem_t, size) v4##elem_t##size + +#define DVEC_SIZE_1(prefix, elem_t, postfix) prefix##elem_t##postfix +#define DVEC_SIZE_2(prefix, elem_t, postfix) prefix##Dv2_##elem_t##postfix +#define DVEC_SIZE_4(prefix, elem_t, postfix) prefix##Dv4_##elem_t##postfix + +#define CONCAT(x, y) x##y +#define CONCAT_HELP(x, y) CONCAT(x, y) + +#define NVVM_FUNC(name, dimension, vec_size_mangled) \ + __nvvm_##name##_##dimension##d_array_##vec_size_mangled##_clamp_s +#define NVVM_FUNC_HELP(a, b, c) NVVM_FUNC(a, b, c) + +#define MANGLE_FUNC_IMG_HANDLE_HELP(size, name, prefix, postfix) \ + MANGLE_FUNC_IMG_HANDLE(size, name, prefix, postfix) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_READ_BUILTIN( \ + elem_t, vec_size, dimension, ocl_elem_t_mangled, nvvm_elem_t_mangled, \ + elem_t_size) \ + _CLC_DEF ELEM_VEC_##vec_size(elem_t) MANGLE_FUNC_IMG_HANDLE_HELP( \ + 22, __spirv_ImageArrayRead, \ + DVEC_SIZE_##vec_size(I, ocl_elem_t_mangled, ), \ + DVEC_SIZE_##dimension(, i, ET_T0_T1_i))( \ + ulong imageHandle, COORD_INPUT_##dimension##D(int), int idx) { \ + return NVVM_FUNC_HELP( \ + suld, dimension, \ + VEC_SIZE_##vec_size(nvvm_elem_t_mangled, elem_t_size))( \ + imageHandle, idx, \ + COORD_PARAMS_##dimension##D(ELEM_VEC_##vec_size(elem_t))); \ + } + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN( \ + elem_t, vec_size, dimension, elem_t_mangled, write_mangled, elem_t_size) \ + _CLC_DEF void MANGLE_FUNC_IMG_HANDLE_HELP( \ + 23, __spirv_ImageArrayWrite, I, \ + CONCAT_HELP(DVEC_SIZE_##dimension(, i, ), \ + DVEC_SIZE_##vec_size(, elem_t_mangled, EvT_T0_iT1_)))( \ + ulong imageHandle, COORD_INPUT_##dimension##D(int), int idx, \ + ELEM_VEC_##vec_size(elem_t) c) { \ + NVVM_FUNC_HELP(sust, dimension, \ + VEC_SIZE_##vec_size(write_mangled, elem_t_size)) \ + (imageHandle, idx, \ + COORD_PARAMS_##dimension##D(ELEM_VEC_##vec_size(elem_t)), \ + COLOR_PARAMS_##vec_size##_CHANNEL); \ + } + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ND( \ + dimension, elem_t, vec_size, ocl_elem_t_mangled, nvvm_elem_t_mangled, \ + elem_t_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_READ_BUILTIN( \ + elem_t, vec_size, dimension, ocl_elem_t_mangled, nvvm_elem_t_mangled, \ + elem_t_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN( \ + elem_t, vec_size, dimension, ocl_elem_t_mangled, nvvm_elem_t_mangled, \ + elem_t_size) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_VEC_SIZE_N( \ + vec_size, elem_t, ocl_elem_t_mangled, nvvm_elem_t_mangled, elem_t_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ND( \ + 1, elem_t, vec_size, ocl_elem_t_mangled, nvvm_elem_t_mangled, \ + elem_t_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ND( \ + 2, elem_t, vec_size, ocl_elem_t_mangled, nvvm_elem_t_mangled, \ + elem_t_size) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL( \ + elem_t, ocl_elem_t_mangled, nvvm_elem_t_mangled, elem_t_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_VEC_SIZE_N( \ + 1, elem_t, ocl_elem_t_mangled, nvvm_elem_t_mangled, elem_t_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_VEC_SIZE_N( \ + 2, elem_t, ocl_elem_t_mangled, nvvm_elem_t_mangled, elem_t_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_VEC_SIZE_N( \ + 4, elem_t, ocl_elem_t_mangled, nvvm_elem_t_mangled, elem_t_size) + +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(int, i, i, 32) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(uint, j, j, 32) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(short, s, i, 16) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(ushort, t, t, 16) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(char, a, i, 8) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(uchar, h, h, 8) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(float, f, f, 32) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(half, DF16_, f, 16) + +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_READ_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ND +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_VEC_SIZE_N +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL +#undef ELEM_VEC_1 +#undef ELEM_VEC_2 +#undef ELEM_VEC_4 +#undef COORD_INPUT_1D +#undef COORD_INPUT_2D +#undef COORD_PARAMS_1D +#undef COORD_PARAMS_2D +#undef COLOR_PARAMS_1_CHANNEL +#undef COLOR_PARAMS_2_CHANNEL +#undef COLOR_PARAMS_4_CHANNEL +#undef VEC_SIZE_1 +#undef VEC_SIZE_2 +#undef VEC_SIZE_4 +#undef DVEC_SIZE_1 +#undef DVEC_SIZE_2 +#undef DVEC_SIZE_4 +#undef CONCAT +#undef CONCAT_HELP +#undef NVVM_FUNC +#undef NVVM_FUNC_HELPER +#undef MANGLE_FUNC_IMG_HANDLE_HELP diff --git a/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll b/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll index fdc7833275234..67316eff4725c 100644 --- a/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll +++ b/libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll @@ -42,6 +42,14 @@ define <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %s) nounwind always ret <4 x i32> %v } +define <2 x i32> @__clc_struct32_to_vector2({i32,i32} %s) nounwind alwaysinline { + %a = alloca {i32,i32} + store {i32,i32} %s, {i32,i32}* %a + %bc = bitcast {i32,i32} * %a to <2 x i32> * + %v = load <2 x i32>, <2 x i32> * %bc, align 128 + ret <2 x i32> %v +} + define <4 x float> @__clc_structf32_to_vector({float,float,float,float} %s) nounwind alwaysinline { %a = alloca {float,float,float,float} store {float,float,float,float} %s, {float,float,float,float}* %a @@ -485,3 +493,182 @@ entry: %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0) ret <4 x i32> %1 } + +; <--- IMAGE ARRAYS ---> + +; Surface Reads +; +; @llvm.nvvm.suld..array.v.clamp +; +; = { 1d, 2d, 3d } +; = { 2, 4 } +; = { i8, i16, i32 } +; +; Note: The case of NChannels=1 doesn't need to be handled here as it can be +; called directly. + + +; @llvm.nvvm.suld..array.v{i8, i16, i32}.clamp + +; - @llvm.nvvm.suld..array.v{2, 4}i8.clamp + +; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v2i8.clamp + +declare {i16,i16} @llvm.nvvm.suld.1d.array.v2i8.clamp(i64, i32, i32) +define <2 x i16> @__clc_llvm_nvvm_suld_1d_array_v2i8_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16} @llvm.nvvm.suld.1d.array.v2i8.clamp(i64 %img, i32 %idx, i32 %x); + %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0) + ret <2 x i16> %1 +} + +declare {i16,i16} @llvm.nvvm.suld.2d.array.v2i8.clamp(i64, i32, i32, i32) +define <2 x i16> @__clc_llvm_nvvm_suld_2d_array_v2i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16} @llvm.nvvm.suld.2d.array.v2i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y); + %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0) + ret <2 x i16> %1 +} + +declare {i16,i16} @llvm.nvvm.suld.3d.array.v2i8.clamp(i64, i32, i32, i32, i32) +define <2 x i16> @__clc_llvm_nvvm_suld_3d_array_v2i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16} @llvm.nvvm.suld.3d.array.v2i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z); + %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0) + ret <2 x i16> %1 +} + +; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v4i8.clamp + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i8.clamp(i64, i32, i32) +define <4 x i16> @__clc_llvm_nvvm_suld_1d_array_v4i8_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i8.clamp(i64 %img, i32 %idx, i32 %x); + %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0) + ret <4 x i16> %1 +} + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i8.clamp(i64, i32, i32, i32) +define <4 x i16> @__clc_llvm_nvvm_suld_2d_array_v4i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y); + %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0) + ret <4 x i16> %1 +} + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i8.clamp(i64, i32, i32, i32, i32) +define <4 x i16> @__clc_llvm_nvvm_suld_3d_array_v4i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z); + %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0) + ret <4 x i16> %1 +} + +; - @llvm.nvvm.suld..array.v{2, 4}i16.clamp + +; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v2i16.clamp + +declare {i16,i16} @llvm.nvvm.suld.1d.array.v2i16.clamp(i64, i32, i32) +define <2 x i16> @__clc_llvm_nvvm_suld_1d_array_v2i16_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16} @llvm.nvvm.suld.1d.array.v2i16.clamp(i64 %img, i32 %idx, i32 %x); + %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0) + ret <2 x i16> %1 +} + +declare {i16,i16} @llvm.nvvm.suld.2d.array.v2i16.clamp(i64, i32, i32, i32) +define <2 x i16> @__clc_llvm_nvvm_suld_2d_array_v2i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16} @llvm.nvvm.suld.2d.array.v2i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y); + %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0) + ret <2 x i16> %1 +} + +declare {i16,i16} @llvm.nvvm.suld.3d.array.v2i16.clamp(i64, i32, i32, i32, i32) +define <2 x i16> @__clc_llvm_nvvm_suld_3d_array_v2i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16} @llvm.nvvm.suld.3d.array.v2i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z); + %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0) + ret <2 x i16> %1 +} + +; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v4i16.clamp + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i16.clamp(i64, i32, i32) +define <4 x i16> @__clc_llvm_nvvm_suld_1d_array_v4i16_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i16.clamp(i64 %img, i32 %idx, i32 %x); + %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0) + ret <4 x i16> %1 +} + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i16.clamp(i64, i32, i32, i32) +define <4 x i16> @__clc_llvm_nvvm_suld_2d_array_v4i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y); + %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0) + ret <4 x i16> %1 +} + +declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i16.clamp(i64, i32, i32, i32, i32) +define <4 x i16> @__clc_llvm_nvvm_suld_3d_array_v4i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z); + %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0) + ret <4 x i16> %1 +} + +; - @llvm.nvvm.suld..array.v{2, 4}i32.clamp + +; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v2i32.clamp + +declare {i32,i32} @llvm.nvvm.suld.1d.array.v2i32.clamp(i64, i32, i32) +define <2 x i32> @__clc_llvm_nvvm_suld_1d_array_v2i32_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32} @llvm.nvvm.suld.1d.array.v2i32.clamp(i64 %img, i32 %idx, i32 %x); + %1 = tail call <2 x i32> @__clc_struct32_to_vector2({i32,i32} %0) + ret <2 x i32> %1 +} + +declare {i32,i32} @llvm.nvvm.suld.2d.array.v2i32.clamp(i64, i32, i32, i32) +define <2 x i32> @__clc_llvm_nvvm_suld_2d_array_v2i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32} @llvm.nvvm.suld.2d.array.v2i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y); + %1 = tail call <2 x i32> @__clc_struct32_to_vector2({i32,i32} %0) + ret <2 x i32> %1 +} + +declare {i32,i32} @llvm.nvvm.suld.3d.array.v2i32.clamp(i64, i32, i32, i32, i32) +define <2 x i32> @__clc_llvm_nvvm_suld_3d_array_v2i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32} @llvm.nvvm.suld.3d.array.v2i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z); + %1 = tail call <2 x i32> @__clc_struct32_to_vector2({i32,i32} %0) + ret <2 x i32> %1 +} + +; - @llvm.nvvm.suld..array.v4i32.clamp + +; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v4i32.clamp + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.array.v4i32.clamp(i64, i32, i32) +define <4 x i32> @__clc_llvm_nvvm_suld_1d_array_v4i32_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.array.v4i32.clamp(i64 %img, i32 %idx, i32 %x); + %1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0) + ret <4 x i32> %1 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.array.v4i32.clamp(i64, i32, i32, i32) +define <4 x i32> @__clc_llvm_nvvm_suld_2d_array_v4i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.array.v4i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y); + %1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0) + ret <4 x i32> %1 +} + +declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.array.v4i32.clamp(i64, i32, i32, i32, i32) +define <4 x i32> @__clc_llvm_nvvm_suld_3d_array_v4i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline { +entry: + %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.array.v4i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z); + %1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0) ret <4 x i32> %1 +} diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index d37670db4641c..3b650ab19ebab 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -194,6 +194,7 @@ enum class image_channel_type : /* unspecified */ { enum class image_type : /* unspecified */ { standard, mipmap, + array, interop, }; @@ -205,23 +206,26 @@ struct image_descriptor { image_channel_order channel_order; image_type type; unsigned int num_levels; + unsigned int array_size; image_descriptor(sycl::range<1> dims, image_channel_order channel_order, image_channel_type channel_type, image_type type = image_type::standard, - unsigned int num_levels = 1); + unsigned int num_levels = 1, unsigned int array_size = 1); image_descriptor(sycl::range<2> dims, image_channel_order channel_order, image_channel_type channel_type, image_type type = image_type::standard, - unsigned int num_levels = 1); + unsigned int num_levels = 1, unsigned int array_size = 1); image_descriptor(sycl::range<3> dims, image_channel_order channel_order, image_channel_type channel_type, image_type type = image_type::standard, - unsigned int num_levels = 1); + unsigned int num_levels = 1, unsigned int array_size = 1); image_descriptor get_mip_level_desc(unsigned int level) const; + + void verify() const; }; } @@ -236,7 +240,7 @@ semantics. [NOTE] ==== -Additional future `image_type`s _may_ include "layered" and/or "cubemap". +Additional future `image_type`s _may_ include "cubemap". ==== Note that `image_channel_type` and `image_channel_order` existed in SYCL 1.2.1, @@ -244,7 +248,13 @@ but were removed in SYCL 2020 in favor of a single, unified enum class. We propose separating them again to enable better flexibility and to avoid combinatorial complexity. -For the `standard` image type, the value of `num_levels` must be `1`. +The `verify` member function is available to check the validity of the image +descriptor against the limitations outlined below. If the given descriptor is +deemed invalid, then a `sycl::exception` will be thrown with error code +`sycl::errc::invalid`. + +For the `standard` image type, the value of `num_levels` and `array_size` must +both be `1`. The `type` member will inform the implementation of the type of image to create, allocate, or free. @@ -254,6 +264,8 @@ member function `get_mip_level_desc` will return an `image_descriptor` for a given level of a mipmap, with valid dimension values for that level, and the type of the returned `image_descriptor` will be `image_type::standard`. +Only array image types support more than one array layer. + === Allocating image memory The process of creating an image is two-fold: @@ -1032,8 +1044,8 @@ kernel must be submitted for the written data to be accessible. [NOTE] ==== -Attempting to read an image with `read_mipmap` or any other defined read -function will result in undefined behaviour. +Attempting to read a standard image type with `read_mipmap`, `read_image_array`, +or any other defined read function will result in undefined behaviour. ==== === Recognized standard types [[recognized_standard_types]] @@ -1189,6 +1201,102 @@ Attempting to read a mipmap with `read_image` or any other defined read function will result in undefined behaviour. ==== +== Image arrays + +Another type of image we propose support for is image arrays. Image arrays are +images made up of multiple array indices where each index is itself an image and +every index has the same dimensionality, size, and data type. + +Image arrays may also be referred to as layered images, and the array indices +may be referred to layers. + +=== Allocation of image arrays + +Image arrays are allocated in a similar manner to standard images. + +Image array memory is allocated through `alloc_image_mem`. The user should +populate the `image_descriptor` with the image type of `image_type::array`, +and provide the size of the array they wish to allocate. The value of +`array_size` must be greater than `1`. + +Image array memory allocated this way requires the user to free that memory +after all operations using the memory are completed and no more operations +operating on the memory will be scheduled. This is done using `free_image_mem`, +passing `image_type::array`. + +The RAII class `image_mem` may also be used to perform allocation and +deallocation of arrayed image device memory. The constructor and destructor act +as a wrapper for the functions `alloc_image_mem` and `free_image_mem` +respectively. + +[NOTE] +==== +Currently there is no support for image arrays backed by USM. +==== + +=== Obtaining a handle to an image array + +A handle to an image array is acquired in the same way as +`unsampled_image_handle`. We create the handle through the `create_image` +functions which take `image_descriptor` that has `image_type::array` and +`array_size` greater than `1`. + +[NOTE] +==== +Currently there is no support for sampled image arrays. +==== + +=== Copying image array data + +When copying to or from image arrays, the user should copy to/from the entire +array of images in one call to `ext_oneapi_copy` by passing the image arrays' +`image_mem_handle`. + +=== Reading an image array + +Inside the kernel, it's possible to read an image array via `read_image_array`, +passing the `unsampled_image_handle`, the coordinates, and the array index. + +```c++ +// Read an unsampled image array +template +DataT read_image_array(const unsampled_image_handle &ImageHandle, + const CoordT &Coords, const unsigned int ArrayLayer); +``` + +Reading an image array follows the same restrictions on what coordinate types +may be used as laid out in <>. + +[NOTE] +==== +Attempting to read an image array with `read_image`, `read_mipmap` or any other +defined read function will result in undefined behaviour. +==== + +=== Writing an image array + +Inside the kernel, it's possible to write to an image array via +`write_image_array`, passing the `unsampled_image_handle`, the coordinates, the +array index, and the data to write. User-defined types are allowed to be written +provided that type is trivially copyable. + +```c++ +// Write to an unsampled image array +template +DataT write_image_array(const unsampled_image_handle &ImageHandle, + const CoordT &Coords, const unsigned int ArrayLayer + const DataT &Color); +``` + +Writing to an image array follows the same restrictions on what coordinate types +may be used as laid out in <>. + +[NOTE] +==== +Attempting to write to an image array with `write_image` or any other defined +write function will result in undefined behaviour. +==== + == Interoperability === Querying interoperability support @@ -1749,6 +1857,130 @@ try { bool validated = (dataOut == dataExpected); ``` +=== 1D image array read/write +```cpp +using VecType = sycl::vec; + +sycl::device dev; +sycl::queue q(dev); +auto ctxt = q.get_context(); + +// declare image data +constexpr size_t width = 5; +constexpr size_t array_size = 2; +constexpr size_t N = width; +std::vector out(N * array_size); +std::vector expected(N * array_size); +std::vector outBuf(N); +std::vector dataIn1(N * array_size); +std::vector dataIn2(N * array_size); + +for (int i = 0; i < N * array_size; i++) { + // Populate input data (to-be image arrays) + dataIn1[i] = VecType(i); + dataIn2[i] = VecType(2*i); +} + +// Populate expected output +for (int i = 0; i < width; i++) { + for (int l = 0; l < array_size; l++) { + expected[l * N + i] = dataIn1[l * N + i][0] + dataIn2[l * N + i][0]; + } +} + +try { + // Extension: image descriptor -- number of layers + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, sycl::image_channel_order::rgba, sycl::image_channel_type::fp32, + sycl::ext::oneapi::experimental::image_type::array, 1, array_size); + + // Extension: allocate image array memory on device + sycl::ext::oneapi::experimental::image_mem arrayMem1(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem arrayMem2(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem outMem(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), arrayMem1.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), arrayMem2.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create a unsampled image handles to represent the image arrays + sycl::ext::oneapi::experimental::unsampled_image_handle arrayHandle1 = + sycl::ext::oneapi::experimental::create_image(arrayMem1, desc, dev, + ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle arrayHandle2 = + sycl::ext::oneapi::experimental::create_image(arrayMem2, desc, dev, + ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle outHandle = + sycl::ext::oneapi::experimental::create_image(outMem, desc, dev, + ctxt); + + q.submit([&](sycl::handler &cgh) { + + cgh.parallel_for(N, [=](sycl::id<1> id) { + float sum1 = 0; + float sum2 = 0; + + // Extension: read image layers 0 and 1 + VecType px1 = sycl::ext::oneapi::experimental::read_image_array( + arrayHandle1, int(id[0]), 0); + VecType px2 = sycl::ext::oneapi::experimental::read_image_array( + arrayHandle1, int(id[0]), 1); + + // Extension: read image layers 0 and 1 + VecType px3 = sycl::ext::oneapi::experimental::read_image_array( + arrayHandle2, int(id[0]), 0); + VecType px4 = sycl::ext::oneapi::experimental::read_image_array( + arrayHandle2, int(id[0]), 1); + + sum1 = px1[0] + px3[0]; + sum2 = px2[0] + px4[0]; + + // Extension: write to image layers with handle + sycl::ext::oneapi::experimental::write_image_array( + outHandle, int(id[0]), 0, VecType(sum1)); + sycl::ext::oneapi::experimental::write_image_array( + outHandle, int(id[0]), 1, VecType(sum2)); + }); + }); + + q.wait_and_throw(); + + // Extension: copy data from device to host + q.ext_oneapi_copy(outMem.get_handle(), out.data(), desc); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(arrayHandle1, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(arrayHandle2, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(outHandle, dev, ctxt); + +} catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + std::cout << "Test failed!" << std::endl; + exit(1); +} catch (...) { + std::cerr << "Unknown exception caught!\n"; + std::cout << "Test failed!" << std::endl; + exit(2); +} + +// collect and validate output +bool validated = true; +for (int i = 0; i < N * array_size; i++) { + bool mismatch = false; + if (out[i][0] != expected[i]) { + mismatch = true; + validated = false; + } +} +if (validated) { + return 0; +} + +return 1; +``` + === Using imported memory and semaphore objects ```c++ @@ -1933,7 +2165,6 @@ There are dimension specific limitations: These features still need to be handled: * Level Zero and SPIR-V support -* Layered images * Cubemap images == Revision History @@ -2060,4 +2291,10 @@ These features still need to be handled: wording around what types are allowed to be read or written. - Allow `read_image` and `read_mipmap` to return a user-defined type. +|5.1|2024-01-04| - Added support for unsampled image arrays. + - Creation of unsampled image arrays. + - Reading/writing of unsampled image arrays. + - `image_type::array` added to enum. + - `array_size` member added to `image_descriptor`. + - `image_descriptor::verify()` member function added. |====================== diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp index ea1a6580d30e6..e25730ddd7151 100644 --- a/sycl/include/CL/__spirv/spirv_ops.hpp +++ b/sycl/include/CL/__spirv/spirv_ops.hpp @@ -199,6 +199,13 @@ extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageWrite(ImageT, CoordT, ValT); template extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT); +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageArrayRead(ImageT, TempArgT, int); + +template +extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageArrayWrite(ImageT, CoordT, int, + ValT); + template extern __DPCPP_SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT, __ocl_sampler_t); diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index 60db7ccb645c8..86d7e8153c6cd 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -83,6 +83,35 @@ static RetType __invoke__ImageRead(ImageT Img, CoordT Coords) { return sycl::detail::convertDataToType(Ret); } +template +static RetType __invoke__ImageArrayRead(ImageT Img, CoordT Coords, + int ArrayLayer) { + + // Convert from sycl types to builtin types to get correct function mangling. + using TempRetT = sycl::detail::ConvertToOpenCLType_t; + using TempArgT = sycl::detail::ConvertToOpenCLType_t; + + TempArgT Arg = sycl::detail::convertDataToType(Coords); + TempRetT Ret = + __spirv_ImageArrayRead(Img, Arg, ArrayLayer); + return sycl::detail::convertDataToType(Ret); +} + +template +static void __invoke__ImageArrayWrite(ImageT Img, CoordT Coords, int ArrayLayer, + ValT Val) { + + // Convert from sycl types to builtin types to get correct function mangling. + using TmpValT = sycl::detail::ConvertToOpenCLType_t; + using TmpCoordT = sycl::detail::ConvertToOpenCLType_t; + + TmpCoordT TmpCoord = + sycl::detail::convertDataToType(Coords); + TmpValT TmpVal = sycl::detail::convertDataToType(Val); + __spirv_ImageArrayWrite(Img, TmpCoord, ArrayLayer, + TmpVal); +} + template static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords, float Level) { diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 847f53ea2547f..245e05328b34d 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -1030,6 +1030,45 @@ DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]], #endif } +/** + * @brief Read an unsampled image array using its handle + * + * @tparam DataT The return type + * @tparam CoordT The input coordinate type. e.g. int or int2 for 1D or 2D, + * respectively + * @param imageHandle The image handle + * @param coords The coordinates at which to fetch image data + * @param arrayLayer The image array layer at which to read + * @return Image data + * + * __NVPTX__: Name mangling info + * Cuda surfaces require integer coords (by bytes) + * Cuda textures require float coords (by element or normalized) + * The name mangling should therefore not interfere with one + * another + */ +template +DataT read_image_array(const unsampled_image_handle &imageHandle + [[maybe_unused]], + const CoordT &coords [[maybe_unused]], + const int arrayLayer [[maybe_unused]]) { + constexpr size_t coordSize = detail::coord_size(); + static_assert(coordSize == 1 || coordSize == 2, + "Expected input coordinate to be have 1 or 2 components for 1D " + "and 2D images respectively."); + +#ifdef __SYCL_DEVICE_ONLY__ +#if defined(__NVPTX__) + return __invoke__ImageArrayRead(imageHandle.raw_handle, coords, + arrayLayer); +#else + // TODO: add SPIRV part for unsampled image array read +#endif +#else + assert(false); // Bindless images not yet implemented on host +#endif +} + /** * @brief Write to an unsampled image using its handle * @@ -1038,6 +1077,7 @@ DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]], * 1D, 2D, and 3D, respectively * @param imageHandle The image handle * @param coords The coordinates at which to write image data + * @param color The data to write */ template void write_image(const unsampled_image_handle &imageHandle [[maybe_unused]], @@ -1063,6 +1103,41 @@ void write_image(const unsampled_image_handle &imageHandle [[maybe_unused]], #endif } +/** + * @brief Write to an unsampled image array using its handle + * + * @tparam DataT The data type to write + * @tparam CoordT The input coordinate type. e.g. int or int2 for 1D or 2D, + * respectively + * @param imageHandle The image handle + * @param coords The coordinates at which to write image data + * @param arrayLayer The image array layer at which to write + * @param color The data to write + */ +template +void write_image_array(const unsampled_image_handle &imageHandle + [[maybe_unused]], + const CoordT &coords [[maybe_unused]], + const int arrayLayer [[maybe_unused]], + const DataT &color [[maybe_unused]]) { + detail::assert_unsampled_coords(); + constexpr size_t coordSize = detail::coord_size(); + static_assert(coordSize == 1 || coordSize == 2, + "Expected input coordinate to be have 1 or 2 components for 1D " + "and 2D images respectively."); + +#ifdef __SYCL_DEVICE_ONLY__ +#if defined(__NVPTX__) + __invoke__ImageArrayWrite((uint64_t)imageHandle.raw_handle, coords, + arrayLayer, detail::convert_color(color)); +#else + // TODO: add SPIRV part for unsampled image array write +#endif +#else + assert(false); // Bindless images not yet implemented on host +#endif +} + } // namespace ext::oneapi::experimental } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp index dd8751992bd7c..76acca9e1dfd0 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp @@ -24,10 +24,10 @@ namespace ext::oneapi::experimental { /// image type enum enum class image_type : unsigned int { standard = 0, - interop = 1, - mipmap = 2, + mipmap = 1, + array = 2, cubemap = 3, /* Not implemented */ - layered = 4, /* Not implemented */ + interop = 4, }; /// A struct to describe the properties of an image. @@ -39,30 +39,39 @@ struct image_descriptor { image_channel_type channel_type; image_type type; unsigned int num_levels; + unsigned int array_size; image_descriptor() = default; image_descriptor(range<1> dims, image_channel_order channel_order, image_channel_type channel_type, image_type type = image_type::standard, - unsigned int num_levels = 1) + unsigned int num_levels = 1, unsigned int array_size = 1) : width(dims[0]), height(0), depth(0), channel_order(channel_order), - channel_type(channel_type), type(type), num_levels(num_levels) {} + channel_type(channel_type), type(type), num_levels(num_levels), + array_size(array_size) { + verify(); + } image_descriptor(range<2> dims, image_channel_order channel_order, image_channel_type channel_type, image_type type = image_type::standard, - unsigned int num_levels = 1) + unsigned int num_levels = 1, unsigned int array_size = 1) : width(dims[0]), height(dims[1]), depth(0), channel_order(channel_order), - channel_type(channel_type), type(type), num_levels(num_levels) {} + channel_type(channel_type), type(type), num_levels(num_levels), + array_size(array_size) { + verify(); + } image_descriptor(range<3> dims, image_channel_order channel_order, image_channel_type channel_type, image_type type = image_type::standard, - unsigned int num_levels = 1) + unsigned int num_levels = 1, unsigned int array_size = 1) : width(dims[0]), height(dims[1]), depth(dims[2]), channel_order(channel_order), channel_type(channel_type), type(type), - num_levels(num_levels){}; + num_levels(num_levels), array_size(array_size) { + verify(); + }; /// Get the descriptor for a mipmap level image_descriptor get_mip_level_desc(unsigned int level) const { @@ -88,8 +97,67 @@ struct image_descriptor { sycl::ext::oneapi::experimental::image_descriptor levelDesc( {width, height, depth}, this->channel_order, this->channel_type); + levelDesc.verify(); return levelDesc; } + + void verify() const { + if (this->type == image_type::standard) { + if (this->array_size > 1) { + // Not a standard image + throw sycl::exception( + sycl::errc::invalid, + "Standard images cannot have array_size greater than 1! Use " + "image_type::array for image arrays."); + } + if (this->num_levels > 1) { + // Image arrays cannot be mipmaps + throw sycl::exception( + sycl::errc::invalid, + "Standard images cannot have num_levels greater than 1! Use " + "image_type::mipmap for mipmap images."); + } + } else if (this->type == image_type::array) { + if (this->array_size <= 1) { + // Not an image array + throw sycl::exception(sycl::errc::invalid, + "Image array must have array_size greater than " + "1! Use image_type::standard otherwise."); + } + if (this->depth != 0) { + // Image arrays must only be 1D or 2D + throw sycl::exception(sycl::errc::invalid, + "Cannot have 3D image arrays! Either depth must " + "be 0 or array_size must be 1."); + } + if (this->num_levels != 1) { + // Image arrays cannot be mipmaps + throw sycl::exception(sycl::errc::invalid, + "Cannot have mipmap image arrays! Either " + "num_levels or array_size must be 1."); + } + } else if (this->type == image_type::mipmap) { + if (this->array_size > 1) { + // Mipmap images cannot be arrays + throw sycl::exception( + sycl::errc::invalid, + "Mipmap images cannot have array_size greater than 1! Use " + "image_type::array for image arrays."); + } + if (this->num_levels <= 1) { + // Mipmaps must have more than one level + throw sycl::exception(sycl::errc::invalid, + "Mipmap images must have num_levels greater than " + "1! Use image_type::standard otherwise."); + } + } else if (this->type == image_type::interop) { + // No checks to be made. + } else { + // Invalid image type + throw sycl::exception(sycl::errc::invalid, + "Invalid image descriptor image type"); + } + } }; } // namespace ext::oneapi::experimental diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index bfe3360006c9d..cca84450785a5 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -56,14 +56,13 @@ endif() if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 6032f6fde37eeadcc3f12c10460e3ffec161dd33 - # Merge: 92f44da3 f9ad3d42 - # Author: Kenneth Benzie (Benie) - # Date: Mon Jan 22 12:30:12 2024 +0000 - # Merge pull request #1259 from igchor/fix_sync - # [L0] do not ignore returned values from zeHostSynchronize - set(UNIFIED_RUNTIME_TAG 6032f6fde37eeadcc3f12c10460e3ffec161dd33) + set(UNIFIED_RUNTIME_REPO "https://github.com/isaacault/unified-runtime.git") + # commit f640ae0454750be60407e9d4620da0b1db970b53 + # Author: Isaac Ault + # Date: Tue Jan 23 12:34:08 2024 +0000 + # + # [Bindless][Exp] Add Support For Image Arrays + set(UNIFIED_RUNTIME_TAG f640ae0454750be60407e9d4620da0b1db970b53) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/source/detail/bindless_images.cpp b/sycl/source/detail/bindless_images.cpp index b06435891d270..21e82fb8183ee 100644 --- a/sycl/source/detail/bindless_images.cpp +++ b/sycl/source/detail/bindless_images.cpp @@ -28,11 +28,25 @@ void populate_pi_structs(const image_descriptor &desc, pi_image_desc &piDesc, piDesc.image_width = desc.width; piDesc.image_height = desc.height; piDesc.image_depth = desc.depth; - piDesc.image_type = desc.depth > 0 ? PI_MEM_TYPE_IMAGE3D - : (desc.height > 0 ? PI_MEM_TYPE_IMAGE2D - : PI_MEM_TYPE_IMAGE1D); + + if (desc.array_size > 1) { + // Image Array + if (desc.depth > 0) { + // Image arrays must be 1D or 2D + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "No support for 3D image arrays."); + } + piDesc.image_type = + desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + } else { + piDesc.image_type = + desc.depth > 0 + ? PI_MEM_TYPE_IMAGE3D + : (desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D); + } + piDesc.image_row_pitch = pitch; - piDesc.image_array_size = 0; + piDesc.image_array_size = desc.array_size; piDesc.image_slice_pitch = 0; piDesc.num_mip_levels = desc.num_levels; piDesc.num_samples = 0; @@ -154,21 +168,7 @@ alloc_image_mem(const image_descriptor &desc, const sycl::device &syclDevice, pi_device Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - if (desc.type == image_type::mipmap) { - // Mipmaps must have more than one level - if (desc.num_levels <= 1) - throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), - "Mipmap number of levels must be 2 or more"); - } else if (desc.type == image_type::standard) { - // Non-mipmap images must have only 1 level - if (desc.num_levels != 1) - throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), - "Image number of levels must be 1"); - } else { - // Not an image to allocate - throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), - "Invalid image type to allocate"); - } + desc.verify(); pi_image_desc piDesc; pi_image_format piFormat; @@ -273,7 +273,8 @@ __SYCL_EXPORT void free_image_mem(image_mem_handle memHandle, Plugin->call( C, Device, memHandle.raw_handle); - } else if (imageType == image_type::standard) { + } else if (imageType == image_type::standard || + imageType == image_type::array) { Plugin->call( C, Device, memHandle.raw_handle); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 02ffef951d1b5..55b052f308338 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -966,9 +966,24 @@ void handler::ext_oneapi_copy( PiDesc.image_width = Desc.width; PiDesc.image_height = Desc.height; PiDesc.image_depth = Desc.depth; - PiDesc.image_type = Desc.depth > 0 ? PI_MEM_TYPE_IMAGE3D - : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D - : PI_MEM_TYPE_IMAGE1D); + PiDesc.image_array_size = Desc.array_size; + + if (Desc.array_size > 1) { + // Image Array + if (Desc.depth > 0) { + // Image arrays must be 1D or 2D + throw sycl::exception( + sycl::make_error_code(sycl::errc::invalid), + "Image descriptor malformed - cannot copy 3D image arrays."); + } + PiDesc.image_type = + Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + } else { + PiDesc.image_type = + Desc.depth > 0 + ? PI_MEM_TYPE_IMAGE3D + : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D); + } sycl::detail::pi::PiMemImageFormat PiFormat; PiFormat.image_channel_data_type = @@ -1002,10 +1017,24 @@ void handler::ext_oneapi_copy( PiDesc.image_width = DestImgDesc.width; PiDesc.image_height = DestImgDesc.height; PiDesc.image_depth = DestImgDesc.depth; - PiDesc.image_type = DestImgDesc.depth > 0 - ? PI_MEM_TYPE_IMAGE3D - : (DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D - : PI_MEM_TYPE_IMAGE1D); + PiDesc.image_array_size = DestImgDesc.array_size; + + if (DestImgDesc.array_size > 1) { + // Image Array + if (DestImgDesc.depth > 0) { + // Image arrays must be 1D or 2D + throw sycl::exception( + sycl::make_error_code(sycl::errc::invalid), + "Image descriptor malformed - cannot copy 3D image arrays."); + } + PiDesc.image_type = DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY + : PI_MEM_TYPE_IMAGE1D_ARRAY; + } else { + PiDesc.image_type = DestImgDesc.depth > 0 + ? PI_MEM_TYPE_IMAGE3D + : (DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D + : PI_MEM_TYPE_IMAGE1D); + } sycl::detail::pi::PiMemImageFormat PiFormat; PiFormat.image_channel_data_type = @@ -1037,9 +1066,24 @@ void handler::ext_oneapi_copy( PiDesc.image_width = Desc.width; PiDesc.image_height = Desc.height; PiDesc.image_depth = Desc.depth; - PiDesc.image_type = Desc.depth > 0 ? PI_MEM_TYPE_IMAGE3D - : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D - : PI_MEM_TYPE_IMAGE1D); + PiDesc.image_array_size = Desc.array_size; + + if (Desc.array_size > 1) { + // Image Array + if (Desc.depth > 0) { + // Image arrays must be 1D or 2D + throw sycl::exception( + sycl::make_error_code(sycl::errc::invalid), + "Image descriptor malformed - cannot copy 3D image arrays."); + } + PiDesc.image_type = + Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + } else { + PiDesc.image_type = + Desc.depth > 0 + ? PI_MEM_TYPE_IMAGE3D + : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D); + } sycl::detail::pi::PiMemImageFormat PiFormat; PiFormat.image_channel_data_type = @@ -1073,10 +1117,24 @@ void handler::ext_oneapi_copy( PiDesc.image_width = SrcImgDesc.width; PiDesc.image_height = SrcImgDesc.height; PiDesc.image_depth = SrcImgDesc.depth; - PiDesc.image_type = - SrcImgDesc.depth > 0 - ? PI_MEM_TYPE_IMAGE3D - : (SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D); + PiDesc.image_array_size = SrcImgDesc.array_size; + + if (SrcImgDesc.array_size > 1) { + // Image Array + if (SrcImgDesc.depth > 0) { + // Image arrays must be 1D or 2D + throw sycl::exception( + sycl::make_error_code(sycl::errc::invalid), + "Image descriptor malformed - cannot copy 3D image arrays."); + } + PiDesc.image_type = SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY + : PI_MEM_TYPE_IMAGE1D_ARRAY; + } else { + PiDesc.image_type = SrcImgDesc.depth > 0 + ? PI_MEM_TYPE_IMAGE3D + : (SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D + : PI_MEM_TYPE_IMAGE1D); + } sycl::detail::pi::PiMemImageFormat PiFormat; PiFormat.image_channel_data_type = @@ -1108,9 +1166,24 @@ void handler::ext_oneapi_copy( PiDesc.image_width = Desc.width; PiDesc.image_height = Desc.height; PiDesc.image_depth = Desc.depth; - PiDesc.image_type = Desc.depth > 0 ? PI_MEM_TYPE_IMAGE3D - : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D - : PI_MEM_TYPE_IMAGE1D); + PiDesc.image_array_size = Desc.array_size; + + if (Desc.array_size > 1) { + // Image Array + if (Desc.depth > 0) { + // Image arrays must be 1D or 2D + throw sycl::exception( + sycl::make_error_code(sycl::errc::invalid), + "Image descriptor malformed - cannot copy 3D image arrays."); + } + PiDesc.image_type = + Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + } else { + PiDesc.image_type = + Desc.depth > 0 + ? PI_MEM_TYPE_IMAGE3D + : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D); + } sycl::detail::pi::PiMemImageFormat PiFormat; PiFormat.image_channel_data_type = @@ -1146,10 +1219,24 @@ void handler::ext_oneapi_copy( PiDesc.image_width = DeviceImgDesc.width; PiDesc.image_height = DeviceImgDesc.height; PiDesc.image_depth = DeviceImgDesc.depth; - PiDesc.image_type = DeviceImgDesc.depth > 0 - ? PI_MEM_TYPE_IMAGE3D - : (DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D - : PI_MEM_TYPE_IMAGE1D); + PiDesc.image_array_size = DeviceImgDesc.array_size; + + if (DeviceImgDesc.array_size > 1) { + // Image Array + if (DeviceImgDesc.depth > 0) { + // Image arrays must be 1D or 2D + throw sycl::exception( + sycl::make_error_code(sycl::errc::invalid), + "Image descriptor malformed - cannot copy 3D image arrays."); + } + PiDesc.image_type = DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY + : PI_MEM_TYPE_IMAGE1D_ARRAY; + } else { + PiDesc.image_type = DeviceImgDesc.depth > 0 + ? PI_MEM_TYPE_IMAGE3D + : (DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D + : PI_MEM_TYPE_IMAGE1D); + } sycl::detail::pi::PiMemImageFormat PiFormat; PiFormat.image_channel_data_type = diff --git a/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp new file mode 100644 index 0000000000000..300f01cc9a440 --- /dev/null +++ b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp @@ -0,0 +1,467 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include "../bindless_helpers.hpp" +#include +#include +#include +#include + +static sycl::device dev; + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +// Helpers and utilities +struct util { + // parallel_for 3D + template > + static void run_ndim_test( + sycl::queue q, sycl::range<3> globalSize, sycl::range<3> localSize, + sycl::ext::oneapi::experimental::unsampled_image_handle input_0, + sycl::ext::oneapi::experimental::unsampled_image_handle input_1, + sycl::ext::oneapi::experimental::unsampled_image_handle output) { + using VecType = sycl::vec; + try { + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range{globalSize, localSize}, + [=](sycl::nd_item it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + size_t dim2 = it.get_global_id(2); + + if constexpr (NChannels >= 1) { + VecType px1 = + sycl::ext::oneapi::experimental::read_image_array( + input_0, sycl::int2(dim0, dim1), int(dim2)); + VecType px2 = + sycl::ext::oneapi::experimental::read_image_array( + input_1, sycl::int2(dim0, dim1), int(dim2)); + + auto sum = VecType( + bindless_helpers::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image_array( + output, sycl::int2(dim0, dim1), int(dim2), VecType(sum)); + } else { + DType px1 = + sycl::ext::oneapi::experimental::read_image_array( + input_0, sycl::int2(dim0, dim1), int(dim2)); + DType px2 = + sycl::ext::oneapi::experimental::read_image_array( + input_1, sycl::int2(dim0, dim1), int(dim2)); + + auto sum = DType( + bindless_helpers::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image_array( + output, sycl::int2(dim0, dim1), int(dim2), DType(sum)); + } + }); + }); + } catch (sycl::exception e) { + std::cout << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); + } catch (...) { + std::cout << "\tKernel submission failed!" << std::endl; + exit(-1); + } + } + + // parallel_for 2D + template > + static void run_ndim_test( + sycl::queue q, sycl::range<2> globalSize, sycl::range<2> localSize, + sycl::ext::oneapi::experimental::unsampled_image_handle input_0, + sycl::ext::oneapi::experimental::unsampled_image_handle input_1, + sycl::ext::oneapi::experimental::unsampled_image_handle output) { + using VecType = sycl::vec; + try { + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range{globalSize, localSize}, + [=](sycl::nd_item it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + + if constexpr (NChannels >= 1) { + VecType px1 = + sycl::ext::oneapi::experimental::read_image_array( + input_0, int(dim0), int(dim1)); + VecType px2 = + sycl::ext::oneapi::experimental::read_image_array( + input_1, int(dim0), int(dim1)); + + auto sum = VecType( + bindless_helpers::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image_array( + output, int(dim0), int(dim1), VecType(sum)); + } else { + DType px1 = + sycl::ext::oneapi::experimental::read_image_array( + input_0, int(dim0), int(dim1)); + DType px2 = + sycl::ext::oneapi::experimental::read_image_array( + input_1, int(dim0), int(dim1)); + + auto sum = DType( + bindless_helpers::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image_array( + output, int(dim0), int(dim1), DType(sum)); + } + }); + }); + } catch (sycl::exception e) { + std::cout << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); + } catch (...) { + std::cout << "\tKernel submission failed!" << std::endl; + exit(-1); + } + } +}; + +template +bool run_test(sycl::range dims, sycl::range localSize, + unsigned int seed = 0) { + using VecType = sycl::vec; + + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // skip half tests if not supported + if constexpr (std::is_same_v) { + if (!dev.has(sycl::aspect::fp16)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fp16\n"; +#endif + return false; + } + } + + size_t num_elems = dims.size(); + + std::vector input_0(num_elems); + std::vector input_1(num_elems); + std::vector expected(num_elems); + std::vector actual(num_elems); + + std::srand(seed); + bindless_helpers::fillRand(input_0, seed); + bindless_helpers::fillRand(input_1, seed); + bindless_helpers::add_host(input_0, input_1, expected); + + try { + sycl::ext::oneapi::experimental::image_descriptor desc( + {dims[0], NDims > 2 ? dims[1] : 0}, COrder, CType, + sycl::ext::oneapi::experimental::image_type::array, 1, + NDims > 2 ? dims[2] : dims[1]); + + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, q); + + auto img_input_0 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, q); + auto img_input_1 = + sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, q); + auto img_output = + sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, q); + + // Extension: copy over data to device + q.ext_oneapi_copy(input_0.data(), img_mem_0.get_handle(), desc); + q.ext_oneapi_copy(input_1.data(), img_mem_1.get_handle(), desc); + q.wait(); + + { + sycl::range globalSize = dims; + q.wait(); + util::run_ndim_test( + q, globalSize, localSize, img_input_0, img_input_1, img_output); + q.wait(); + + q.ext_oneapi_copy(img_mem_2.get_handle(), actual.data(), desc); + q.wait(); + } + + // Cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(img_input_0, q); + sycl::ext::oneapi::experimental::destroy_image_handle(img_input_1, q); + sycl::ext::oneapi::experimental::destroy_image_handle(img_output, q); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < num_elems; i++) { + for (int j = 0; j < NChannels; ++j) { + bool mismatch = false; + if (actual[i][j] != expected[i][j]) { + mismatch = true; + validated = false; + } + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "\tResult mismatch at [" << i << "][" << j + << "] Expected: " << +expected[i][j] + << ", Actual: " << +actual[i][j] << std::endl; +#else + break; +#endif + } + } + } +#ifdef VERBOSE_PRINT + if (validated) { + std::cout << "\tTest passed!" << std::endl; + } else { + std::cout << "\tTest failed!\n"; + } +#endif + + return !validated; +} + +void printTestName(std::string name) { +#ifdef VERBOSE_PRINT + std::cout << name; +#endif +} + +int main() { + + unsigned int seed = 0; + bool failed = false; + + printTestName("Running 1D int\n"); + failed |= run_test<2, int32_t, 1, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::r, class int_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D int\n"); + failed |= run_test<3, int32_t, 1, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::r, class int_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D int2\n"); + failed |= run_test<2, int32_t, 2, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rg, class int2_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D int2\n"); + failed |= run_test<3, int32_t, 2, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rg, class int2_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D int4\n"); + failed |= run_test<2, int32_t, 4, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rgba, class int4_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D int4\n"); + failed |= run_test<3, int32_t, 4, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rgba, class int4_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D unsigned int\n"); + failed |= run_test<2, uint32_t, 1, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::r, class uint_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D unsigned int\n"); + failed |= run_test<3, uint32_t, 1, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::r, class uint_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned int2\n"); + failed |= run_test<2, uint32_t, 2, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rg, class uint2_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D unsigned int2\n"); + failed |= run_test<3, uint32_t, 2, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rg, class uint2_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned int4\n"); + failed |= run_test<2, uint32_t, 4, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rgba, class uint4_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D unsigned int4\n"); + failed |= run_test<3, uint32_t, 4, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rgba, class uint4_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D short\n"); + failed |= run_test<2, short, 1, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::r, class short_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D short\n"); + failed |= run_test<3, short, 1, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::r, class short_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D short2\n"); + failed |= run_test<2, short, 2, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rg, class short2_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D short2\n"); + failed |= run_test<3, short, 2, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rg, class short2_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D short4\n"); + failed |= run_test<2, short, 4, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rgba, class short4_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D short4\n"); + failed |= run_test<3, short, 4, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rgba, class short4_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D unsigned short\n"); + failed |= + run_test<2, unsigned short, 1, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::r, class ushort_1d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 2D unsigned short\n"); + failed |= + run_test<3, unsigned short, 1, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::r, class ushort_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned short2\n"); + failed |= + run_test<2, unsigned short, 2, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rg, class ushort2_1d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 2D unsigned short2\n"); + failed |= + run_test<3, unsigned short, 2, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rg, class ushort2_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned short4\n"); + failed |= + run_test<2, unsigned short, 4, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rgba, class ushort4_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D unsigned short4\n"); + failed |= + run_test<3, unsigned short, 4, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rgba, class ushort4_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D char\n"); + failed |= run_test<2, signed char, 1, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::r, class char_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D char\n"); + failed |= run_test<3, signed char, 1, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::r, class char_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D char2\n"); + failed |= run_test<2, signed char, 2, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rg, class char2_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D char2\n"); + failed |= run_test<3, signed char, 2, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rg, class char2_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D char4\n"); + failed |= run_test<2, signed char, 4, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rgba, class char4_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D char4\n"); + failed |= run_test<3, signed char, 4, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rgba, class char4_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D unsigned char\n"); + failed |= + run_test<2, unsigned char, 1, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::r, class uchar_1d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 2D unsigned char\n"); + failed |= + run_test<3, unsigned char, 1, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::r, class uchar_2d>({48, 128, 32}, + {16, 16, 4}, seed); + printTestName("Running 1D unsigned char2\n"); + failed |= + run_test<2, unsigned char, 2, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rg, class uchar2_1d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 2D unsigned char2\n"); + failed |= + run_test<3, unsigned char, 2, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rg, class uchar2_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned char4\n"); + failed |= + run_test<2, unsigned char, 4, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rgba, class uchar4_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D unsigned char4\n"); + failed |= + run_test<3, unsigned char, 4, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rgba, class uchar4_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D float\n"); + failed |= run_test<2, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class float_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D float\n"); + failed |= run_test<3, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class float_2d>( + {1024, 832, 32}, {16, 16, 4}, seed); + printTestName("Running 1D float2\n"); + failed |= run_test<2, float, 2, sycl::image_channel_type::fp32, + sycl::image_channel_order::rg, class float2_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D float2\n"); + failed |= run_test<3, float, 2, sycl::image_channel_type::fp32, + sycl::image_channel_order::rg, class float2_2d>( + {832, 1024, 32}, {16, 16, 4}, seed); + printTestName("Running 1D float4\n"); + failed |= run_test<2, float, 4, sycl::image_channel_type::fp32, + sycl::image_channel_order::rgba, class float4_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D float4\n"); + failed |= run_test<3, float, 4, sycl::image_channel_type::fp32, + sycl::image_channel_order::rgba, class float4_2d>( + {1024, 1024, 16}, {16, 16, 4}, seed); + + printTestName("Running 1D half\n"); + failed |= run_test<2, sycl::half, 1, sycl::image_channel_type::fp16, + sycl::image_channel_order::r, class half_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D half\n"); + failed |= run_test<3, sycl::half, 1, sycl::image_channel_type::fp16, + sycl::image_channel_order::r, class half_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D half2\n"); + failed |= run_test<2, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class half2_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D half2\n"); + failed |= run_test<3, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class half2_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D half4\n"); + failed |= run_test<2, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class half4_1d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 2D half4\n"); + failed |= run_test<3, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class half4_2d>( + {48, 128, 32}, {16, 16, 4}, seed); + + if (failed) { + std::cerr << "An error has occured!\n"; + return 1; + } + + std::cout << "All tests passed!\n"; + return 0; +} diff --git a/sycl/test-e2e/bindless_images/bindless_helpers.hpp b/sycl/test-e2e/bindless_images/bindless_helpers.hpp new file mode 100644 index 0000000000000..c963c06539f75 --- /dev/null +++ b/sycl/test-e2e/bindless_images/bindless_helpers.hpp @@ -0,0 +1,59 @@ +#pragma once +#include +#include + +namespace bindless_helpers { + +template +static void fillRand(std::vector> &v, int seed) { + std::default_random_engine generator; + generator.seed(seed); + auto distribution = [&]() { + if constexpr (std::is_same_v) { + return std::uniform_real_distribution(0.0, 100.0); + } else if constexpr (std::is_floating_point_v) { + return std::uniform_real_distribution(0.0, 100.0); + } else { + return std::uniform_int_distribution(0, 100); + } + }(); + for (int i = 0; i < v.size(); ++i) { + sycl::vec temp; + + for (int j = 0; j < NChannels; j++) { + temp[j] = distribution(generator); + } + + v[i] = temp; + } +} + +template +static void add_host(const std::vector> &in_0, + const std::vector> &in_1, + std::vector> &out) { + for (int i = 0; i < out.size(); ++i) { + for (int j = 0; j < NChannels; ++j) { + out[i][j] = in_0[i][j] + in_1[i][j]; + } + } +} + +template > +static DType add_kernel(const DType in_0, const DType in_1) { + return in_0 + in_1; +} + +template 1)>> +static sycl::vec +add_kernel(const sycl::vec &in_0, + const sycl::vec &in_1) { + sycl::vec out; + for (int i = 0; i < NChannels; ++i) { + out[i] = in_0[i] + in_1[i]; + } + return out; +} +}; // namespace bindless_helpers \ No newline at end of file diff --git a/sycl/test-e2e/bindless_images/read_sampled.cpp b/sycl/test-e2e/bindless_images/read_sampled.cpp index efcd7115cf6b2..aad32f24e57db 100644 --- a/sycl/test-e2e/bindless_images/read_sampled.cpp +++ b/sycl/test-e2e/bindless_images/read_sampled.cpp @@ -14,6 +14,7 @@ // Same as above but all mismatches are printed // #define VERBOSE_LV3 +#include "bindless_helpers.hpp" #include #include #include @@ -22,30 +23,6 @@ namespace syclexp = sycl::ext::oneapi::experimental; namespace util { -template -static void fillRand(std::vector> &v, int seed) { - std::default_random_engine generator; - generator.seed(seed); - auto distribution = [&]() { - if constexpr (std::is_same_v) { - return std::uniform_real_distribution(0.0, 100.0); - } else if constexpr (std::is_floating_point_v) { - return std::uniform_real_distribution(0.0, 100.0); - } else { - return std::uniform_int_distribution(0, 100); - } - }(); - for (int i = 0; i < v.size(); ++i) { - sycl::vec temp; - - for (int j = 0; j < NChannels; j++) { - temp[j] = distribution(generator); - } - - v[i] = temp; - } -} - static bool isNumberWithinPercentOfNumber(float firstN, float percent, float secondN, float &diff, float &percDiff) { @@ -829,7 +806,7 @@ static bool runTest(sycl::range dims, sycl::range localSize, std::vector actual(numElems); std::srand(seed); - util::fillRand(input, seed); + bindless_helpers::fillRand(input, seed); { sycl::range globalSize = dims; diff --git a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp index 855da584d7846..f9799699fb722 100644 --- a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out +#include "bindless_helpers.hpp" #include -#include #include #include @@ -16,68 +16,6 @@ static sycl::device dev; // Helpers and utilities struct util { - template - static void fill_rand(std::vector> &v, int seed) { - std::default_random_engine generator; - generator.seed(seed); - auto distribution = [&]() { - auto distr_t_zero = []() { - if constexpr (std::is_same_v) { - return float{}; - } else if constexpr (sizeof(DType) == 1) { - return int{}; - } else { - return DType{}; - } - }(); - using distr_t = decltype(distr_t_zero); - if constexpr (std::is_floating_point_v) { - return std::uniform_real_distribution(distr_t_zero, - static_cast(100)); - } else { - return std::uniform_int_distribution(distr_t_zero, 100); - } - }(); - for (int i = 0; i < v.size(); ++i) { - sycl::vec temp; - - for (int j = 0; j < NChannels; j++) { - temp[j] = static_cast(distribution(generator)); - } - - v[i] = temp; - } - } - - template - static void add_host(const std::vector> &in_0, - const std::vector> &in_1, - std::vector> &out) { - for (int i = 0; i < out.size(); ++i) { - for (int j = 0; j < NChannels; ++j) { - out[i][j] = in_0[i][j] + in_1[i][j]; - } - } - } - - template > - static DType add_kernel(const DType in_0, const DType in_1) { - return in_0 + in_1; - } - - template 1)>> - static sycl::vec - add_kernel(const sycl::vec &in_0, - const sycl::vec &in_1) { - sycl::vec out; - for (int i = 0; i < NChannels; ++i) { - out[i] = in_0[i] + in_1[i]; - } - return out; - } - // parallel_for 3D template > @@ -104,8 +42,8 @@ struct util { sycl::ext::oneapi::experimental::read_image( input_1, sycl::int4(dim0, dim1, dim2, 0)); - auto sum = - VecType(util::add_kernel(px1, px2)); + auto sum = VecType( + bindless_helpers::add_kernel(px1, px2)); sycl::ext::oneapi::experimental::write_image( output, sycl::int4(dim0, dim1, dim2, 0), VecType(sum)); } else { @@ -114,7 +52,8 @@ struct util { DType px2 = sycl::ext::oneapi::experimental::read_image( input_1, sycl::int4(dim0, dim1, dim2, 0)); - auto sum = DType(util::add_kernel(px1, px2)); + auto sum = DType( + bindless_helpers::add_kernel(px1, px2)); sycl::ext::oneapi::experimental::write_image( output, sycl::int4(dim0, dim1, dim2, 0), DType(sum)); } @@ -154,8 +93,8 @@ struct util { sycl::ext::oneapi::experimental::read_image( input_1, sycl::int2(dim0, dim1)); - auto sum = - VecType(util::add_kernel(px1, px2)); + auto sum = VecType( + bindless_helpers::add_kernel(px1, px2)); sycl::ext::oneapi::experimental::write_image( output, sycl::int2(dim0, dim1), VecType(sum)); } else { @@ -164,7 +103,8 @@ struct util { DType px2 = sycl::ext::oneapi::experimental::read_image( input_1, sycl::int2(dim0, dim1)); - auto sum = DType(util::add_kernel(px1, px2)); + auto sum = DType( + bindless_helpers::add_kernel(px1, px2)); sycl::ext::oneapi::experimental::write_image( output, sycl::int2(dim0, dim1), DType(sum)); } @@ -203,8 +143,8 @@ struct util { sycl::ext::oneapi::experimental::read_image( input_1, int(dim0)); - auto sum = - VecType(util::add_kernel(px1, px2)); + auto sum = VecType( + bindless_helpers::add_kernel(px1, px2)); sycl::ext::oneapi::experimental::write_image( output, int(dim0), VecType(sum)); } else { @@ -213,7 +153,8 @@ struct util { DType px2 = sycl::ext::oneapi::experimental::read_image( input_1, int(dim0)); - auto sum = DType(util::add_kernel(px1, px2)); + auto sum = DType( + bindless_helpers::add_kernel(px1, px2)); sycl::ext::oneapi::experimental::write_image( output, int(dim0), DType(sum)); } @@ -261,9 +202,9 @@ bool run_test(sycl::range dims, sycl::range localSize, std::vector actual(num_elems); std::srand(seed); - util::fill_rand(input_0, seed); - util::fill_rand(input_1, seed); - util::add_host(input_0, input_1, expected); + bindless_helpers::fillRand(input_0, seed); + bindless_helpers::fillRand(input_1, seed); + bindless_helpers::add_host(input_0, input_1, expected); try { sycl::ext::oneapi::experimental::image_descriptor desc(dims, COrder, CType); From 560a552f75a94ce4c9544307319a31587e7dbd18 Mon Sep 17 00:00:00 2001 From: Isaac Ault Date: Wed, 24 Jan 2024 10:38:58 +0000 Subject: [PATCH 2/7] Address Feedback: - Use bindless_helpers funcs for vulkan interop testing - Rename fill_rand func to conform to snake case - Use new assertion on unsampled coords in read_image_array - Declare and use syclexp namespace in image array test --- .../sycl/ext/oneapi/bindless_images.hpp | 1 + .../array/read_write_unsampled_array.cpp | 106 ++++++++---------- .../bindless_images/bindless_helpers.hpp | 6 +- .../test-e2e/bindless_images/read_sampled.cpp | 2 +- .../bindless_images/read_write_unsampled.cpp | 4 +- .../vulkan_interop/unsampled_images.cpp | 48 ++------ 6 files changed, 68 insertions(+), 99 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 245e05328b34d..9ab1760234df7 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -1052,6 +1052,7 @@ DataT read_image_array(const unsampled_image_handle &imageHandle [[maybe_unused]], const CoordT &coords [[maybe_unused]], const int arrayLayer [[maybe_unused]]) { + detail::assert_unsampled_coords(); constexpr size_t coordSize = detail::coord_size(); static_assert(coordSize == 1 || coordSize == 2, "Expected input coordinate to be have 1 or 2 components for 1D " diff --git a/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp index 300f01cc9a440..8ae505cfc0972 100644 --- a/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp +++ b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp @@ -15,16 +15,18 @@ static sycl::device dev; // Uncomment to print additional test information // #define VERBOSE_PRINT +namespace syclexp = sycl::ext::oneapi::experimental; + // Helpers and utilities struct util { // parallel_for 3D template > - static void run_ndim_test( - sycl::queue q, sycl::range<3> globalSize, sycl::range<3> localSize, - sycl::ext::oneapi::experimental::unsampled_image_handle input_0, - sycl::ext::oneapi::experimental::unsampled_image_handle input_1, - sycl::ext::oneapi::experimental::unsampled_image_handle output) { + static void run_ndim_test(sycl::queue q, sycl::range<3> globalSize, + sycl::range<3> localSize, + syclexp::unsampled_image_handle input_0, + syclexp::unsampled_image_handle input_1, + syclexp::unsampled_image_handle output) { using VecType = sycl::vec; try { q.submit([&](sycl::handler &cgh) { @@ -36,28 +38,24 @@ struct util { size_t dim2 = it.get_global_id(2); if constexpr (NChannels >= 1) { - VecType px1 = - sycl::ext::oneapi::experimental::read_image_array( - input_0, sycl::int2(dim0, dim1), int(dim2)); - VecType px2 = - sycl::ext::oneapi::experimental::read_image_array( - input_1, sycl::int2(dim0, dim1), int(dim2)); + VecType px1 = syclexp::read_image_array( + input_0, sycl::int2(dim0, dim1), int(dim2)); + VecType px2 = syclexp::read_image_array( + input_1, sycl::int2(dim0, dim1), int(dim2)); auto sum = VecType( bindless_helpers::add_kernel(px1, px2)); - sycl::ext::oneapi::experimental::write_image_array( + syclexp::write_image_array( output, sycl::int2(dim0, dim1), int(dim2), VecType(sum)); } else { - DType px1 = - sycl::ext::oneapi::experimental::read_image_array( - input_0, sycl::int2(dim0, dim1), int(dim2)); - DType px2 = - sycl::ext::oneapi::experimental::read_image_array( - input_1, sycl::int2(dim0, dim1), int(dim2)); + DType px1 = syclexp::read_image_array( + input_0, sycl::int2(dim0, dim1), int(dim2)); + DType px2 = syclexp::read_image_array( + input_1, sycl::int2(dim0, dim1), int(dim2)); auto sum = DType( bindless_helpers::add_kernel(px1, px2)); - sycl::ext::oneapi::experimental::write_image_array( + syclexp::write_image_array( output, sycl::int2(dim0, dim1), int(dim2), DType(sum)); } }); @@ -74,11 +72,11 @@ struct util { // parallel_for 2D template > - static void run_ndim_test( - sycl::queue q, sycl::range<2> globalSize, sycl::range<2> localSize, - sycl::ext::oneapi::experimental::unsampled_image_handle input_0, - sycl::ext::oneapi::experimental::unsampled_image_handle input_1, - sycl::ext::oneapi::experimental::unsampled_image_handle output) { + static void run_ndim_test(sycl::queue q, sycl::range<2> globalSize, + sycl::range<2> localSize, + syclexp::unsampled_image_handle input_0, + syclexp::unsampled_image_handle input_1, + syclexp::unsampled_image_handle output) { using VecType = sycl::vec; try { q.submit([&](sycl::handler &cgh) { @@ -89,29 +87,25 @@ struct util { size_t dim1 = it.get_global_id(1); if constexpr (NChannels >= 1) { - VecType px1 = - sycl::ext::oneapi::experimental::read_image_array( - input_0, int(dim0), int(dim1)); - VecType px2 = - sycl::ext::oneapi::experimental::read_image_array( - input_1, int(dim0), int(dim1)); + VecType px1 = syclexp::read_image_array( + input_0, int(dim0), int(dim1)); + VecType px2 = syclexp::read_image_array( + input_1, int(dim0), int(dim1)); auto sum = VecType( bindless_helpers::add_kernel(px1, px2)); - sycl::ext::oneapi::experimental::write_image_array( - output, int(dim0), int(dim1), VecType(sum)); + syclexp::write_image_array(output, int(dim0), + int(dim1), VecType(sum)); } else { - DType px1 = - sycl::ext::oneapi::experimental::read_image_array( - input_0, int(dim0), int(dim1)); - DType px2 = - sycl::ext::oneapi::experimental::read_image_array( - input_1, int(dim0), int(dim1)); + DType px1 = syclexp::read_image_array(input_0, int(dim0), + int(dim1)); + DType px2 = syclexp::read_image_array(input_1, int(dim0), + int(dim1)); auto sum = DType( bindless_helpers::add_kernel(px1, px2)); - sycl::ext::oneapi::experimental::write_image_array( - output, int(dim0), int(dim1), DType(sum)); + syclexp::write_image_array(output, int(dim0), int(dim1), + DType(sum)); } }); }); @@ -153,27 +147,23 @@ bool run_test(sycl::range dims, sycl::range localSize, std::vector actual(num_elems); std::srand(seed); - bindless_helpers::fillRand(input_0, seed); - bindless_helpers::fillRand(input_1, seed); + bindless_helpers::fill_rand(input_0, seed); + bindless_helpers::fill_rand(input_1, seed); bindless_helpers::add_host(input_0, input_1, expected); try { - sycl::ext::oneapi::experimental::image_descriptor desc( - {dims[0], NDims > 2 ? dims[1] : 0}, COrder, CType, - sycl::ext::oneapi::experimental::image_type::array, 1, - NDims > 2 ? dims[2] : dims[1]); + syclexp::image_descriptor desc({dims[0], NDims > 2 ? dims[1] : 0}, COrder, + CType, syclexp::image_type::array, 1, + NDims > 2 ? dims[2] : dims[1]); // Extension: allocate memory on device and create the handle - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, q); - sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, q); - sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, q); + syclexp::image_mem img_mem_0(desc, q); + syclexp::image_mem img_mem_1(desc, q); + syclexp::image_mem img_mem_2(desc, q); - auto img_input_0 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, q); - auto img_input_1 = - sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, q); - auto img_output = - sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, q); + auto img_input_0 = syclexp::create_image(img_mem_0, desc, q); + auto img_input_1 = syclexp::create_image(img_mem_1, desc, q); + auto img_output = syclexp::create_image(img_mem_2, desc, q); // Extension: copy over data to device q.ext_oneapi_copy(input_0.data(), img_mem_0.get_handle(), desc); @@ -192,9 +182,9 @@ bool run_test(sycl::range dims, sycl::range localSize, } // Cleanup - sycl::ext::oneapi::experimental::destroy_image_handle(img_input_0, q); - sycl::ext::oneapi::experimental::destroy_image_handle(img_input_1, q); - sycl::ext::oneapi::experimental::destroy_image_handle(img_output, q); + syclexp::destroy_image_handle(img_input_0, q); + syclexp::destroy_image_handle(img_input_1, q); + syclexp::destroy_image_handle(img_output, q); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; exit(-1); diff --git a/sycl/test-e2e/bindless_images/bindless_helpers.hpp b/sycl/test-e2e/bindless_images/bindless_helpers.hpp index c963c06539f75..29770196ff2d0 100644 --- a/sycl/test-e2e/bindless_images/bindless_helpers.hpp +++ b/sycl/test-e2e/bindless_images/bindless_helpers.hpp @@ -5,7 +5,8 @@ namespace bindless_helpers { template -static void fillRand(std::vector> &v, int seed) { +static void fill_rand(std::vector> &v, + int seed = std::default_random_engine::default_seed) { std::default_random_engine generator; generator.seed(seed); auto distribution = [&]() { @@ -56,4 +57,5 @@ add_kernel(const sycl::vec &in_0, } return out; } -}; // namespace bindless_helpers \ No newline at end of file + +}; // namespace bindless_helpers diff --git a/sycl/test-e2e/bindless_images/read_sampled.cpp b/sycl/test-e2e/bindless_images/read_sampled.cpp index aad32f24e57db..243f9a707d76c 100644 --- a/sycl/test-e2e/bindless_images/read_sampled.cpp +++ b/sycl/test-e2e/bindless_images/read_sampled.cpp @@ -806,7 +806,7 @@ static bool runTest(sycl::range dims, sycl::range localSize, std::vector actual(numElems); std::srand(seed); - bindless_helpers::fillRand(input, seed); + bindless_helpers::fill_rand(input, seed); { sycl::range globalSize = dims; diff --git a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp index f9799699fb722..ee5babcfd0135 100644 --- a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp @@ -202,8 +202,8 @@ bool run_test(sycl::range dims, sycl::range localSize, std::vector actual(num_elems); std::srand(seed); - bindless_helpers::fillRand(input_0, seed); - bindless_helpers::fillRand(input_1, seed); + bindless_helpers::fill_rand(input_0, seed); + bindless_helpers::fill_rand(input_1, seed); bindless_helpers::add_host(input_0, input_1, expected); try { diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp index d8d79d2c167f1..3fab936fa4259 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -10,6 +10,7 @@ #include +#include "../bindless_helpers.hpp" #include "vulkan_common.hpp" #include @@ -112,33 +113,6 @@ void cleanup_test(sycl::context &ctxt, sycl::device &dev, handles_t handles) { syclexp::destroy_image_handle(handles.output, dev, ctxt); } -template -void fill_rand(std::vector> &v) { - std::default_random_engine generator; - using distribution_t = - std::conditional_t, - std::uniform_int_distribution, - std::uniform_real_distribution>; - distribution_t distribution(static_cast(0), static_cast(100)); - - assert(v.empty()); - for (int i = 0; i < v.capacity(); ++i) { - v.emplace_back(distribution(generator)); - } -} - -template T add_kernel(T &in_0, T &in_1) { - if constexpr (std::is_scalar_v) { - return in_0 + in_1; - } else { - T out; - for (int i = 0; i < NChannels; ++i) { - out[i] = in_0[i] + in_1[i]; - } - return out; - } -} - template void run_ndim_test(sycl::range global_size, @@ -190,8 +164,8 @@ void run_ndim_test(sycl::range global_size, VecType px2 = syclexp::read_image( handles.input_2, sycl::int2(dim0, dim1)); - auto sum = - VecType(util::add_kernel(px1, px2)); + auto sum = VecType( + bindless_helpers::add_kernel(px1, px2)); syclexp::write_image( handles.output, sycl::int2(dim0, dim1), VecType(sum)); } else { @@ -200,7 +174,8 @@ void run_ndim_test(sycl::range global_size, DType px2 = syclexp::read_image(handles.input_2, sycl::int2(dim0, dim1)); - auto sum = DType(util::add_kernel(px1, px2)); + auto sum = DType( + bindless_helpers::add_kernel(px1, px2)); syclexp::write_image(handles.output, sycl::int2(dim0, dim1), DType(sum)); } @@ -213,8 +188,8 @@ void run_ndim_test(sycl::range global_size, VecType px2 = syclexp::read_image( handles.input_2, sycl::int4(dim0, dim1, dim2, 0)); - auto sum = - VecType(util::add_kernel(px1, px2)); + auto sum = VecType( + bindless_helpers::add_kernel(px1, px2)); syclexp::write_image(handles.output, sycl::int4(dim0, dim1, dim2, 0), VecType(sum)); @@ -224,7 +199,8 @@ void run_ndim_test(sycl::range global_size, DType px2 = syclexp::read_image( handles.input_2, sycl::int4(dim0, dim1, dim2, 0)); - auto sum = DType(util::add_kernel(px1, px2)); + auto sum = DType( + bindless_helpers::add_kernel(px1, px2)); syclexp::write_image(handles.output, sycl::int4(dim0, dim1, dim2, 0), DType(sum)); @@ -290,7 +266,7 @@ bool run_test(sycl::range dims, sycl::range local_size, std::vector input_vector_0; input_vector_0.reserve(num_elems); std::srand(seed); - util::fill_rand(input_vector_0); + bindless_helpers::fill_rand(input_vector_0); VecType *inputStagingData = nullptr; VK_CHECK_CALL(vkMapMemory(vk_device, inVkImgRes1.stagingMemory, 0 /*offset*/, @@ -304,7 +280,7 @@ bool run_test(sycl::range dims, sycl::range local_size, std::vector input_vector_1; input_vector_1.reserve(num_elems); std::srand(seed); - util::fill_rand(input_vector_1); + bindless_helpers::fill_rand(input_vector_1); VK_CHECK_CALL(vkMapMemory(vk_device, inVkImgRes2.stagingMemory, 0 /*offset*/, imageSizeBytes, 0 /*flags*/, @@ -543,7 +519,7 @@ bool run_all() { printString("Running 2D uint2\n"); valid &= run_test<2, uint32_t, 2, sycl::image_channel_type::unsigned_int32, sycl::image_channel_order::rg, class uint2_2d>( - {1024, 1024}, {2, 2}, seed); + {128, 128}, {2, 2}, seed); printString("Running 2D uint\n"); valid &= run_test<2, uint32_t, 1, sycl::image_channel_type::unsigned_int32, From 6b2ca14b2793cf606790296a46318e2ac3ab3516 Mon Sep 17 00:00:00 2001 From: Isaac Ault Date: Tue, 6 Feb 2024 13:52:08 +0000 Subject: [PATCH 3/7] Address Feedback: * Remove cast reshuffling * Wrap image array read/write intrinsics in macros --- libclc/ptx-nvidiacl/libspirv/images/image.cl | 280 ++++++++++--------- 1 file changed, 147 insertions(+), 133 deletions(-) diff --git a/libclc/ptx-nvidiacl/libspirv/images/image.cl b/libclc/ptx-nvidiacl/libspirv/images/image.cl index c489a18bc0fc9..41b13a4a44d1e 100644 --- a/libclc/ptx-nvidiacl/libspirv/images/image.cl +++ b/libclc/ptx-nvidiacl/libspirv/images/image.cl @@ -256,53 +256,55 @@ pixelf32 as_pixelf32(int4 v) { return as_float4(v); } image, x * sizeof(pixelf##pixelf_size), y, z)); \ } -_DEFINE_VEC4_CAST(int, int) +_DEFINE_VEC4_CAST(float, int) +_DEFINE_VEC4_CAST(int, float) +_DEFINE_VEC4_CAST(float, uint) +_DEFINE_VEC4_CAST(uint, float) +_DEFINE_VEC4_CAST(uint, int) _DEFINE_VEC4_CAST(int, uint) _DEFINE_VEC4_CAST(int, short) _DEFINE_VEC4_CAST(int, char) -_DEFINE_VEC4_CAST(int, float) _DEFINE_VEC4_CAST(uint, ushort) _DEFINE_VEC4_CAST(uint, uchar) -_DEFINE_VEC4_CAST(short, short) -_DEFINE_VEC4_CAST(short, ushort) _DEFINE_VEC4_CAST(short, char) _DEFINE_VEC4_CAST(short, uchar) -_DEFINE_VEC4_CAST(float, int) -_DEFINE_VEC4_CAST(float, uint) _DEFINE_VEC4_CAST(float, half) +_DEFINE_VEC4_CAST(int, int) +_DEFINE_VEC4_CAST(short, ushort) +_DEFINE_VEC4_CAST(short, short) _DEFINE_VEC4_TO_VEC2_CAST(int, int) -_DEFINE_VEC4_TO_VEC2_CAST(int, uint) +_DEFINE_VEC4_TO_VEC2_CAST(uint, uint) +_DEFINE_VEC4_TO_VEC2_CAST(float, float) +_DEFINE_VEC4_TO_VEC2_CAST(short, short) +_DEFINE_VEC4_TO_VEC2_CAST(short, char) _DEFINE_VEC4_TO_VEC2_CAST(int, short) _DEFINE_VEC4_TO_VEC2_CAST(int, char) -_DEFINE_VEC4_TO_VEC2_CAST(uint, uint) _DEFINE_VEC4_TO_VEC2_CAST(uint, ushort) _DEFINE_VEC4_TO_VEC2_CAST(uint, uchar) -_DEFINE_VEC4_TO_VEC2_CAST(short, short) -_DEFINE_VEC4_TO_VEC2_CAST(short, ushort) -_DEFINE_VEC4_TO_VEC2_CAST(short, char) -_DEFINE_VEC4_TO_VEC2_CAST(float, float) _DEFINE_VEC4_TO_VEC2_CAST(float, half) +_DEFINE_VEC4_TO_VEC2_CAST(int, uint) +_DEFINE_VEC4_TO_VEC2_CAST(short, ushort) -_DEFINE_VEC2_CAST(int, int) _DEFINE_VEC2_CAST(int, float) -_DEFINE_VEC2_CAST(short, short) _DEFINE_VEC2_CAST(short, char) _DEFINE_VEC2_CAST(short, uchar) +_DEFINE_VEC2_CAST(int, int) +_DEFINE_VEC2_CAST(short, short) -_DEFINE_CAST(int, int) -_DEFINE_CAST(int, uint) _DEFINE_CAST(int, float) -_DEFINE_CAST(short, short) -_DEFINE_CAST(short, ushort) -_DEFINE_CAST(short, char) -_DEFINE_CAST(short, uchar) _DEFINE_CAST(float, float) _DEFINE_CAST(float2, float2) _DEFINE_CAST(float4, float4) _DEFINE_CAST(pixelf32, float4) _DEFINE_CAST(pixelf32, pixelf32) _DEFINE_CAST(float4, pixelf32) +_DEFINE_CAST(int, int) +_DEFINE_CAST(int, uint) +_DEFINE_CAST(short, short) +_DEFINE_CAST(short, ushort) +_DEFINE_CAST(short, char) +_DEFINE_CAST(short, uchar) _DEFINE_PIXELF_CAST(32, float4, int4) _DEFINE_PIXELF_CAST(32, float4, uint4) @@ -2602,48 +2604,116 @@ _CLC_DEFINE_MIPMAP_BINDLESS_READS_BUILTIN(half4, 3, Dv4_DF16_, v4f16, Dv4_f, flo // ------- Image Arrays / Layered Images ------- -// // --- THUNKS: Surface Array Reads --- -// int -int __nvvm_suld_1d_array_i32_clamp_s(long, int, int) __asm( - "llvm.nvvm.suld.1d.array.i32.clamp"); -int __nvvm_suld_2d_array_i32_clamp_s(long, int, int, int) __asm( - "llvm.nvvm.suld.2d.array.i32.clamp"); -int2 __nvvm_suld_1d_array_v2i32_clamp_s(long, int, int) __asm( - "__clc_llvm_nvvm_suld_1d_array_v2i32_clamp"); -int2 __nvvm_suld_2d_array_v2i32_clamp_s(long, int, int, int) __asm( - "__clc_llvm_nvvm_suld_2d_array_v2i32_clamp"); -int4 __nvvm_suld_1d_array_v4i32_clamp_s(long, int, int) __asm( - "__clc_llvm_nvvm_suld_1d_array_v4i32_clamp"); -int4 __nvvm_suld_2d_array_v4i32_clamp_s(long, int, int, int) __asm( - "__clc_llvm_nvvm_suld_2d_array_v4i32_clamp"); +// Read/Write Intrinsic Thunks -// short -short __nvvm_suld_1d_array_i16_clamp_s(long, int, int) __asm( - "llvm.nvvm.suld.1d.array.i16.clamp"); -short __nvvm_suld_2d_array_i16_clamp_s(long, int, int, int) __asm( - "llvm.nvvm.suld.2d.array.i16.clamp"); -short2 __nvvm_suld_1d_array_v2i16_clamp_s(long, int, int) __asm( - "__clc_llvm_nvvm_suld_1d_array_v2i16_clamp"); -short2 __nvvm_suld_2d_array_v2i16_clamp_s(long, int, int, int) __asm( - "__clc_llvm_nvvm_suld_2d_array_v2i16_clamp"); -short4 __nvvm_suld_1d_array_v4i16_clamp_s(long, int, int) __asm( - "__clc_llvm_nvvm_suld_1d_array_v4i16_clamp"); -short4 __nvvm_suld_2d_array_v4i16_clamp_s(long, int, int, int) __asm( - "__clc_llvm_nvvm_suld_2d_array_v4i16_clamp"); - -// char helper -- i8 intrinsic returns i16, requires helper -short __nvvm_suld_1d_array_i8_clamp_s_helper(long, int, int) __asm( - "llvm.nvvm.suld.1d.array.i8.clamp"); -short __nvvm_suld_2d_array_i8_clamp_s_helper(long, int, int, int) __asm( - "llvm.nvvm.suld.2d.array.i8.clamp"); -short2 __nvvm_suld_1d_array_v2i8_clamp_s_helper(long, int, int) __asm( - "__clc_llvm_nvvm_suld_1d_array_v2i8_clamp"); -short2 __nvvm_suld_2d_array_v2i8_clamp_s_helper(long, int, int, int) __asm( - "__clc_llvm_nvvm_suld_2d_array_v2i8_clamp"); -short4 __nvvm_suld_1d_array_v4i8_clamp_s_helper(long, int, int) __asm( - "__clc_llvm_nvvm_suld_1d_array_v4i8_clamp"); -short4 __nvvm_suld_2d_array_v4i8_clamp_s_helper(long, int, int, int) __asm( - "__clc_llvm_nvvm_suld_2d_array_v4i8_clamp"); +#define COORD_PARAMS_1D(type) type +#define COORD_PARAMS_2D(type) type, type + +// Vector of size 1 is scalar +#define ELEM_VEC_1(elem_t) elem_t +#define ELEM_VEC_2(elem_t) elem_t##2 +#define ELEM_VEC_4(elem_t) elem_t##4 + +#define VEC_SIZE_1(elem_t, size) elem_t##size +#define VEC_SIZE_2(elem_t, size) v2##elem_t##size +#define VEC_SIZE_4(elem_t, size) v4##elem_t##size + +#define COLOR_INPUT_1_CHANNEL(elem_t) elem_t +#define COLOR_INPUT_2_CHANNEL(elem_t) elem_t, elem_t +#define COLOR_INPUT_4_CHANNEL(elem_t) elem_t, elem_t, elem_t, elem_t + +#define _CONCAT(x, y) x##y +#define CONCAT(x, y) _CONCAT(x, y) + +#define _STR(x) #x +#define STR(x) _STR(x) + +#define _NVVM_FUNC_UNDERSCORE(name, dim, vec_size, help, pre, post) \ + pre##nvvm_##name##_##dim##d##_array_##vec_size##_clamp##post##help +#define NVVM_FUNC_UNDERSCORE(a, b, c, d, e, f) \ + _NVVM_FUNC_UNDERSCORE(a, b, c, d, e, f) + +#define _NVVM_FUNC_PERIOD(name, dim, vec_size, help, pre, post) \ + pre##llvm.nvvm.name.dim##d.array.vec_size.clamp##post##help +#define NVVM_FUNC_PERIOD(a, b, c, d, e, f) _NVVM_FUNC_PERIOD(a, b, c, d, e, f) + +#define BINDLESS_INTRINSIC_FUNC_ND(ret_type, dimension, nvvm_elem_t_mangled, \ + vec_size, elem_t_size, separator, \ + clc_prefix, helper) \ + ELEM_VEC_##vec_size(ret_type) CONCAT( \ + __, \ + NVVM_FUNC_UNDERSCORE( \ + suld, dimension, \ + VEC_SIZE_##vec_size(nvvm_elem_t_mangled, elem_t_size), helper, , \ + _s)( \ + long, int, \ + COORD_PARAMS_##dimension##D( \ + int))) __asm(STR(NVVM_FUNC_##separator(suld, dimension, \ + VEC_SIZE_##vec_size( \ + nvvm_elem_t_mangled, \ + elem_t_size), \ + , clc_prefix, ))); \ + void CONCAT( \ + __, \ + NVVM_FUNC_UNDERSCORE( \ + sust, dimension, \ + VEC_SIZE_##vec_size(nvvm_elem_t_mangled, elem_t_size), helper, , \ + _s)( \ + unsigned long, int, COORD_PARAMS_##dimension##D(int), \ + COLOR_INPUT_##vec_size##_CHANNEL( \ + ret_type))) __asm(STR(NVVM_FUNC_PERIOD(sust.b, dimension, \ + VEC_SIZE_##vec_size( \ + nvvm_elem_t_mangled, \ + elem_t_size), \ + , , ))); + +#define BINDLESS_INTRINSIC_FUNC_VEC_SIZE_N(ret_type, vec_size, \ + nvvm_elem_t_mangled, elem_t_size, \ + separator, clc_prefix, helper) \ + BINDLESS_INTRINSIC_FUNC_ND(ret_type, 1, nvvm_elem_t_mangled, vec_size, \ + elem_t_size, separator, clc_prefix, helper) \ + BINDLESS_INTRINSIC_FUNC_ND(ret_type, 2, nvvm_elem_t_mangled, vec_size, \ + elem_t_size, separator, clc_prefix, helper) + +#define BINDLESS_INTRINSIC_FUNC_ALL(ret_type, nvvm_elem_t_mangled, \ + elem_t_size, helper) \ + BINDLESS_INTRINSIC_FUNC_VEC_SIZE_N(ret_type, 1, nvvm_elem_t_mangled, \ + elem_t_size, PERIOD, , helper) \ + BINDLESS_INTRINSIC_FUNC_VEC_SIZE_N(ret_type, 2, nvvm_elem_t_mangled, \ + elem_t_size, UNDERSCORE, __clc_llvm_, \ + helper) \ + BINDLESS_INTRINSIC_FUNC_VEC_SIZE_N(ret_type, 4, nvvm_elem_t_mangled, \ + elem_t_size, UNDERSCORE, __clc_llvm_, \ + helper) + +BINDLESS_INTRINSIC_FUNC_ALL(int, i, 32, ) +BINDLESS_INTRINSIC_FUNC_ALL(short, i, 16, ) +BINDLESS_INTRINSIC_FUNC_ALL(short, i, 8, _helper) + +#undef COORD_PARAMS_1D +#undef COORD_PARAMS_2D +#undef ELEM_VEC_1 +#undef ELEM_VEC_2 +#undef ELEM_VEC_4 +#undef VEC_SIZE_1 +#undef VEC_SIZE_2 +#undef VEC_SIZE_4 +#undef COLOR_INPUT_1_CHANNEL +#undef COLOR_INPUT_2_CHANNEL +#undef COLOR_INPUT_4_CHANNEL +#undef _CONCAT +#undef CONCAT +#undef _STR +#undef STR +#undef _NVVM_FUNC_PERIOD +#undef NVVM_FUNC_PERIOD +#undef _NVVM_FUNC_UNDERSCORE +#undef NVVM_FUNC_UNDERSCORE +#undef BINDLESS_INTRINSIC_FUNC_ND +#undef BINDLESS_INTRINSIC_FUNC_VEC_SIZE_N +#undef BINDLESS_INTRINSIC_FUNC_ALL + +// // --- THUNKS: Surface Array Reads --- // Macro to generate surface array fetches #define _CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN( \ @@ -2688,61 +2758,6 @@ _CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN(half4, short4, short4, v4 #undef _CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_READS_BUILTIN // // --- THUNKS: Surface Array Writes --- -// int -void __nvvm_sust_1d_array_v4i32_clamp_s( - unsigned long, int, int, int, int, int, - int) __asm("llvm.nvvm.sust.b.1d.array.v4i32.clamp"); -void __nvvm_sust_2d_array_v4i32_clamp_s( - unsigned long, int, int, int, int, int, int, - int) __asm("llvm.nvvm.sust.b.2d.array.v4i32.clamp"); -void __nvvm_sust_1d_array_v2i32_clamp_s( - unsigned long, int, int, int, - int) __asm("llvm.nvvm.sust.b.1d.array.v2i32.clamp"); -void __nvvm_sust_2d_array_v2i32_clamp_s( - unsigned long, int, int, int, int, - int) __asm("llvm.nvvm.sust.b.2d.array.v2i32.clamp"); -void __nvvm_sust_1d_array_i32_clamp_s(unsigned long, int, int, int) __asm( - "llvm.nvvm.sust.b.1d.array.i32.clamp"); -void __nvvm_sust_2d_array_i32_clamp_s(unsigned long, int, int, int, int) __asm( - "llvm.nvvm.sust.b.2d.array.i32.clamp"); - -// short -void __nvvm_sust_1d_array_v4i16_clamp_s( - unsigned long, int, int, short, short, short, - short) __asm("llvm.nvvm.sust.b.1d.array.v4i16.clamp"); -void __nvvm_sust_2d_array_v4i16_clamp_s( - unsigned long, int, int, int, short, short, short, - short) __asm("llvm.nvvm.sust.b.2d.array.v4i16.clamp"); -void __nvvm_sust_1d_array_v2i16_clamp_s( - unsigned long, int, int, short, - short) __asm("llvm.nvvm.sust.b.1d.array.v2i16.clamp"); -void __nvvm_sust_2d_array_v2i16_clamp_s( - unsigned long, int, int, int, short, - short) __asm("llvm.nvvm.sust.b.2d.array.v2i16.clamp"); -void __nvvm_sust_1d_array_i16_clamp_s(unsigned long, int, int, short) __asm( - "llvm.nvvm.sust.b.1d.array.i16.clamp"); -void __nvvm_sust_2d_array_i16_clamp_s( - unsigned long, int, int, int, - short) __asm("llvm.nvvm.sust.b.2d.array.i16.clamp"); - -// char helper -- i8 intrinsic takes i16, requires helper -void __nvvm_sust_1d_array_v4i8_clamp_s_helper( - unsigned long, int, int, short, short, short, - short) __asm("llvm.nvvm.sust.b.1d.array.v4i8.clamp"); -void __nvvm_sust_2d_array_v4i8_clamp_s_helper( - unsigned long, int, int, int, short, short, short, - short) __asm("llvm.nvvm.sust.b.2d.array.v4i8.clamp"); -void __nvvm_sust_1d_array_v2i8_clamp_s_helper( - unsigned long, int, int, short, - short) __asm("llvm.nvvm.sust.b.1d.array.v2i8.clamp"); -void __nvvm_sust_2d_array_v2i8_clamp_s_helper( - unsigned long, int, int, int, short, - short) __asm("llvm.nvvm.sust.b.2d.array.v2i8.clamp"); -void __nvvm_sust_1d_array_i8_clamp_s_helper( - unsigned long, int, int, short) __asm("llvm.nvvm.sust.b.1d.array.i8.clamp"); -void __nvvm_sust_2d_array_i8_clamp_s_helper( - unsigned long, int, int, int, - short) __asm("llvm.nvvm.sust.b.2d.array.i8.clamp"); #define COLOR_INPUT_1_CHANNEL(elem_t) elem_t a #define COLOR_INPUT_2_CHANNEL(elem_t) elem_t a, elem_t b @@ -2842,41 +2857,40 @@ _CLC_DEFINE_SURFACE_ARRAY_BINDLESS_THUNK_WRITES_BUILTIN(half, short, v4f16, v4i1 #define DVEC_SIZE_2(prefix, elem_t, postfix) prefix##Dv2_##elem_t##postfix #define DVEC_SIZE_4(prefix, elem_t, postfix) prefix##Dv4_##elem_t##postfix -#define CONCAT(x, y) x##y -#define CONCAT_HELP(x, y) CONCAT(x, y) +#define _CONCAT(x, y) x##y +#define CONCAT(x, y) _CONCAT(x, y) -#define NVVM_FUNC(name, dimension, vec_size_mangled) \ +#define _NVVM_FUNC(name, dimension, vec_size_mangled) \ __nvvm_##name##_##dimension##d_array_##vec_size_mangled##_clamp_s -#define NVVM_FUNC_HELP(a, b, c) NVVM_FUNC(a, b, c) +#define NVVM_FUNC(a, b, c) _NVVM_FUNC(a, b, c) -#define MANGLE_FUNC_IMG_HANDLE_HELP(size, name, prefix, postfix) \ +#define MANGLE_FUNC_IMG_HANDLE_HELPER(size, name, prefix, postfix) \ MANGLE_FUNC_IMG_HANDLE(size, name, prefix, postfix) #define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_READ_BUILTIN( \ elem_t, vec_size, dimension, ocl_elem_t_mangled, nvvm_elem_t_mangled, \ elem_t_size) \ - _CLC_DEF ELEM_VEC_##vec_size(elem_t) MANGLE_FUNC_IMG_HANDLE_HELP( \ + _CLC_DEF ELEM_VEC_##vec_size(elem_t) MANGLE_FUNC_IMG_HANDLE_HELPER( \ 22, __spirv_ImageArrayRead, \ DVEC_SIZE_##vec_size(I, ocl_elem_t_mangled, ), \ DVEC_SIZE_##dimension(, i, ET_T0_T1_i))( \ ulong imageHandle, COORD_INPUT_##dimension##D(int), int idx) { \ - return NVVM_FUNC_HELP( \ - suld, dimension, \ - VEC_SIZE_##vec_size(nvvm_elem_t_mangled, elem_t_size))( \ + return NVVM_FUNC(suld, dimension, \ + VEC_SIZE_##vec_size(nvvm_elem_t_mangled, elem_t_size))( \ imageHandle, idx, \ COORD_PARAMS_##dimension##D(ELEM_VEC_##vec_size(elem_t))); \ } #define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN( \ elem_t, vec_size, dimension, elem_t_mangled, write_mangled, elem_t_size) \ - _CLC_DEF void MANGLE_FUNC_IMG_HANDLE_HELP( \ + _CLC_DEF void MANGLE_FUNC_IMG_HANDLE_HELPER( \ 23, __spirv_ImageArrayWrite, I, \ - CONCAT_HELP(DVEC_SIZE_##dimension(, i, ), \ - DVEC_SIZE_##vec_size(, elem_t_mangled, EvT_T0_iT1_)))( \ + CONCAT(DVEC_SIZE_##dimension(, i, ), \ + DVEC_SIZE_##vec_size(, elem_t_mangled, EvT_T0_iT1_)))( \ ulong imageHandle, COORD_INPUT_##dimension##D(int), int idx, \ ELEM_VEC_##vec_size(elem_t) c) { \ - NVVM_FUNC_HELP(sust, dimension, \ - VEC_SIZE_##vec_size(write_mangled, elem_t_size)) \ + NVVM_FUNC(sust, dimension, \ + VEC_SIZE_##vec_size(write_mangled, elem_t_size)) \ (imageHandle, idx, \ COORD_PARAMS_##dimension##D(ELEM_VEC_##vec_size(elem_t)), \ COLOR_PARAMS_##vec_size##_CHANNEL); \ @@ -2940,8 +2954,8 @@ _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(half, DF16_, f, 16) #undef DVEC_SIZE_1 #undef DVEC_SIZE_2 #undef DVEC_SIZE_4 +#undef _CONCAT #undef CONCAT -#undef CONCAT_HELP +#undef _NVVM_FUNC #undef NVVM_FUNC -#undef NVVM_FUNC_HELPER -#undef MANGLE_FUNC_IMG_HANDLE_HELP +#undef MANGLE_FUNC_IMG_HANDLE_HELPER From 7a457856a2f0dc77b3a1d27d7866fd4ccd422219 Mon Sep 17 00:00:00 2001 From: Isaac Ault Date: Thu, 8 Feb 2024 12:58:24 +0000 Subject: [PATCH 4/7] Address Feedback: * Use image descriptor verify where applicable * Periods after comments --- .../sycl/ext/oneapi/bindless_images.hpp | 12 ++-- .../ext/oneapi/bindless_images_descriptor.hpp | 35 +++++++---- sycl/source/detail/bindless_images.cpp | 23 ++++--- sycl/source/handler.cpp | 60 ++++++------------- .../array/read_write_unsampled_array.cpp | 21 ++++--- 5 files changed, 66 insertions(+), 85 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 9ab1760234df7..6bef4b9e0c7f2 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -1063,10 +1063,10 @@ DataT read_image_array(const unsampled_image_handle &imageHandle return __invoke__ImageArrayRead(imageHandle.raw_handle, coords, arrayLayer); #else - // TODO: add SPIRV part for unsampled image array read + // TODO: add SPIRV part for unsampled image array read. #endif #else - assert(false); // Bindless images not yet implemented on host + assert(false); // Bindless images not yet implemented on host. #endif } @@ -1129,13 +1129,13 @@ void write_image_array(const unsampled_image_handle &imageHandle #ifdef __SYCL_DEVICE_ONLY__ #if defined(__NVPTX__) - __invoke__ImageArrayWrite((uint64_t)imageHandle.raw_handle, coords, - arrayLayer, detail::convert_color(color)); + __invoke__ImageArrayWrite(static_cast(imageHandle.raw_handle), + coords, arrayLayer, detail::convert_color(color)); #else - // TODO: add SPIRV part for unsampled image array write + // TODO: add SPIRV part for unsampled image array write. #endif #else - assert(false); // Bindless images not yet implemented on host + assert(false); // Bindless images not yet implemented on host. #endif } diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp index 76acca9e1dfd0..dfddc5479d3d0 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_descriptor.hpp @@ -102,58 +102,67 @@ struct image_descriptor { } void verify() const { - if (this->type == image_type::standard) { + switch (this->type) { + case image_type::standard: if (this->array_size > 1) { - // Not a standard image + // Not a standard image. throw sycl::exception( sycl::errc::invalid, "Standard images cannot have array_size greater than 1! Use " "image_type::array for image arrays."); } if (this->num_levels > 1) { - // Image arrays cannot be mipmaps + // Image arrays cannot be mipmaps. throw sycl::exception( sycl::errc::invalid, "Standard images cannot have num_levels greater than 1! Use " "image_type::mipmap for mipmap images."); } - } else if (this->type == image_type::array) { + return; + + case image_type::array: if (this->array_size <= 1) { - // Not an image array + // Not an image array. throw sycl::exception(sycl::errc::invalid, "Image array must have array_size greater than " "1! Use image_type::standard otherwise."); } if (this->depth != 0) { - // Image arrays must only be 1D or 2D + // Image arrays must only be 1D or 2D. throw sycl::exception(sycl::errc::invalid, "Cannot have 3D image arrays! Either depth must " "be 0 or array_size must be 1."); } if (this->num_levels != 1) { - // Image arrays cannot be mipmaps + // Image arrays cannot be mipmaps. throw sycl::exception(sycl::errc::invalid, "Cannot have mipmap image arrays! Either " "num_levels or array_size must be 1."); } - } else if (this->type == image_type::mipmap) { + return; + + case image_type::mipmap: if (this->array_size > 1) { - // Mipmap images cannot be arrays + // Mipmap images cannot be arrays. throw sycl::exception( sycl::errc::invalid, "Mipmap images cannot have array_size greater than 1! Use " "image_type::array for image arrays."); } if (this->num_levels <= 1) { - // Mipmaps must have more than one level + // Mipmaps must have more than one level. throw sycl::exception(sycl::errc::invalid, "Mipmap images must have num_levels greater than " "1! Use image_type::standard otherwise."); } - } else if (this->type == image_type::interop) { + return; + + case image_type::interop: // No checks to be made. - } else { - // Invalid image type + return; + + default: + // Invalid image type. throw sycl::exception(sycl::errc::invalid, "Invalid image descriptor image type"); } diff --git a/sycl/source/detail/bindless_images.cpp b/sycl/source/detail/bindless_images.cpp index d3850e5a3c318..64cb0e3357de9 100644 --- a/sycl/source/detail/bindless_images.cpp +++ b/sycl/source/detail/bindless_images.cpp @@ -30,12 +30,7 @@ void populate_pi_structs(const image_descriptor &desc, pi_image_desc &piDesc, piDesc.image_depth = desc.depth; if (desc.array_size > 1) { - // Image Array - if (desc.depth > 0) { - // Image arrays must be 1D or 2D - throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), - "No support for 3D image arrays."); - } + // Image array. piDesc.image_type = desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; } else { @@ -160,6 +155,8 @@ __SYCL_EXPORT void destroy_image_handle(sampled_image_handle &imageHandle, __SYCL_EXPORT image_mem_handle alloc_image_mem(const image_descriptor &desc, const sycl::device &syclDevice, const sycl::context &syclContext) { + desc.verify(); + std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); pi_context C = CtxImpl->getHandleRef(); @@ -168,8 +165,6 @@ alloc_image_mem(const image_descriptor &desc, const sycl::device &syclDevice, pi_device Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - desc.verify(); - pi_image_desc piDesc; pi_image_format piFormat; populate_pi_structs(desc, piDesc, piFormat); @@ -194,6 +189,8 @@ __SYCL_EXPORT_DEPRECATED("Distinct mipmap allocs are deprecated. " image_mem_handle alloc_mipmap_mem(const image_descriptor &desc, const sycl::device &syclDevice, const sycl::context &syclContext) { + desc.verify(); + std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); pi_context C = CtxImpl->getHandleRef(); @@ -202,11 +199,6 @@ image_mem_handle alloc_mipmap_mem(const image_descriptor &desc, pi_device Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - // Mipmaps must have more than one level - if (desc.num_levels <= 1) - throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), - "Mipmap number of levels must be 2 or more"); - pi_image_desc piDesc; pi_image_format piFormat; populate_pi_structs(desc, piDesc, piFormat); @@ -350,6 +342,8 @@ create_image(image_mem &imgMem, const image_descriptor &desc, __SYCL_EXPORT unsampled_image_handle create_image(image_mem_handle memHandle, const image_descriptor &desc, const sycl::device &syclDevice, const sycl::context &syclContext) { + desc.verify(); + std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); pi_context C = CtxImpl->getHandleRef(); @@ -414,6 +408,7 @@ __SYCL_EXPORT sampled_image_handle create_image(void *devPtr, size_t pitch, const bindless_image_sampler &sampler, const image_descriptor &desc, const sycl::device &syclDevice, const sycl::context &syclContext) { + desc.verify(); std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); @@ -500,6 +495,8 @@ image_mem_handle map_external_image_memory(interop_mem_handle memHandle, const image_descriptor &desc, const sycl::device &syclDevice, const sycl::context &syclContext) { + desc.verify(); + std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); pi_context C = CtxImpl->getHandleRef(); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 55b052f308338..a6ad56a25bbdf 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -959,6 +959,8 @@ void handler::ext_oneapi_copy( throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); + Desc.verify(); + MSrcPtr = Src; MDstPtr = Dest.raw_handle; @@ -969,13 +971,7 @@ void handler::ext_oneapi_copy( PiDesc.image_array_size = Desc.array_size; if (Desc.array_size > 1) { - // Image Array - if (Desc.depth > 0) { - // Image arrays must be 1D or 2D - throw sycl::exception( - sycl::make_error_code(sycl::errc::invalid), - "Image descriptor malformed - cannot copy 3D image arrays."); - } + // Image Array. PiDesc.image_type = Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; } else { @@ -1010,6 +1006,8 @@ void handler::ext_oneapi_copy( throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); + DestImgDesc.verify(); + MSrcPtr = Src; MDstPtr = Dest.raw_handle; @@ -1020,13 +1018,7 @@ void handler::ext_oneapi_copy( PiDesc.image_array_size = DestImgDesc.array_size; if (DestImgDesc.array_size > 1) { - // Image Array - if (DestImgDesc.depth > 0) { - // Image arrays must be 1D or 2D - throw sycl::exception( - sycl::make_error_code(sycl::errc::invalid), - "Image descriptor malformed - cannot copy 3D image arrays."); - } + // Image Array. PiDesc.image_type = DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; } else { @@ -1059,6 +1051,8 @@ void handler::ext_oneapi_copy( throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); + Desc.verify(); + MSrcPtr = Src.raw_handle; MDstPtr = Dest; @@ -1069,13 +1063,7 @@ void handler::ext_oneapi_copy( PiDesc.image_array_size = Desc.array_size; if (Desc.array_size > 1) { - // Image Array - if (Desc.depth > 0) { - // Image arrays must be 1D or 2D - throw sycl::exception( - sycl::make_error_code(sycl::errc::invalid), - "Image descriptor malformed - cannot copy 3D image arrays."); - } + // Image Array. PiDesc.image_type = Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; } else { @@ -1110,6 +1098,8 @@ void handler::ext_oneapi_copy( throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); + SrcImgDesc.verify(); + MSrcPtr = Src.raw_handle; MDstPtr = Dest; @@ -1120,13 +1110,7 @@ void handler::ext_oneapi_copy( PiDesc.image_array_size = SrcImgDesc.array_size; if (SrcImgDesc.array_size > 1) { - // Image Array - if (SrcImgDesc.depth > 0) { - // Image arrays must be 1D or 2D - throw sycl::exception( - sycl::make_error_code(sycl::errc::invalid), - "Image descriptor malformed - cannot copy 3D image arrays."); - } + // Image Array. PiDesc.image_type = SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; } else { @@ -1159,6 +1143,8 @@ void handler::ext_oneapi_copy( throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); + Desc.verify(); + MSrcPtr = Src; MDstPtr = Dest; @@ -1169,13 +1155,7 @@ void handler::ext_oneapi_copy( PiDesc.image_array_size = Desc.array_size; if (Desc.array_size > 1) { - // Image Array - if (Desc.depth > 0) { - // Image arrays must be 1D or 2D - throw sycl::exception( - sycl::make_error_code(sycl::errc::invalid), - "Image descriptor malformed - cannot copy 3D image arrays."); - } + // Image Array. PiDesc.image_type = Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; } else { @@ -1212,6 +1192,8 @@ void handler::ext_oneapi_copy( throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); + DeviceImgDesc.verify(); + MSrcPtr = Src; MDstPtr = Dest; @@ -1222,13 +1204,7 @@ void handler::ext_oneapi_copy( PiDesc.image_array_size = DeviceImgDesc.array_size; if (DeviceImgDesc.array_size > 1) { - // Image Array - if (DeviceImgDesc.depth > 0) { - // Image arrays must be 1D or 2D - throw sycl::exception( - sycl::make_error_code(sycl::errc::invalid), - "Image descriptor malformed - cannot copy 3D image arrays."); - } + // Image Array. PiDesc.image_type = DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; } else { diff --git a/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp index 8ae505cfc0972..a804eb4d4eeaa 100644 --- a/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp +++ b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp @@ -1,8 +1,7 @@ -// REQUIRES: linux // REQUIRES: cuda -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out -// RUN: %t.out +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out #include "../bindless_helpers.hpp" #include @@ -17,9 +16,9 @@ static sycl::device dev; namespace syclexp = sycl::ext::oneapi::experimental; -// Helpers and utilities +// Helpers and utilities. struct util { - // parallel_for 3D + // parallel_for 3D. template > static void run_ndim_test(sycl::queue q, sycl::range<3> globalSize, @@ -69,7 +68,7 @@ struct util { } } - // parallel_for 2D + // parallel_for 2D. template > static void run_ndim_test(sycl::queue q, sycl::range<2> globalSize, @@ -129,7 +128,7 @@ bool run_test(sycl::range dims, sycl::range localSize, sycl::queue q(dev); auto ctxt = q.get_context(); - // skip half tests if not supported + // skip half tests if not supported. if constexpr (std::is_same_v) { if (!dev.has(sycl::aspect::fp16)) { #ifdef VERBOSE_PRINT @@ -156,7 +155,7 @@ bool run_test(sycl::range dims, sycl::range localSize, CType, syclexp::image_type::array, 1, NDims > 2 ? dims[2] : dims[1]); - // Extension: allocate memory on device and create the handle + // Extension: allocate memory on device and create the handle. syclexp::image_mem img_mem_0(desc, q); syclexp::image_mem img_mem_1(desc, q); syclexp::image_mem img_mem_2(desc, q); @@ -165,7 +164,7 @@ bool run_test(sycl::range dims, sycl::range localSize, auto img_input_1 = syclexp::create_image(img_mem_1, desc, q); auto img_output = syclexp::create_image(img_mem_2, desc, q); - // Extension: copy over data to device + // Extension: copy over data to device. q.ext_oneapi_copy(input_0.data(), img_mem_0.get_handle(), desc); q.ext_oneapi_copy(input_1.data(), img_mem_1.get_handle(), desc); q.wait(); @@ -181,7 +180,7 @@ bool run_test(sycl::range dims, sycl::range localSize, q.wait(); } - // Cleanup + // Cleanup. syclexp::destroy_image_handle(img_input_0, q); syclexp::destroy_image_handle(img_input_1, q); syclexp::destroy_image_handle(img_output, q); @@ -193,7 +192,7 @@ bool run_test(sycl::range dims, sycl::range localSize, exit(-1); } - // collect and validate output + // collect and validate output. bool validated = true; for (int i = 0; i < num_elems; i++) { for (int j = 0; j < NChannels; ++j) { From 5afc472d4d238063222d8eb663031f197670fac4 Mon Sep 17 00:00:00 2001 From: Isaac Ault Date: Tue, 20 Feb 2024 17:17:13 +0000 Subject: [PATCH 5/7] Fix merge * Vulkan test passing * `read_image_array` accepts `HintT` template --- .../sycl_ext_oneapi_bindless_images.asciidoc | 5 ++- .../sycl/ext/oneapi/bindless_images.hpp | 39 ++++++++++++------- sycl/plugins/unified_runtime/CMakeLists.txt | 2 +- .../vulkan_interop/unsampled_images.cpp | 2 +- 4 files changed, 32 insertions(+), 16 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index edb9714cf832b..87bda22159ae3 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -1273,7 +1273,7 @@ passing the `unsampled_image_handle`, the coordinates, and the array index. ```c++ // Read an unsampled image array -template +template DataT read_image_array(const unsampled_image_handle &ImageHandle, const CoordT &Coords, const unsigned int ArrayLayer); ``` @@ -1281,6 +1281,9 @@ DataT read_image_array(const unsampled_image_handle &ImageHandle, Reading an image array follows the same restrictions on what coordinate types may be used as laid out in <>. +Reading an image array by providing a user-defined return `DataT` type also +follows the restrictions as laid out in <>. + [NOTE] ==== Attempting to read an image array with `read_image`, `read_mipmap` or any other diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index a6b3090569ed3..2ba445fff9c2a 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -1034,6 +1034,10 @@ DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]], * @brief Read an unsampled image array using its handle * * @tparam DataT The return type + * @tparam HintT A hint type that can be used to select for a specialized + * backend intrinsic when a user-defined type is passed as `DataT`. + * HintT should be a `sycl::vec` type, `sycl::half` type, or POD type. + * HintT must also have the same size as DataT. * @tparam CoordT The input coordinate type. e.g. int or int2 for 1D or 2D, * respectively * @param imageHandle The image handle @@ -1047,7 +1051,7 @@ DataT read_image(const sampled_image_handle &imageHandle [[maybe_unused]], * The name mangling should therefore not interfere with one * another */ -template +template DataT read_image_array(const unsampled_image_handle &imageHandle [[maybe_unused]], const CoordT &coords [[maybe_unused]], @@ -1059,12 +1063,18 @@ DataT read_image_array(const unsampled_image_handle &imageHandle "and 2D images respectively."); #ifdef __SYCL_DEVICE_ONLY__ -#if defined(__NVPTX__) - return __invoke__ImageArrayRead(imageHandle.raw_handle, coords, - arrayLayer); -#else - // TODO: add SPIRV part for unsampled image array read. -#endif + if constexpr (detail::is_recognized_standard_type()) { + return __invoke__ImageArrayRead(imageHandle.raw_handle, coords, + arrayLayer); + } else { + static_assert(sizeof(HintT) == sizeof(DataT), + "When trying to read a user-defined type, HintT must be of " + "the same size as the user-defined DataT."); + static_assert(detail::is_recognized_standard_type(), + "HintT must always be a recognized standard type"); + return sycl::bit_cast(__invoke__ImageArrayRead( + imageHandle.raw_handle, coords, arrayLayer)); + } #else assert(false); // Bindless images not yet implemented on host. #endif @@ -1128,12 +1138,15 @@ void write_image_array(const unsampled_image_handle &imageHandle "and 2D images respectively."); #ifdef __SYCL_DEVICE_ONLY__ -#if defined(__NVPTX__) - __invoke__ImageArrayWrite(static_cast(imageHandle.raw_handle), - coords, arrayLayer, detail::convert_color(color)); -#else - // TODO: add SPIRV part for unsampled image array write. -#endif + if constexpr (detail::is_recognized_standard_type()) { + __invoke__ImageArrayWrite(static_cast(imageHandle.raw_handle), + coords, arrayLayer, color); + } else { + // Convert DataT to a supported backend write type when user-defined type is + // passed + __invoke__ImageArrayWrite(static_cast(imageHandle.raw_handle), + coords, arrayLayer, detail::convert_color(color)); + } #else assert(false); // Bindless images not yet implemented on host. #endif diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 8e0866ce001b9..987b18fea0f58 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -62,7 +62,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) # Date: Tue Jan 23 12:34:08 2024 +0000 # # [Bindless][Exp] Add Support For Image Arrays - set(UNIFIED_RUNTIME_TAG 5181d72cdd60dc262f1f02423f2f7c8f1f45029d) + set(UNIFIED_RUNTIME_TAG 89ad3408f09ed22249f051795c91287f342193b4) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp index 29c6e339ce4ae..67c042eb36137 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -189,7 +189,7 @@ void run_ndim_test(sycl::range global_size, handles.input_2, sycl::int3(dim0, dim1, dim2)); auto sum = VecType( - bindless_helpers::add_kernel(px1, px2)); + bindless_helpers::add_kernel(px1, px2)); syclexp::write_image( handles.output, sycl::int3(dim0, dim1, dim2), VecType(sum)); } else { From efd0fc5ec942a24a89334aefb61103f8e00f8895 Mon Sep 17 00:00:00 2001 From: Isaac Ault Date: Tue, 20 Feb 2024 18:56:17 +0000 Subject: [PATCH 6/7] Fix Merge * Use convertToOpenCLType helper --- sycl/include/sycl/detail/image_ocl_types.hpp | 22 ++++++++------------ 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index d0913f75365d5..ae95060493675 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -85,12 +85,11 @@ static RetType __invoke__ImageArrayRead(ImageT Img, CoordT Coords, // Convert from sycl types to builtin types to get correct function mangling. using TempRetT = sycl::detail::ConvertToOpenCLType_t; - using TempArgT = sycl::detail::ConvertToOpenCLType_t; + auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); - TempArgT Arg = sycl::detail::convertDataToType(Coords); - TempRetT Ret = - __spirv_ImageArrayRead(Img, Arg, ArrayLayer); - return sycl::detail::convertDataToType(Ret); + return sycl::detail::convertFromOpenCLTypeFor( + __spirv_ImageArrayRead( + Img, TmpCoords, ArrayLayer)); } template @@ -98,14 +97,11 @@ static void __invoke__ImageArrayWrite(ImageT Img, CoordT Coords, int ArrayLayer, ValT Val) { // Convert from sycl types to builtin types to get correct function mangling. - using TmpValT = sycl::detail::ConvertToOpenCLType_t; - using TmpCoordT = sycl::detail::ConvertToOpenCLType_t; - - TmpCoordT TmpCoord = - sycl::detail::convertDataToType(Coords); - TmpValT TmpVal = sycl::detail::convertDataToType(Val); - __spirv_ImageArrayWrite(Img, TmpCoord, ArrayLayer, - TmpVal); + auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + auto TmpVal = sycl::detail::convertToOpenCLType(Val); + + __spirv_ImageArrayWrite( + Img, TmpCoords, ArrayLayer, TmpVal); } template From 217545a7199cbfc05f908b025c117162cc34aaa8 Mon Sep 17 00:00:00 2001 From: Isaac Ault Date: Fri, 23 Feb 2024 15:13:18 +0000 Subject: [PATCH 7/7] Fix merge formatting --- sycl/include/sycl/detail/image_ocl_types.hpp | 2 +- sycl/include/sycl/ext/oneapi/bindless_images.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index def6cfeaf2d1d..93d557e509f47 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -81,7 +81,7 @@ static RetType __invoke__ImageRead(ImageT Img, CoordT Coords) { template static RetType __invoke__ImageArrayFetch(ImageT Img, CoordT Coords, - int ArrayLayer) { + int ArrayLayer) { // Convert from sycl types to builtin types to get correct function mangling. using TempRetT = sycl::detail::ConvertToOpenCLType_t; diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index aff805d01a219..7f283cb14503c 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -1130,7 +1130,7 @@ DataT fetch_image_array(const unsampled_image_handle &imageHandle #ifdef __SYCL_DEVICE_ONLY__ if constexpr (detail::is_recognized_standard_type()) { return __invoke__ImageArrayFetch(imageHandle.raw_handle, coords, - arrayLayer); + arrayLayer); } else { static_assert(sizeof(HintT) == sizeof(DataT), "When trying to fetch a user-defined type, HintT must be of "