diff options
-rw-r--r-- | backend/src/libocl/tmpl/ocl_simd.tmpl.h | 2 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_ocl_function.hxx | 2 | ||||
-rw-r--r-- | kernels/compiler_get_max_sub_group_size.cl | 5 | ||||
-rw-r--r-- | kernels/compiler_get_sub_group_id.cl | 2 | ||||
-rw-r--r-- | kernels/compiler_get_sub_group_size.cl | 5 | ||||
-rw-r--r-- | kernels/compiler_sub_group_shuffle.cl | 4 | ||||
-rw-r--r-- | src/kernels/cl_internal_block_motion_estimate_intel.cl | 2 | ||||
-rw-r--r-- | utests/CMakeLists.txt | 2 | ||||
-rw-r--r-- | utests/compiler_get_max_sub_group_size.cpp (renamed from utests/compiler_get_sub_group_size.cpp) | 6 |
9 files changed, 15 insertions, 15 deletions
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h index 67a1ceeb..40550709 100644 --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h @@ -26,7 +26,7 @@ int sub_group_any(int); int sub_group_all(int); -uint get_sub_group_size(void); +uint get_max_sub_group_size(void); uint get_sub_group_id(void); OVERLOADABLE float intel_sub_group_shuffle(float x, uint c); diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 8023744b..046e1ae3 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -161,7 +161,7 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F16_TO_U32, _Z16convert_uint_satDh) // SIMD level function for internal usage DECL_LLVM_GEN_FUNCTION(SIMD_ANY, sub_group_any) DECL_LLVM_GEN_FUNCTION(SIMD_ALL, sub_group_all) -DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_sub_group_size) +DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_max_sub_group_size) DECL_LLVM_GEN_FUNCTION(SIMD_ID, get_sub_group_id) DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, intel_sub_group_shuffle) diff --git a/kernels/compiler_get_max_sub_group_size.cl b/kernels/compiler_get_max_sub_group_size.cl new file mode 100644 index 00000000..8fb263bb --- /dev/null +++ b/kernels/compiler_get_max_sub_group_size.cl @@ -0,0 +1,5 @@ +__kernel void compiler_get_max_sub_group_size(global int *dst) +{ + int i = get_global_id(0); + dst[i] = get_max_sub_group_size(); +} diff --git a/kernels/compiler_get_sub_group_id.cl b/kernels/compiler_get_sub_group_id.cl index 10033ff0..afaa2a64 100644 --- a/kernels/compiler_get_sub_group_id.cl +++ b/kernels/compiler_get_sub_group_id.cl @@ -2,7 +2,7 @@ __kernel void compiler_get_sub_group_id(global int *dst) { int i = get_global_id(0); if (i == 0) - dst[0] = get_sub_group_size(); + dst[0] = get_max_sub_group_size(); dst[i+1] = get_sub_group_id(); } diff --git a/kernels/compiler_get_sub_group_size.cl b/kernels/compiler_get_sub_group_size.cl deleted file mode 100644 index 4d5e3ebc..00000000 --- a/kernels/compiler_get_sub_group_size.cl +++ /dev/null @@ -1,5 +0,0 @@ -__kernel void compiler_get_sub_group_size(global int *dst) -{ - int i = get_global_id(0); - dst[i] = get_sub_group_size(); -} diff --git a/kernels/compiler_sub_group_shuffle.cl b/kernels/compiler_sub_group_shuffle.cl index 75adde3c..a171faa6 100644 --- a/kernels/compiler_sub_group_shuffle.cl +++ b/kernels/compiler_sub_group_shuffle.cl @@ -2,11 +2,11 @@ __kernel void compiler_sub_group_shuffle(global int *dst, int c) { int i = get_global_id(0); if (i == 0) - dst[0] = get_sub_group_size(); + dst[0] = get_max_sub_group_size(); dst++; int from = i; - int j = get_sub_group_size() - get_sub_group_id() - 1; + int j = get_max_sub_group_size() - get_sub_group_id() - 1; int o0 = get_sub_group_id(); int o1 = intel_sub_group_shuffle(from, c); int o2 = intel_sub_group_shuffle(from, 5); diff --git a/src/kernels/cl_internal_block_motion_estimate_intel.cl b/src/kernels/cl_internal_block_motion_estimate_intel.cl index 1f28f4e2..23c5488e 100644 --- a/src/kernels/cl_internal_block_motion_estimate_intel.cl +++ b/src/kernels/cl_internal_block_motion_estimate_intel.cl @@ -262,7 +262,7 @@ void block_motion_estimate_intel(accelerator_intel_t accel, ushort res[16]; uint write_back_dwx; - uint simd_width = get_sub_group_size(); + uint simd_width = get_max_sub_group_size(); /* In simd 8 mode, one kernel variable 'uint' map to 8 dword. * In simd 16 mode, one kernel variable 'uint' map to 16 dword. diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index d846b7b7..2c6aea47 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -220,7 +220,7 @@ set (utests_sources runtime_use_host_ptr_buffer.cpp runtime_alloc_host_ptr_buffer.cpp runtime_use_host_ptr_image.cpp - compiler_get_sub_group_size.cpp + compiler_get_max_sub_group_size.cpp compiler_get_sub_group_id.cpp compiler_sub_group_shuffle.cpp builtin_global_linear_id.cpp diff --git a/utests/compiler_get_sub_group_size.cpp b/utests/compiler_get_max_sub_group_size.cpp index 20339d72..debdf940 100644 --- a/utests/compiler_get_sub_group_size.cpp +++ b/utests/compiler_get_max_sub_group_size.cpp @@ -1,11 +1,11 @@ #include "utest_helper.hpp" -void compiler_get_sub_group_size(void) +void compiler_get_max_sub_group_size(void) { const size_t n = 256; // Setup kernel and buffers - OCL_CREATE_KERNEL("compiler_get_sub_group_size"); + OCL_CREATE_KERNEL("compiler_get_max_sub_group_size"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); @@ -29,4 +29,4 @@ void compiler_get_sub_group_size(void) OCL_UNMAP_BUFFER(0); } -MAKE_UTEST_FROM_FUNCTION(compiler_get_sub_group_size); +MAKE_UTEST_FROM_FUNCTION(compiler_get_max_sub_group_size); |