summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGuo Yejun <yejun.guo@intel.com>2015-05-12 20:44:08 +0800
committerZhigang Gong <zhigang.gong@intel.com>2015-05-13 07:43:29 +0800
commit1257a871669ea3e5ab9f3bdc885d69d41c6a522c (patch)
tree13fbf225ef244da9bf4c86b7d3e3114b9d11de2b
parente8a7f0e248bdb0a2e579535fc8fd09ae2cc3c3a5 (diff)
add utest for intel_sub_group_shuffle
v2: correct kernel to be suitable for simd_width both 8 and 16 Signed-off-by: Guo Yejun <yejun.guo@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@intel.com>
-rw-r--r--kernels/compiler_sub_group_shuffle.cl18
-rw-r--r--utests/CMakeLists.txt3
-rw-r--r--utests/compiler_sub_group_shuffle.cpp45
3 files changed, 65 insertions, 1 deletions
diff --git a/kernels/compiler_sub_group_shuffle.cl b/kernels/compiler_sub_group_shuffle.cl
new file mode 100644
index 00000000..75adde3c
--- /dev/null
+++ b/kernels/compiler_sub_group_shuffle.cl
@@ -0,0 +1,18 @@
+__kernel void compiler_sub_group_shuffle(global int *dst, int c)
+{
+ int i = get_global_id(0);
+ if (i == 0)
+ dst[0] = get_sub_group_size();
+ dst++;
+
+ int from = i;
+ int j = get_sub_group_size() - get_sub_group_id() - 1;
+ int o0 = get_sub_group_id();
+ int o1 = intel_sub_group_shuffle(from, c);
+ int o2 = intel_sub_group_shuffle(from, 5);
+ int o3 = intel_sub_group_shuffle(from, j);
+ dst[i*4] = o0;
+ dst[i*4+1] = o1;
+ dst[i*4+2] = o2;
+ dst[i*4+3] = o3;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index d6ec88a8..899b52c3 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -210,7 +210,8 @@ set (utests_sources
runtime_use_host_ptr_buffer.cpp
runtime_alloc_host_ptr_buffer.cpp
compiler_get_sub_group_size.cpp
- compiler_get_sub_group_id.cpp)
+ compiler_get_sub_group_id.cpp
+ compiler_sub_group_shuffle.cpp)
if (LLVM_VERSION_NODOT VERSION_GREATER 34)
SET(utests_sources
diff --git a/utests/compiler_sub_group_shuffle.cpp b/utests/compiler_sub_group_shuffle.cpp
new file mode 100644
index 00000000..4ba8b998
--- /dev/null
+++ b/utests/compiler_sub_group_shuffle.cpp
@@ -0,0 +1,45 @@
+#include "utest_helper.hpp"
+
+void compiler_sub_group_shuffle(void)
+{
+ const size_t n = 32;
+ const int32_t buf_size = 4 * n + 1;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_sub_group_shuffle");
+ OCL_CREATE_BUFFER(buf[0], 0, buf_size * sizeof(int), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+
+ int c = 3;
+ OCL_SET_ARG(1, sizeof(int), &c);
+
+ globals[0] = n;
+ locals[0] = 16;
+
+ OCL_MAP_BUFFER(0);
+ for (int32_t i = 0; i < buf_size; ++i)
+ ((int*)buf_data[0])[i] = -1;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Compare
+ OCL_MAP_BUFFER(0);
+ int* dst = (int *)buf_data[0];
+ int suggroupsize = dst[0];
+ OCL_ASSERT(suggroupsize == 8 || suggroupsize == 16);
+
+ dst++;
+ for (int32_t i = 0; i < (int32_t) n; ++i){
+ int round = i / suggroupsize;
+ int index = i % suggroupsize;
+ OCL_ASSERT(index == dst[4*i]);
+ OCL_ASSERT((round * suggroupsize + c) == dst[4*i+1]);
+ OCL_ASSERT((round * suggroupsize + 5) == dst[4*i+2]);
+ OCL_ASSERT((round * suggroupsize + (suggroupsize - index - 1)) == dst[4*i+3]);
+ }
+ OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_sub_group_shuffle);