summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Junyan He <junyan.he@linux.intel.com> 2015-11-18 14:06:47 +0800
committer Yang Rong <rong.r.yang@intel.com> 2015-11-25 13:22:16 +0800
commit 693669c9a73f0f8a724e0561ea0418e12a88c8ca (patch)
tree 3c6e18ed9f8adcfe2ffd74d3abda86a992d6269e
parent df210fe2df30a1aabecb545f9ba44b39610b32dc (diff)
Add utest for workgroup_broadcast.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- kernels/compiler_workgroup_broadcast.cl 9
-rw-r--r-- utests/CMakeLists.txt 1
-rw-r--r-- utests/compiler_workgroup_broadcast.cpp 47
3 files changed, 57 insertions, 0 deletions
diff --git a/kernels/compiler_workgroup_broadcast.cl b/kernels/compiler_workgroup_broadcast.cl
new file mode 100644
index 00000000..4df74e31
--- /dev/null
+++ b/kernels/compiler_workgroup_broadcast.cl
@@ -0,0 +1,9 @@
+kernel void compiler_workgroup_broadcast(global uint *src, global uint *dst) { // Each work-item loads its own src element, then stores the work-group's broadcast value at the same index in dst.
+ uint val = src[get_group_id(0)*(get_local_size(1) * get_local_size(0)) // Linear index: every work-group owns a contiguous run of local_size(0)*local_size(1) elements,
+ + get_group_id(1)*(get_local_size(1) * get_local_size(0) * get_num_groups(0)) // groups are ordered with dimension 0 varying fastest,
+ + get_local_id(1)* get_local_size(0) + get_local_id(0)]; // and work-items inside a group are laid out row-major (local id 0 fastest).
+ uint bv = work_group_broadcast(val, 8, 3); // All work-items in the group receive the val held by the work-item at local id (8, 3).
+ dst[get_group_id(0)*(get_local_size(1) * get_local_size(0)) // Same linear index as the src read above.
+ + get_group_id(1)*(get_local_size(1) * get_local_size(0) * get_num_groups(0))
+ + get_local_id(1)* get_local_size(0) + get_local_id(0)] = bv;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index b3a051d8..b4f8407f 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -119,6 +119,7 @@ set (utests_sources
compiler_math.cpp
compiler_atomic_functions.cpp
compiler_async_copy.cpp
+ compiler_workgroup_broadcast.cpp
compiler_async_stride_copy.cpp
compiler_insn_selection_min.cpp
compiler_insn_selection_max.cpp
diff --git a/utests/compiler_workgroup_broadcast.cpp b/utests/compiler_workgroup_broadcast.cpp
new file mode 100644
index 00000000..d45e5d89
--- /dev/null
+++ b/utests/compiler_workgroup_broadcast.cpp
@@ -0,0 +1,47 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+void compiler_workgroup_broadcast(void) // Unit test: runs the compiler_workgroup_broadcast kernel and checks each half of the output holds one broadcast value.
+{
+ const size_t n0 = 32; // global size in dimension 0
+ const size_t n1 = 16; // global size in dimension 1
+ const size_t n = n0 * n1; // total element count of each buffer (one per work-item)
+ uint32_t src[n];
+
+ // Set up the kernel, one input buffer (buf[0]) and one output buffer (buf[1]), and the 2-D launch sizes
+ OCL_CREATE_KERNEL("compiler_workgroup_broadcast");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n0;
+ globals[1] = n1;
+ locals[0] = 16; // 16x16 work-groups: the 32x16 range splits into two groups of 256 work-items along dimension 0
+ locals[1] = 16;
+
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ src[i] = i; // each element stores its own index, so the broadcast value identifies which element was read
+ }
+ OCL_MAP_BUFFER(0);
+ memcpy(buf_data[0], src, sizeof(src));
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(2);
+
+ // Compare: the kernel gives each work-group a contiguous 256-element slice, so each half must be uniform
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < (int32_t) n/2; ++i) {
+// First half: expect 56 = 3 * 16 + 8, the index loaded by local id (8, 3) in the first group.
+ OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 56);
+ }
+ for (int32_t i = n/2; i < (int32_t) n; ++i) {
+ // Second half: expect 312 = 256 + 56, the same local id offset by one 256-element group.
+ OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 312);
+ }
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast);