summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- kernels/compiler_device_enqueue.cl 18
-rw-r--r-- utests/CMakeLists.txt 3
-rw-r--r-- utests/compiler_device_enqueue.cpp 36
3 files changed, 56 insertions, 1 deletions
diff --git a/kernels/compiler_device_enqueue.cl b/kernels/compiler_device_enqueue.cl
new file mode 100644
index 00000000..cb201421
--- /dev/null
+++ b/kernels/compiler_device_enqueue.cl
@@ -0,0 +1,18 @@
+void block_fn(__global uint* val) /* Body of the block that compiler_device_enqueue enqueues. */
+{
+ atomic_add(val, get_global_id(0)); /* atomically add this work-item's dimension-0 global id to *val */
+}
+
+kernel void compiler_device_enqueue(uint glob_size_arr, __global uint* val) /* Each work-item enqueues glob_size_arr blocks that accumulate into val[tid]. */
+{
+ size_t tid = get_global_id(0);
+
+ for(int i = 0; i < glob_size_arr; i++) /* one enqueue_kernel call per iteration */
+ {
+ ndrange_t ndrange = ndrange_1D(glob_size_arr); /* 1-D range built from glob_size_arr */
+ __global uint * v = val + tid; /* this work-item's own slot in val */
+ void (^kernelBlock)(void) = ^{ block_fn(v); }; /* block capturing v; it only calls block_fn */
+ queue_t q = get_default_queue();
+ enqueue_kernel(q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
+ }
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index a1cf8e9f..339f16fb 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -229,7 +229,8 @@ set (utests_sources
compiler_get_sub_group_id.cpp
compiler_sub_group_shuffle.cpp
runtime_pipe_query.cpp
- compiler_pipe_builtin.cpp)
+ compiler_pipe_builtin.cpp
+ compiler_device_enqueue.cpp)
if (LLVM_VERSION_NODOT VERSION_GREATER 34)
SET(utests_sources
diff --git a/utests/compiler_device_enqueue.cpp b/utests/compiler_device_enqueue.cpp
new file mode 100644
index 00000000..a9e3e2df
--- /dev/null
+++ b/utests/compiler_device_enqueue.cpp
@@ -0,0 +1,36 @@
+#include "utest_helper.hpp"
+
+void compiler_device_enqueue(void) // Host test: runs the compiler_device_enqueue kernel and checks every output slot.
+{
+ const size_t n = 32; // global work size and number of uint32_t slots in buf[0]
+ const uint32_t global_sz = 3; // passed as kernel argument 0 (glob_size_arr)
+ uint32_t result = 0;
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_device_enqueue");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(uint32_t), &global_sz);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]);
+
+ OCL_MAP_BUFFER(0);
+ for(uint32_t i = 0; i < n; ++i) // zero exactly the n uint32_t elements the buffer was created with
+ ((uint32_t *)buf_data[0])[i] = 0;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel
+ globals[0] = n;
+ locals[0] = 16;
+ OCL_NDRANGE(1);
+
+ for(uint32_t i = 0; i < global_sz; ++i) { // expected slot value: sum of the ids 0..global_sz-1 added by one enqueue ...
+ result += i;
+ }
+ result *= global_sz; // ... times the global_sz enqueues the kernel loop issues per work-item
+
+ OCL_MAP_BUFFER(0);
+ for (uint32_t i = 0; i < n; ++i)
+ OCL_ASSERT(((uint32_t *)buf_data[0])[i] == result);
+ OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_device_enqueue); // NOTE(review): presumably registers the test with the utest runner; confirm against the macro's definition