summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- kernels/compiler_array4.cl 9
-rw-r--r-- utests/CMakeLists.txt 1
-rw-r--r-- utests/compiler_array4.cpp 45
3 files changed, 55 insertions, 0 deletions
diff --git a/kernels/compiler_array4.cl b/kernels/compiler_array4.cl
new file mode 100644
index 00000000..6ddc9732
--- /dev/null
+++ b/kernels/compiler_array4.cl
@@ -0,0 +1,9 @@
+/* Work item g copies the first int of int4 element (offset + g - 1) from src4 to dst4. */
+__kernel void
+compiler_array4(__global int4 *src4, __global int4 *dst4, int offset)
+{
+  /* Scalar views of int4 element (offset + g); the copy below indexes backwards from them. */
+  __global int *dst = (__global int *)(dst4 + offset + get_global_id(0));
+  __global int *src = (__global int *)(src4 + offset + get_global_id(0));
+  dst[-4] = src[-4]; /* -4 ints == one int4 back */
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 4ad80f09..603338f6 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -36,6 +36,7 @@ set (utests_sources
compiler_array1.cpp
compiler_array2.cpp
compiler_array3.cpp
+ compiler_array4.cpp
compiler_byte_scatter.cpp
compiler_ceil.cpp
compiler_clz_short.cpp
diff --git a/utests/compiler_array4.cpp b/utests/compiler_array4.cpp
new file mode 100644
index 00000000..51b6d609
--- /dev/null
+++ b/utests/compiler_array4.cpp
@@ -0,0 +1,45 @@
+#include "utest_helper.hpp"
+
+// CPU reference for one work item: copy the first int of int4 element global_id.
+static void cpu(int global_id, int *src, int *dst)
+{ const int first = 4 * global_id; dst[first] = src[first]; }
+
+// Utest: runs the compiler_array4 kernel on random data and checks it against cpu().
+void compiler_array4(void)
+{
+  const size_t n = 16;
+  int cpu_dst[64], cpu_src[64];
+
+  // Setup kernel and buffers (room for four 32-bit values per work item)
+  OCL_CREATE_KERNEL("compiler_array4");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t) * 4, NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t) * 4, NULL);
+  uint32_t offset = 1; // kernel steps this many int4 forward, then accesses index [-4]
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(offset), &offset);
+  globals[0] = 16;
+  locals[0] = 16;
+
+  // Run random tests
+  for (uint32_t pass = 0; pass < 8; ++pass) {
+    OCL_MAP_BUFFER(0);
+    for (int32_t i = 0; i < (int32_t) n; ++i)
+      cpu_src[i * 4] = ((int32_t*)buf_data[0])[i * 4] = rand() % 16;
+    OCL_UNMAP_BUFFER(0);
+    // Run the kernel on GPU
+    OCL_NDRANGE(1);
+
+    // Run on CPU
+    for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+    // Compare the result of every work item (the loop used to stop at 11 of 16)
+    OCL_MAP_BUFFER(1);
+    for (int32_t i = 0; i < (int32_t) n; ++i) {
+      OCL_ASSERT(((int32_t*)buf_data[1])[i * 4] == cpu_dst[i * 4]);
+    }
+    OCL_UNMAP_BUFFER(1);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_array4);