summaryrefslogtreecommitdiff
path: root/utests/compiler_array2.cpp
diff options
context:
space:
mode:
authorbsegovia <segovia.benjamin@gmail.com>2012-09-17 12:46:31 +0000
committerbsegovia <segovia.benjamin@gmail.com>2012-09-17 12:46:31 +0000
commitc27f1b7668c068621e5ebb4282704afbdbf907be (patch)
treed21b295dacebca60326df06c0568e5f39f75f354 /utests/compiler_array2.cpp
parent74da5c20ee935149c5be1b1b70a8869cacc27cd7 (diff)
Added support for some Gen extensions. The idea is mostly to play with
Gen-specific hardware and propose a simple way to use it. We have four extensions here:
- Gen register regions. This allows us to perform strided loads in the register file. To implement that on top of OCL, the idea is to encapsulate them in a function with a side effect. Not really clean, but it works.
- Gen gather from register file. Same idea, but here we simply gather data from a bunch of registers.
- Vote any/all. This is basically the same idea as in PTX, i.e. uniform predicates for branches.
- Block read/write. Just to play with uniform load/store messages.
I added a bunch of tests for all of that and fixed things here and there to make them work.
Diffstat (limited to 'utests/compiler_array2.cpp')
-rw-r--r--utests/compiler_array2.cpp50
1 files changed, 50 insertions, 0 deletions
diff --git a/utests/compiler_array2.cpp b/utests/compiler_array2.cpp
new file mode 100644
index 00000000..61ca9da8
--- /dev/null
+++ b/utests/compiler_array2.cpp
@@ -0,0 +1,50 @@
+#include "utest_helper.hpp"
+
+// Host-side reference used by compiler_array2(): stores into dst[global_id]
+// the value the test expects for that index. `src` is accepted but never read.
+// global_id must be in [0, 15]; both lookup tables hold exactly 16 entries.
+static void cpu(int global_id, int *src, int *dst) {
+  int ascending[16];  // ascending[k] == k
+  int shifted[16];    // shifted[k]   == k + 1
+  for (int k = 0; k < 16; ++k) {
+    ascending[k] = k;
+    shifted[k] = k + 1;
+  }
+  // Index 15 reads the shifted table at its own position; every other index
+  // reads the ascending table mirrored around the end (15 - global_id).
+  const int *table = (global_id == 15) ? shifted : ascending;
+  const int slot = (global_id == 15) ? global_id : 15 - global_id;
+  dst[global_id] = table[slot];
+}
+
+// Unit test for the "compiler_array2" kernel. Creates two n-element buffers,
+// runs the kernel with global and local sizes of 16, and checks every element
+// of the output buffer against the values produced by the host-side cpu().
+void compiler_array2(void)
+{
+  const size_t n = 16;
+  int cpu_dst[16], cpu_src[16];
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_array2");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = 16;
+  locals[0] = 16;
+
+  // Run several passes, refilling the source buffer with random values each time
+  for (uint32_t pass = 0; pass < 8; ++pass) {
+    // Keep a host copy (cpu_src) of exactly what is written to buffer 0
+    OCL_MAP_BUFFER(0);
+    for (int32_t i = 0; i < (int32_t) n; ++i)
+      cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
+    OCL_UNMAP_BUFFER(0);
+
+    // Run the kernel on GPU
+    OCL_NDRANGE(1);
+
+    // Run on CPU
+    for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+    // Compare all n outputs. The bound used to be a hard-coded 11, which left
+    // indices 11..15 unchecked -- including index 15, the only one for which
+    // cpu() takes its `global_id == 15` branch.
+    OCL_MAP_BUFFER(1);
+    for (int32_t i = 0; i < (int32_t) n; ++i)
+      OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
+    OCL_UNMAP_BUFFER(1);
+  }
+}
+
+// Pass compiler_array2 to MAKE_UTEST_FROM_FUNCTION. The macro is not defined in
+// this file; presumably it registers the function with the unit-test runner --
+// TODO confirm against utest_helper.hpp.
+MAKE_UTEST_FROM_FUNCTION(compiler_array2);
+