summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrander <rander.wang@intel.com>2017-05-23 10:03:27 +0800
committerYang Rong <rong.r.yang@intel.com>2017-06-09 20:03:01 +0800
commit05cff5c5475c3f43770b3dabc9119f0136936564 (patch)
treea64851cb30ac441920cfc8539ea22d5085acfd74
parentf8053378a254e0eac9b5b2188e81a0308e290647 (diff)
utests: added for optimization negativeAdd
The negative add looks like `exp -a`; LLVM transforms it into `add x -a, 0` followed by `exp x`. Signed-off-by: rander.wang <rander.wang@intel.com> Reviewed-by: Pan Xiuli <xiuli.pan@intel.com>
-rw-r--r--kernels/compiler_remove_negative_add.cl4
-rw-r--r--utests/CMakeLists.txt3
-rw-r--r--utests/compiler_remove_negative_add.cpp40
3 files changed, 46 insertions, 1 deletions
diff --git a/kernels/compiler_remove_negative_add.cl b/kernels/compiler_remove_negative_add.cl
new file mode 100644
index 00000000..d6f72706
--- /dev/null
+++ b/kernels/compiler_remove_negative_add.cl
@@ -0,0 +1,4 @@
+/* Stores exp2(-src[i]) into dst[i], where i is this work-item's global id in dimension 0. */
+kernel void compiler_remove_negative_add(global float *src, global float *dst) {
+  int i = get_global_id(0);
+  /* The operand is negated before exp2; per the commit message this is the
+   * "negative add" shape the accompanying unit test is meant to cover. */
+  dst[i] = exp2(-src[i]);
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index f8abd457..afef07fc 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -304,7 +304,8 @@ set (utests_sources
runtime_pipe_query.cpp
compiler_pipe_builtin.cpp
compiler_device_enqueue.cpp
- compiler_sqrt_div.cpp)
+ compiler_sqrt_div.cpp
+ compiler_remove_negative_add.cpp)
if (LLVM_VERSION_NODOT VERSION_GREATER 34)
SET(utests_sources
diff --git a/utests/compiler_remove_negative_add.cpp b/utests/compiler_remove_negative_add.cpp
new file mode 100644
index 00000000..2b5df731
--- /dev/null
+++ b/utests/compiler_remove_negative_add.cpp
@@ -0,0 +1,40 @@
+#include "utest_helper.hpp"
+#include <cmath>
+
+// Unit test for the compiler_remove_negative_add kernel: feeds it successive
+// batches of n floats and checks every value read back from buf[1] against
+// the host's exp2(-x) for the same input.
+void compiler_remove_negative_add(void) {
+  const int n = 1024;        // elements per batch; also assigned to globals[0]
+  const int batches = 1024;  // number of batches pushed through the kernel
+  float src[n];              // host copy of the current batch's inputs
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_remove_negative_add");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  for (int batch = 0; batch < batches; batch++) {
+    // Inputs are (batch * n + i + 1) * 0.001, so they keep increasing from
+    // one batch to the next instead of restarting.
+    OCL_MAP_BUFFER(0);
+    float *in = (float *)buf_data[0];
+    for (int i = 0; i < n; ++i) {
+      const float x = (batch * n + i + 1) * 0.001f;
+      in[i] = x;
+      src[i] = x;
+    }
+    OCL_UNMAP_BUFFER(0);
+
+    // NOTE(review): presumably enqueues the kernel as a 1-dimensional range;
+    // confirm against utest_helper.hpp.
+    OCL_NDRANGE(1);
+
+    // Compare each output with the host reference, absolute tolerance 1e-3.
+    OCL_MAP_BUFFER(1);
+    float *dst = (float *)buf_data[1];
+    for (int i = 0; i < n; ++i) {
+      float cpu = exp2(-src[i]);
+      float gpu = dst[i];
+      if (fabsf(cpu - gpu) >= 1e-3) {
+        // Newline-terminated so the report (input, host value, device value)
+        // is not fused with whatever is printed next.
+        printf("%f %f %f\n", src[i], cpu, gpu);
+        OCL_ASSERT(0);
+      }
+    }
+    OCL_UNMAP_BUFFER(1);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_remove_negative_add);