diff options
author | rander <rander.wang@intel.com> | 2017-05-23 10:03:27 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-06-09 20:03:01 +0800 |
commit | 05cff5c5475c3f43770b3dabc9119f0136936564 (patch) | |
tree | a64851cb30ac441920cfc8539ea22d5085acfd74 | |
parent | f8053378a254e0eac9b5b2188e81a0308e290647 (diff) |
utests: added for optimization negativeAdd
the negative Add is like:
exp -a
llvm transfer it to:
add x -a, 0
exp x
Signed-off-by: rander.wang <rander.wang@intel.com>
Reviewed-by: Pan Xiuli <xiuli.pan@intel.com>
-rw-r--r-- | kernels/compiler_remove_negative_add.cl | 4 | ||||
-rw-r--r-- | utests/CMakeLists.txt | 3 | ||||
-rw-r--r-- | utests/compiler_remove_negative_add.cpp | 40 |
3 files changed, 46 insertions, 1 deletions
```diff
diff --git a/kernels/compiler_remove_negative_add.cl b/kernels/compiler_remove_negative_add.cl
new file mode 100644
index 00000000..d6f72706
--- /dev/null
+++ b/kernels/compiler_remove_negative_add.cl
@@ -0,0 +1,4 @@
+kernel void compiler_remove_negative_add(global float *src, global float *dst) {
+  int i = get_global_id(0);
+  dst[i] = exp2(-src[i]);
+};
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index f8abd457..afef07fc 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -304,7 +304,8 @@ set (utests_sources
   runtime_pipe_query.cpp
   compiler_pipe_builtin.cpp
   compiler_device_enqueue.cpp
-  compiler_sqrt_div.cpp)
+  compiler_sqrt_div.cpp
+  compiler_remove_negative_add.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
diff --git a/utests/compiler_remove_negative_add.cpp b/utests/compiler_remove_negative_add.cpp
new file mode 100644
index 00000000..2b5df731
--- /dev/null
+++ b/utests/compiler_remove_negative_add.cpp
@@ -0,0 +1,40 @@
+#include "utest_helper.hpp"
+#include <cmath>
+
+void compiler_remove_negative_add(void) {
+  const int n = 1024;
+  float src[n];
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_remove_negative_add");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  for (int j = 0; j < 1024; j++) {
+    OCL_MAP_BUFFER(0);
+    for (int i = 0; i < n; ++i) {
+      src[i] = ((float *)buf_data[0])[i] = (j * n + i + 1) * 0.001f;
+    }
+    OCL_UNMAP_BUFFER(0);
+
+    OCL_NDRANGE(1);
+
+    OCL_MAP_BUFFER(1);
+    float *dst = (float *)buf_data[1];
+    for (int i = 0; i < n; ++i) {
+      float cpu = exp2(-src[i]);
+      float gpu = dst[i];
+      if (fabsf(cpu - gpu) >= 1e-3) {
+        printf("%f %f %f", src[i], cpu, gpu);
+        OCL_ASSERT(0);
+      }
+    }
+    OCL_UNMAP_BUFFER(1);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_remove_negative_add);
```