diff options
author | Junyan He <junyan.he@linux.intel.com> | 2015-12-01 16:10:38 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2015-12-14 15:11:53 +0800 |
commit | 3c083db0b9b54b838a17843c78f9be437016847c (patch) | |
tree | 0af239a269d02f9f1e0568fc87c6aa20dae6740b | |
parent | 52fda87c89d7631e9283ce94ae1ec1b6b3ad5621 (diff) |
Utests: Add test cases for workgroup reduce max/min.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | kernels/compiler_workgroup_reduce.cl | 28 | ||||
-rw-r--r-- | utests/CMakeLists.txt | 1 | ||||
-rw-r--r-- | utests/compiler_workgroup_reduce.cpp | 174 |
3 files changed, 203 insertions, 0 deletions
diff --git a/kernels/compiler_workgroup_reduce.cl b/kernels/compiler_workgroup_reduce.cl
new file mode 100644
index 00000000..27d306bc
--- /dev/null
+++ b/kernels/compiler_workgroup_reduce.cl
@@ -0,0 +1,28 @@
+kernel void compiler_workgroup_reduce_min_uniform(uint src, global uint *dst) {
+  uint min_val = work_group_reduce_min(src);
+  dst[get_local_id(0)] = min_val;
+}
+
+kernel void compiler_workgroup_reduce_min_uint(global uint *src, global uint *dst) {
+  uint val = src[get_local_id(0)];
+  uint min_val = work_group_reduce_min(val);
+  dst[get_local_id(0)] = min_val;
+}
+
+kernel void compiler_workgroup_reduce_max_uint(global uint *src, global uint *dst) {
+  uint val = src[get_local_id(0)];
+  uint max_val = work_group_reduce_max(val);
+  dst[get_local_id(0)] = max_val;
+}
+
+kernel void compiler_workgroup_reduce_min_float(global float *src, global float *dst) {
+  float val = src[get_local_id(0)];
+  float min_val = work_group_reduce_min(val);
+  dst[get_local_id(0)] = min_val;
+}
+
+kernel void compiler_workgroup_reduce_max_float(global float *src, global float *dst) {
+  float val = src[get_local_id(0)];
+  float max_val = work_group_reduce_max(val);
+  dst[get_local_id(0)] = max_val;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 5b4657c0..74189c62 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -120,6 +120,7 @@ set (utests_sources
   compiler_atomic_functions.cpp
   compiler_async_copy.cpp
   compiler_workgroup_broadcast.cpp
+  compiler_workgroup_reduce.cpp
   compiler_async_stride_copy.cpp
   compiler_insn_selection_min.cpp
   compiler_insn_selection_max.cpp
diff --git a/utests/compiler_workgroup_reduce.cpp b/utests/compiler_workgroup_reduce.cpp
new file mode 100644
index 00000000..6340cb2a
--- /dev/null
+++ b/utests/compiler_workgroup_reduce.cpp
@@ -0,0 +1,174 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+void compiler_workgroup_reduce_min_uniform(void)
+{
+  const size_t n = 17;
+  uint32_t src = 253;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce", "compiler_workgroup_reduce_min_uniform");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+  OCL_SET_ARG(0, sizeof(uint32_t), &src);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]);
+  globals[0] = n;
+  locals[0] = n;
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(0);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    //printf("%u ", ((uint32_t *)buf_data[0])[i]);
+    OCL_ASSERT(((uint32_t *)buf_data[0])[i] == 253);
+  }
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_min_uniform);
+
+static uint32_t test_array_uint[64] = {23, 34, 16, 91, 25, 133, 7787, 134, 987, 9853, 33, 21, 865, 1441, 9083, 812,
+                                       10, 43435, 63, 445, 253, 65, 24, 30, 76, 989, 120 ,113 ,133, 41, 18, 91,
+                                       8321, 6712, 881, 911, 5, 788, 8991, 88, 19, 1110, 1231, 1341, 1983, 1983, 91, 212,
+                                       712, 31, 881, 963, 6801, 651, 9810, 77, 98, 5, 16, 1888, 141, 1613, 1771, 16};
+
+void compiler_workgroup_reduce_min_uint(void)
+{
+  const size_t n = 60;
+  uint32_t* src = test_array_uint;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce", "compiler_workgroup_reduce_min_uint");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = n;
+
+  OCL_MAP_BUFFER(0);
+  memcpy(buf_data[0], src, n * sizeof(uint32_t));
+  OCL_UNMAP_BUFFER(0);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    //printf("%u ", ((uint32_t *)buf_data[1])[i]);
+    OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 5);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_min_uint);
+
+void compiler_workgroup_reduce_max_uint(void)
+{
+  const size_t n = 60;
+  uint32_t* src = test_array_uint;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce", "compiler_workgroup_reduce_max_uint");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = n;
+
+  OCL_MAP_BUFFER(0);
+  memcpy(buf_data[0], src, n * sizeof(uint32_t));
+  OCL_UNMAP_BUFFER(0);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    //printf("%u ", ((uint32_t *)buf_data[1])[i]);
+    OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 43435);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_max_uint);
+
+static float test_array_float[64] =
+  {1.0234f, 0.34e32f, -13441.4334f, 1893.21f, -9999.0f, -88.00f, 1.3f, 1.0f,
+   2.33f, 134.44f, 263.0f, 1.0f, 0.0f, 344.900043f, 0.1e30f, 1.0e10f,
+
+   10.0f, 43.435f, 6.3f, 44.545f, 0.253f, 6.5f, 0.24f, 10.30f,
+   1312.76f, -0.00989f, 124213.120f, 1.13f, 1.33f, 4.1f, 1.8f, 3234.91f,
+
+   3.21e38f, 6.712f, 0.881f, 12.91f, 5.0f, 7.88f, 128991.0f, 8.8f,
+   0.0019f, -0.1110f, 12.0e31f, -3.3E38f, 1.983f, 1.983f, 10091.0f, 2.12f,
+
+   0.88712, 1e31f, -881.0f, -196e3f, 68.01f, -651.121f, 9.810f, -0.77f,
+   100.98f, 50.0f, 1000.16f, -18e18f, 0.141f, 1613.0f, 1.771f, -16.13f};
+
+void compiler_workgroup_reduce_min_float(void)
+{
+  const size_t n = 60;
+  float* src = test_array_float;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce", "compiler_workgroup_reduce_min_float");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = n;
+
+  OCL_MAP_BUFFER(0);
+  memcpy(buf_data[0], src, n * sizeof(float));
+  OCL_UNMAP_BUFFER(0);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    //printf("%f ", ((float *)buf_data[1])[i]);
+    OCL_ASSERT(((float *)buf_data[1])[i] == -3.3E38f);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_min_float);
+
+void compiler_workgroup_reduce_max_float(void)
+{
+  const size_t n = 60;
+  float* src = test_array_float;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce", "compiler_workgroup_reduce_max_float");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = n;
+
+  OCL_MAP_BUFFER(0);
+  memcpy(buf_data[0], src, n * sizeof(float));
+  OCL_UNMAP_BUFFER(0);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    //printf("%f ", ((float *)buf_data[1])[i]);
+    OCL_ASSERT(((float *)buf_data[1])[i] == 3.21e38f);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_max_float);
+
|