diff options
author | Yang Rong <rong.r.yang@intel.com> | 2013-10-21 15:47:56 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@linux.intel.com> | 2013-10-21 15:37:20 +0800 |
commit | e74c945b3ce98c785a390d0343ce0408360f8211 (patch) | |
tree | 6f9e90e70c514436ab3ce797a972042f3858a31b | |
parent | baec33cc8ed361575bc3ffbe3f61b08892f87257 (diff) |
Add more types to the async copy test case.
Signed-off-by: Yang Rong <rong.r.yang@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- | kernels/compiler_async_copy.cl | 38 | ||||
-rw-r--r-- | utests/compiler_async_copy.cpp | 86 |
2 files changed, 74 insertions, 50 deletions
```diff
diff --git a/kernels/compiler_async_copy.cl b/kernels/compiler_async_copy.cl
index a2432a46..06ec8e75 100644
--- a/kernels/compiler_async_copy.cl
+++ b/kernels/compiler_async_copy.cl
@@ -1,16 +1,24 @@
-__kernel void
-compiler_async_copy(__global int2 *dst, __global int2 *src, __local int2 *localBuffer, int copiesPerWorkItem)
-{
-  event_t event;
-  int copiesPerWorkgroup = copiesPerWorkItem * get_local_size(0);
-  int i;
-  event = async_work_group_copy((__local int2*)localBuffer, (__global const int2*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, (event_t)0 );
-  wait_group_events( 1, &event );
-
-  for(i=0; i<copiesPerWorkItem; i++)
-    localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] + (int2)(3, 3);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  event = async_work_group_copy((__global int2*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const int2*)localBuffer, (size_t)copiesPerWorkgroup, (event_t)0 );
-  wait_group_events( 1, &event );
+#define DEF(TYPE) \
+kernel void \
+compiler_async_copy_##TYPE(__global TYPE *dst, __global TYPE *src, __local TYPE *localBuffer, int copiesPerWorkItem) \
+{ \
+  event_t event; \
+  int copiesPerWorkgroup = copiesPerWorkItem * get_local_size(0); \
+  int i; \
+  event = async_work_group_copy((__local TYPE*)localBuffer, (__global const TYPE*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, (event_t)0 ); \
+  wait_group_events( 1, &event ); \
+\
+  event = async_work_group_copy((__global TYPE*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const TYPE*)localBuffer, (size_t)copiesPerWorkgroup, (event_t)0 ); \
+  wait_group_events( 1, &event ); \
 }
+
+DEF(char2);
+DEF(uchar2);
+DEF(short2);
+DEF(ushort2);
+DEF(int2);
+DEF(uint2);
+DEF(long2);
+DEF(ulong2);
+DEF(float2);
+DEF(double2);
diff --git a/utests/compiler_async_copy.cpp b/utests/compiler_async_copy.cpp
index 9384f85b..7951ff74 100644
--- a/utests/compiler_async_copy.cpp
+++ b/utests/compiler_async_copy.cpp
@@ -1,39 +1,55 @@
 #include "utest_helper.hpp"
+#include <stdint.h>
 
-static void compiler_async_copy(void)
-{
-  const size_t n = 1024;
-  const size_t local_size = 32;
-  const int copiesPerWorkItem = 5;
+typedef unsigned char uchar;
+typedef unsigned short ushort;
 
-  // Setup kernel and buffers
-  OCL_CREATE_KERNEL("compiler_async_copy");
-  OCL_CREATE_BUFFER(buf[0], 0, n * copiesPerWorkItem * sizeof(int) * 2, NULL);
-  OCL_CREATE_BUFFER(buf[1], 0, n * copiesPerWorkItem * sizeof(int) * 2, NULL);
-  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
-  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
-  OCL_SET_ARG(2, local_size*copiesPerWorkItem*sizeof(int)*2, NULL);
-  OCL_SET_ARG(3, sizeof(int), &copiesPerWorkItem);
+#define DEF(TYPE, KER_TYPE, VEC_SIZE) \
+static void compiler_async_copy_##KER_TYPE##VEC_SIZE(void) \
+{ \
+  const size_t n = 1024; \
+  const size_t local_size = 32; \
+  const int copiesPerWorkItem = 5; \
+\
+  /* Setup kernel and buffers */\
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_async_copy", "compiler_async_copy_" # KER_TYPE # VEC_SIZE); \
+  OCL_CREATE_BUFFER(buf[0], 0, n * copiesPerWorkItem * sizeof(TYPE) * VEC_SIZE, NULL); \
+  OCL_CREATE_BUFFER(buf[1], 0, n * copiesPerWorkItem * sizeof(TYPE) * VEC_SIZE, NULL); \
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \
+  OCL_SET_ARG(2, local_size*copiesPerWorkItem*sizeof(TYPE)*VEC_SIZE, NULL); \
+  OCL_SET_ARG(3, sizeof(int), &copiesPerWorkItem); \
+\
+  OCL_MAP_BUFFER(1); \
+  for (uint32_t i = 0; i < n * copiesPerWorkItem * VEC_SIZE; ++i) \
+    ((TYPE*)buf_data[1])[i] = rand(); \
+  OCL_UNMAP_BUFFER(1); \
+\
+  /* Run the kernel */\
+  globals[0] = n; \
+  locals[0] = local_size; \
+  OCL_NDRANGE(1); \
+  OCL_MAP_BUFFER(0); \
+  OCL_MAP_BUFFER(1); \
+\
+  /* Check results */\
+  TYPE *dst = (TYPE*)buf_data[0]; \
+  TYPE *src = (TYPE*)buf_data[1]; \
+  for (uint32_t i = 0; i < n * copiesPerWorkItem * VEC_SIZE; i++) \
+    OCL_ASSERT(dst[i] == src[i]); \
+  OCL_UNMAP_BUFFER(0); \
+  OCL_UNMAP_BUFFER(1); \
+} \
+\
+MAKE_UTEST_FROM_FUNCTION(compiler_async_copy_##KER_TYPE##VEC_SIZE);
 
-  OCL_MAP_BUFFER(1);
-  for (uint32_t i = 0; i < n * copiesPerWorkItem * 2; ++i)
-    ((int*)buf_data[1])[i] = rand();
-  OCL_UNMAP_BUFFER(1);
-
-  // Run the kernel
-  globals[0] = n;
-  locals[0] = local_size;
-  OCL_NDRANGE(1);
-  OCL_MAP_BUFFER(0);
-  OCL_MAP_BUFFER(1);
-
-  // Check results
-  int *dst = (int*)buf_data[0];
-  int *src = (int*)buf_data[1];
-  for (uint32_t i = 0; i < n * copiesPerWorkItem * 2; i++)
-    OCL_ASSERT(dst[i] == src[i] + 3);
-  OCL_UNMAP_BUFFER(0);
-  OCL_UNMAP_BUFFER(1);
-}
-
-MAKE_UTEST_FROM_FUNCTION(compiler_async_copy);
+DEF(char, char, 2);
+DEF(uchar, uchar, 2);
+DEF(short, short, 2);
+DEF(ushort, ushort, 2);
+DEF(int, int, 2);
+DEF(uint, uint, 2);
+DEF(int64_t, long, 2);
+DEF(uint64_t, ulong, 2);
+DEF(float, float, 2);
+DEF(double, double, 2);
```