diff options
author | Grigore Lupescu <grigore.lupescu at intel.com> | 2016-04-11 17:40:56 +0300 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2016-04-14 14:13:59 +0800 |
commit | 75f0de837029fcb9d86d682b611a63d9ff56c70e (patch) | |
tree | 14882e70463b849408a147579d7cefe3c4f86a88 | |
parent | cb8fec56bc15fdd61ac8eab64399397c3609b6a1 (diff) |
Utest: Add workgroup broadcast tests
Added the following unit tests:
compiler_workgroup_broadcast_1D_int
compiler_workgroup_broadcast_1D_long
compiler_workgroup_broadcast_2D_int
compiler_workgroup_broadcast_2D_long
compiler_workgroup_broadcast_3D_int
compiler_workgroup_broadcast_3D_long
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
Reviewed-by: Pan Xiuli <xiuli.pan@intel.com>
-rw-r--r-- | kernels/compiler_workgroup_broadcast.cl | 128 | ||||
-rw-r--r-- | utests/compiler_workgroup_broadcast.cpp | 319 |
2 files changed, 410 insertions, 37 deletions
diff --git a/kernels/compiler_workgroup_broadcast.cl b/kernels/compiler_workgroup_broadcast.cl index 4df74e31..47ff0b70 100644 --- a/kernels/compiler_workgroup_broadcast.cl +++ b/kernels/compiler_workgroup_broadcast.cl @@ -1,9 +1,121 @@ -kernel void compiler_workgroup_broadcast(global uint *src, global uint *dst) { - uint val = src[get_group_id(0)*(get_local_size(1) * get_local_size(0)) - + get_group_id(1)*(get_local_size(1) * get_local_size(0) * get_num_groups(0)) - + get_local_id(1)* get_local_size(0) + get_local_id(0)]; - uint bv = work_group_broadcast(val, 8, 3); - dst[get_group_id(0)*(get_local_size(1) * get_local_size(0)) - + get_group_id(1)*(get_local_size(1) * get_local_size(0) * get_num_groups(0)) - + get_local_id(1)* get_local_size(0) + get_local_id(0)] = bv; +/* + * Workgroup broadcast 1D functions + */ + +kernel void compiler_workgroup_broadcast_1D_int(global int *src, + global int *dst, + uint wg_local_x, + uint wg_local_y, + uint wg_local_z) +{ + uint offset = 0; + uint index = offset + get_global_id(0); + + int val = src[index]; + int broadcast_val = work_group_broadcast(val, + wg_local_x); + dst[index] = broadcast_val; +} + +kernel void compiler_workgroup_broadcast_1D_long(global long *src, + global long *dst, + uint wg_local_x, + uint wg_local_y, + uint wg_local_z) +{ + uint offset = 0; + uint index = offset + get_global_id(0); + + long val = src[index]; + long broadcast_val = work_group_broadcast(val, + wg_local_x); + dst[index] = broadcast_val; +} + +/* + * Workgroup broadcast 2D functions + */ +kernel void compiler_workgroup_broadcast_2D_int(global int *src, + global int *dst, + uint wg_local_x, + uint wg_local_y, + uint wg_local_z) +{ + uint lsize = get_local_size(0) * get_local_size(1); + uint offset = get_group_id(0) * lsize + + get_group_id(1) * get_num_groups(0) * lsize; + uint index = offset + get_local_id(0) + + get_local_id(1) * get_local_size(0); + + int val = src[index]; + int broadcast_val = work_group_broadcast(val, + wg_local_x, + wg_local_y); + dst[index] = broadcast_val; +} + +kernel void compiler_workgroup_broadcast_2D_long(global long *src, + global long *dst, + uint wg_local_x, + uint wg_local_y, + uint wg_local_z) +{ + uint lsize = get_local_size(0) * get_local_size(1); + uint offset = get_group_id(0) * lsize + + get_group_id(1) * get_num_groups(0) * lsize; + uint index = offset + get_local_id(0) + + get_local_id(1) * get_local_size(0); + + long val = src[index]; + long broadcast_val = work_group_broadcast(val, + wg_local_x, + wg_local_y); + dst[index] = broadcast_val; +} + +/* + * Workgroup broadcast 3D functions + */ +kernel void compiler_workgroup_broadcast_3D_int(global int *src, + global int *dst, + uint wg_local_x, + uint wg_local_y, + uint wg_local_z) +{ + uint lsize = get_local_size(0) * get_local_size(1) * get_local_size(2); + uint offset = get_group_id(0) * lsize + + get_group_id(1) * get_num_groups(0) * lsize + + get_group_id(2) * get_num_groups(1) * get_num_groups(0) * lsize; + uint index = offset + get_local_id(0) + + get_local_id(1) * get_local_size(0) + + get_local_id(2) * get_local_size(1) * get_local_size(0); + + int val = src[index]; + int broadcast_val = work_group_broadcast(val, + wg_local_x, + wg_local_y, + wg_local_z); + dst[index] = broadcast_val; +} + +kernel void compiler_workgroup_broadcast_3D_long(global long *src, + global long *dst, + uint wg_local_x, + uint wg_local_y, + uint wg_local_z) +{ + uint lsize = get_local_size(0) * get_local_size(1) * get_local_size(2); + uint offset = get_group_id(0) * lsize + + get_group_id(1) * get_num_groups(0) * lsize + + get_group_id(2) * get_num_groups(0) * get_num_groups(1) * lsize; + uint index = offset + get_local_id(0) + + get_local_id(1) * get_local_size(0) + + get_local_id(2) * get_local_size(1) * get_local_size(0); + + long val = src[index]; + long broadcast_val = work_group_broadcast(val, + wg_local_x, + wg_local_y, + wg_local_z); + dst[index] = broadcast_val; } diff --git a/utests/compiler_workgroup_broadcast.cpp b/utests/compiler_workgroup_broadcast.cpp index d45e5d89..b57b8598 100644 --- a/utests/compiler_workgroup_broadcast.cpp +++ b/utests/compiler_workgroup_broadcast.cpp @@ -3,45 +3,306 @@ #include <iostream> #include "utest_helper.hpp" -void compiler_workgroup_broadcast(void) +using namespace std; + +/* set to 1 for debug, output of input-expected data */ +#define DEBUG_STDOUT 0 + +/* NDRANGE */ +#define WG_GLOBAL_SIZE_X 16 +#define WG_GLOBAL_SIZE_Y 4 +#define WG_GLOBAL_SIZE_Z 4 + +#define WG_LOCAL_SIZE_X 16 +#define WG_LOCAL_SIZE_Y 2 +#define WG_LOCAL_SIZE_Z 2 + +/* TODO debug bellow case, lid2 always stays 0, instead of 0 and 1 + * + * #define WG_GLOBAL_SIZE_X 16 + * #define WG_GLOBAL_SIZE_Y 1 + * #define WG_GLOBAL_SIZE_Z 4 + * + * #define WG_LOCAL_SIZE_X 16 + * #define WG_LOCAL_SIZE_Y 1 + * #define WG_LOCAL_SIZE_Z 2 + */ + +#define WG_LOCAL_X 5 +#define WG_LOCAL_Y 0 +#define WG_LOCAL_Z 0 + +enum WG_BROADCAST +{ + WG_BROADCAST_1D, + WG_BROADCAST_2D, + WG_BROADCAST_3D +}; + +/* + * Generic compute-expected function for op BROADCAST type + * and any variable type + */ +template<class T> +static void compute_expected(WG_BROADCAST wg_broadcast, + T* input, + T* expected, + uint32_t wg_global_size, + uint32_t wg_local_size) +{ + if(wg_broadcast == WG_BROADCAST_1D) + { + for(uint32_t i = 0; i < wg_local_size; i++) + expected[i] = input[WG_LOCAL_X]; + } + else if(wg_broadcast == WG_BROADCAST_2D) + { + for(uint32_t i = 0; i < wg_local_size; i++) + expected[i] = + input[WG_LOCAL_X + + WG_LOCAL_Y * WG_LOCAL_SIZE_X]; + } + else if(wg_broadcast == WG_BROADCAST_3D) + { + for(uint32_t i = 0; i < wg_local_size; i++) + expected[i] = + input[WG_LOCAL_X + + WG_LOCAL_Y * WG_LOCAL_SIZE_X + + WG_LOCAL_Z * WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y]; + } +} + +/* + * Generic input-expected generate function for op BROADCAST type + * and any variable type + */ +template<class T> +static void generate_data(WG_BROADCAST wg_broadcast, + T* &input, + T* &expected, + uint32_t &wg_global_size, + uint32_t &wg_local_size) +{ + if(wg_broadcast == WG_BROADCAST_1D) + { + wg_global_size = WG_GLOBAL_SIZE_X; + wg_local_size = WG_LOCAL_SIZE_X; + } + else if(wg_broadcast == WG_BROADCAST_2D) + { + wg_global_size = WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y; + wg_local_size = WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y; + } + else if(wg_broadcast == WG_BROADCAST_3D) + { + wg_global_size = WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y * WG_GLOBAL_SIZE_Z; + wg_local_size = WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y * WG_LOCAL_SIZE_Z; + } + + /* allocate input and expected arrays */ + input = new T[wg_global_size]; + expected = new T[wg_global_size]; + + /* base value for all data types */ + T base_val = (long)7 << (sizeof(T) * 5 - 3); + + /* seed for random inputs */ + srand (time(NULL)); + + /* generate inputs and expected values */ + for(uint32_t gid = 0; gid < wg_global_size; gid += wg_local_size) + { +#if DEBUG_STDOUT + cout << endl << "IN: " << endl; +#endif + + /* input values */ + for(uint32_t lid = 0; lid < wg_local_size; lid++) + { + /* initially 0, augment after */ + input[gid + lid] = 0; + + /* check all data types, test ideal for QWORD types */ + input[gid + lid] += ((rand() % 2 - 1) * base_val); + /* add trailing random bits, tests GENERAL cases */ + input[gid + lid] += (rand() % 112); + +#if DEBUG_STDOUT + /* output generated input */ + cout << setw(4) << input[gid + lid] << ", " ; + if((lid + 1) % 8 == 0) + cout << endl; +#endif + } + + /* expected values */ + compute_expected(wg_broadcast, input + gid, expected + gid, wg_global_size, wg_local_size); + +#if DEBUG_STDOUT + /* output expected input */ + cout << endl << "EXP: " << endl; + for(uint32_t lid = 0; lid < wg_local_size; lid++){ + cout << setw(4) << expected[gid + lid] << ", " ; + if((lid + 1) % 8 == 0) + cout << endl; + } +#endif + + } +} + +/* + * Generic workgroup utest function for op BROADCAST type + * and any variable type + */ +template<class T> +static void workgroup_generic(WG_BROADCAST wg_broadcast, + T* input, + T* expected) { - const size_t n0 = 32; - const size_t n1 = 16; - const size_t n = n0 * n1; - uint32_t src[n]; + uint32_t wg_global_size = 0; + uint32_t wg_local_size = 0; - // Setup kernel and buffers - OCL_CREATE_KERNEL("compiler_workgroup_broadcast"); - OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); - OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); + cl_uint wg_local_x = WG_LOCAL_X; + cl_uint wg_local_y = WG_LOCAL_Y; + cl_uint wg_local_z = WG_LOCAL_Z; + + /* input and expected data */ + generate_data(wg_broadcast, input, expected, wg_global_size, wg_local_size); + + /* prepare input for datatype */ + OCL_CREATE_BUFFER(buf[0], 0, wg_global_size * sizeof(T), NULL); + OCL_CREATE_BUFFER(buf[1], 0, wg_global_size * sizeof(T), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); - globals[0] = n0; - globals[1] = n1; - locals[0] = 16; - locals[1] = 16; + OCL_SET_ARG(2, sizeof(cl_uint), &wg_local_x); + OCL_SET_ARG(3, sizeof(cl_uint), &wg_local_y); + OCL_SET_ARG(4, sizeof(cl_uint), &wg_local_z); - for (int32_t i = 0; i < (int32_t) n; ++i) { - src[i] = i; - } + /* set input data for GPU */ OCL_MAP_BUFFER(0); - memcpy(buf_data[0], src, sizeof(src)); + memcpy(buf_data[0], input, wg_global_size * sizeof(T)); OCL_UNMAP_BUFFER(0); - // Run the kernel on GPU - OCL_NDRANGE(2); - - // Compare - OCL_MAP_BUFFER(1); - for (int32_t i = 0; i < (int32_t) n/2; ++i) { -// printf("%u ", ((uint32_t *)buf_data[1])[i]); - OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 56); + /* run the kernel on GPU */ + if(wg_broadcast == WG_BROADCAST_1D) + { + globals[0] = WG_GLOBAL_SIZE_X; + locals[0] = WG_LOCAL_SIZE_X; + OCL_NDRANGE(1); + } + else if(wg_broadcast == WG_BROADCAST_2D) + { + globals[0] = WG_GLOBAL_SIZE_X; + locals[0] = WG_LOCAL_SIZE_X; + globals[1] = WG_GLOBAL_SIZE_Y; + locals[1] = WG_LOCAL_SIZE_Y; + OCL_NDRANGE(2); } - for (int32_t i = n/2; i < (int32_t) n; ++i) { - // printf("%u ", ((uint32_t *)buf_data[1])[i]); - OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 312); + else if(wg_broadcast == WG_BROADCAST_3D) + { + globals[0] = WG_GLOBAL_SIZE_X; + locals[0] = WG_LOCAL_SIZE_X; + globals[1] = WG_GLOBAL_SIZE_Y; + locals[1] = WG_LOCAL_SIZE_Y; + globals[2] = WG_GLOBAL_SIZE_Z; + locals[2] = WG_LOCAL_SIZE_Y; + OCL_NDRANGE(3); } + + /* check if mismatch */ + OCL_MAP_BUFFER(1); + uint32_t mismatches = 0; + + for (uint32_t i = 0; i < wg_global_size; i++) + if(((T *)buf_data[1])[i] != *(expected + i)) + { + /* found mismatch, increment */ + mismatches++; + +#if DEBUG_STDOUT + /* output mismatch */ + cout << "Err at " << i << ", " << + ((T *)buf_data[1])[i] << " != " << *(expected + i) << endl; +#endif + } + +#if DEBUG_STDOUT + /* output mismatch count */ + cout << "mismatches " << mismatches << endl; +#endif + OCL_UNMAP_BUFFER(1); + + OCL_ASSERT(mismatches == 0); +} + +/* + * Workgroup broadcast 1D functions + */ +void compiler_workgroup_broadcast_1D_int(void) +{ + cl_int *input = NULL; + cl_int *expected = NULL; + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", + "compiler_workgroup_broadcast_1D_int"); + workgroup_generic(WG_BROADCAST_1D, input, expected); } +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_int); -MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast); +void compiler_workgroup_broadcast_1D_long(void) +{ + cl_long *input = NULL; + cl_long *expected = NULL; + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", + "compiler_workgroup_broadcast_1D_long"); + workgroup_generic(WG_BROADCAST_1D, input, expected); +} +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_long); + +/* + * Workgroup broadcast 2D functions + */ +void compiler_workgroup_broadcast_2D_int(void) +{ + cl_int *input = NULL; + cl_int *expected = NULL; + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", + "compiler_workgroup_broadcast_2D_int"); + workgroup_generic(WG_BROADCAST_2D, input, expected); +} +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_int); + +void compiler_workgroup_broadcast_2D_long(void) +{ + cl_long *input = NULL; + cl_long *expected = NULL; + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", + "compiler_workgroup_broadcast_2D_long"); + workgroup_generic(WG_BROADCAST_2D, input, expected); +} +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_long); + + +/* + * Workgroup broadcast 3D functions + */ +void compiler_workgroup_broadcast_3D_int(void) +{ + cl_int *input = NULL; + cl_int *expected = NULL; + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", + "compiler_workgroup_broadcast_3D_int"); + workgroup_generic(WG_BROADCAST_3D, input, expected); +} +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_int); + +void compiler_workgroup_broadcast_3D_long(void) +{ + cl_long *input = NULL; + cl_long *expected = NULL; + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast", + "compiler_workgroup_broadcast_3D_long"); + workgroup_generic(WG_BROADCAST_3D, input, expected); +} +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_long); |