summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGrigore Lupescu <grigore.lupescu at intel.com>2016-04-11 17:40:56 +0300
committerYang Rong <rong.r.yang@intel.com>2016-04-14 14:13:59 +0800
commit75f0de837029fcb9d86d682b611a63d9ff56c70e (patch)
tree14882e70463b849408a147579d7cefe3c4f86a88
parentcb8fec56bc15fdd61ac8eab64399397c3609b6a1 (diff)
Utest: Add workgroup broadcast tests
Added the following unit tests: compiler_workgroup_broadcast_1D_int compiler_workgroup_broadcast_1D_long compiler_workgroup_broadcast_2D_int compiler_workgroup_broadcast_2D_long compiler_workgroup_broadcast_3D_int compiler_workgroup_broadcast_3D_long Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com> Reviewed-by: Pan Xiuli <xiuli.pan@intel.com>
-rw-r--r--kernels/compiler_workgroup_broadcast.cl128
-rw-r--r--utests/compiler_workgroup_broadcast.cpp319
2 files changed, 410 insertions, 37 deletions
diff --git a/kernels/compiler_workgroup_broadcast.cl b/kernels/compiler_workgroup_broadcast.cl
index 4df74e31..47ff0b70 100644
--- a/kernels/compiler_workgroup_broadcast.cl
+++ b/kernels/compiler_workgroup_broadcast.cl
@@ -1,9 +1,121 @@
-kernel void compiler_workgroup_broadcast(global uint *src, global uint *dst) {
- uint val = src[get_group_id(0)*(get_local_size(1) * get_local_size(0))
- + get_group_id(1)*(get_local_size(1) * get_local_size(0) * get_num_groups(0))
- + get_local_id(1)* get_local_size(0) + get_local_id(0)];
- uint bv = work_group_broadcast(val, 8, 3);
- dst[get_group_id(0)*(get_local_size(1) * get_local_size(0))
- + get_group_id(1)*(get_local_size(1) * get_local_size(0) * get_num_groups(0))
- + get_local_id(1)* get_local_size(0) + get_local_id(0)] = bv;
+/*
+ * Workgroup broadcast 1D functions
+ */
+
+kernel void compiler_workgroup_broadcast_1D_int(global int *src,
+ global int *dst,
+ uint wg_local_x,
+ uint wg_local_y,
+ uint wg_local_z)
+{
+ uint offset = 0;
+ uint index = offset + get_global_id(0);
+
+ int val = src[index];
+ int broadcast_val = work_group_broadcast(val,
+ wg_local_x);
+ dst[index] = broadcast_val;
+}
+
+kernel void compiler_workgroup_broadcast_1D_long(global long *src,
+ global long *dst,
+ uint wg_local_x,
+ uint wg_local_y,
+ uint wg_local_z)
+{
+ uint offset = 0;
+ uint index = offset + get_global_id(0);
+
+ long val = src[index];
+ long broadcast_val = work_group_broadcast(val,
+ wg_local_x);
+ dst[index] = broadcast_val;
+}
+
+/*
+ * Workgroup broadcast 2D functions
+ */
+kernel void compiler_workgroup_broadcast_2D_int(global int *src,
+ global int *dst,
+ uint wg_local_x,
+ uint wg_local_y,
+ uint wg_local_z)
+{
+ uint lsize = get_local_size(0) * get_local_size(1);
+ uint offset = get_group_id(0) * lsize +
+ get_group_id(1) * get_num_groups(0) * lsize;
+ uint index = offset + get_local_id(0) +
+ get_local_id(1) * get_local_size(0);
+
+ int val = src[index];
+ int broadcast_val = work_group_broadcast(val,
+ wg_local_x,
+ wg_local_y);
+ dst[index] = broadcast_val;
+}
+
+kernel void compiler_workgroup_broadcast_2D_long(global long *src,
+ global long *dst,
+ uint wg_local_x,
+ uint wg_local_y,
+ uint wg_local_z)
+{
+ uint lsize = get_local_size(0) * get_local_size(1);
+ uint offset = get_group_id(0) * lsize +
+ get_group_id(1) * get_num_groups(0) * lsize;
+ uint index = offset + get_local_id(0) +
+ get_local_id(1) * get_local_size(0);
+
+ long val = src[index];
+ long broadcast_val = work_group_broadcast(val,
+ wg_local_x,
+ wg_local_y);
+ dst[index] = broadcast_val;
+}
+
+/*
+ * Workgroup broadcast 3D functions
+ */
+kernel void compiler_workgroup_broadcast_3D_int(global int *src,
+ global int *dst,
+ uint wg_local_x,
+ uint wg_local_y,
+ uint wg_local_z)
+{
+ uint lsize = get_local_size(0) * get_local_size(1) * get_local_size(2);
+ uint offset = get_group_id(0) * lsize +
+ get_group_id(1) * get_num_groups(0) * lsize +
+ get_group_id(2) * get_num_groups(1) * get_num_groups(0) * lsize;
+ uint index = offset + get_local_id(0) +
+ get_local_id(1) * get_local_size(0) +
+ get_local_id(2) * get_local_size(1) * get_local_size(0);
+
+ int val = src[index];
+ int broadcast_val = work_group_broadcast(val,
+ wg_local_x,
+ wg_local_y,
+ wg_local_z);
+ dst[index] = broadcast_val;
+}
+
+kernel void compiler_workgroup_broadcast_3D_long(global long *src,
+ global long *dst,
+ uint wg_local_x,
+ uint wg_local_y,
+ uint wg_local_z)
+{
+ uint lsize = get_local_size(0) * get_local_size(1) * get_local_size(2);
+ uint offset = get_group_id(0) * lsize +
+ get_group_id(1) * get_num_groups(0) * lsize +
+ get_group_id(2) * get_num_groups(0) * get_num_groups(1) * lsize;
+ uint index = offset + get_local_id(0) +
+ get_local_id(1) * get_local_size(0) +
+ get_local_id(2) * get_local_size(1) * get_local_size(0);
+
+ long val = src[index];
+ long broadcast_val = work_group_broadcast(val,
+ wg_local_x,
+ wg_local_y,
+ wg_local_z);
+ dst[index] = broadcast_val;
}
diff --git a/utests/compiler_workgroup_broadcast.cpp b/utests/compiler_workgroup_broadcast.cpp
index d45e5d89..b57b8598 100644
--- a/utests/compiler_workgroup_broadcast.cpp
+++ b/utests/compiler_workgroup_broadcast.cpp
@@ -3,45 +3,306 @@
#include <iostream>
#include "utest_helper.hpp"
-void compiler_workgroup_broadcast(void)
+using namespace std;
+
+/* set to 1 for debug, output of input-expected data */
+#define DEBUG_STDOUT 0
+
+/* NDRANGE */
+#define WG_GLOBAL_SIZE_X 16
+#define WG_GLOBAL_SIZE_Y 4
+#define WG_GLOBAL_SIZE_Z 4
+
+#define WG_LOCAL_SIZE_X 16
+#define WG_LOCAL_SIZE_Y 2
+#define WG_LOCAL_SIZE_Z 2
+
+/* TODO debug bellow case, lid2 always stays 0, instead of 0 and 1
+ *
+ * #define WG_GLOBAL_SIZE_X 16
+ * #define WG_GLOBAL_SIZE_Y 1
+ * #define WG_GLOBAL_SIZE_Z 4
+ *
+ * #define WG_LOCAL_SIZE_X 16
+ * #define WG_LOCAL_SIZE_Y 1
+ * #define WG_LOCAL_SIZE_Z 2
+ */
+
+#define WG_LOCAL_X 5
+#define WG_LOCAL_Y 0
+#define WG_LOCAL_Z 0
+
+enum WG_BROADCAST
+{
+ WG_BROADCAST_1D,
+ WG_BROADCAST_2D,
+ WG_BROADCAST_3D
+};
+
+/*
+ * Generic compute-expected function for op BROADCAST type
+ * and any variable type
+ */
+template<class T>
+static void compute_expected(WG_BROADCAST wg_broadcast,
+ T* input,
+ T* expected,
+ uint32_t wg_global_size,
+ uint32_t wg_local_size)
+{
+ if(wg_broadcast == WG_BROADCAST_1D)
+ {
+ for(uint32_t i = 0; i < wg_local_size; i++)
+ expected[i] = input[WG_LOCAL_X];
+ }
+ else if(wg_broadcast == WG_BROADCAST_2D)
+ {
+ for(uint32_t i = 0; i < wg_local_size; i++)
+ expected[i] =
+ input[WG_LOCAL_X +
+ WG_LOCAL_Y * WG_LOCAL_SIZE_X];
+ }
+ else if(wg_broadcast == WG_BROADCAST_3D)
+ {
+ for(uint32_t i = 0; i < wg_local_size; i++)
+ expected[i] =
+ input[WG_LOCAL_X +
+ WG_LOCAL_Y * WG_LOCAL_SIZE_X +
+ WG_LOCAL_Z * WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y];
+ }
+}
+
+/*
+ * Generic input-expected generate function for op BROADCAST type
+ * and any variable type
+ */
+template<class T>
+static void generate_data(WG_BROADCAST wg_broadcast,
+ T* &input,
+ T* &expected,
+ uint32_t &wg_global_size,
+ uint32_t &wg_local_size)
+{
+ if(wg_broadcast == WG_BROADCAST_1D)
+ {
+ wg_global_size = WG_GLOBAL_SIZE_X;
+ wg_local_size = WG_LOCAL_SIZE_X;
+ }
+ else if(wg_broadcast == WG_BROADCAST_2D)
+ {
+ wg_global_size = WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y;
+ wg_local_size = WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y;
+ }
+ else if(wg_broadcast == WG_BROADCAST_3D)
+ {
+ wg_global_size = WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y * WG_GLOBAL_SIZE_Z;
+ wg_local_size = WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y * WG_LOCAL_SIZE_Z;
+ }
+
+ /* allocate input and expected arrays */
+ input = new T[wg_global_size];
+ expected = new T[wg_global_size];
+
+ /* base value for all data types */
+ T base_val = (long)7 << (sizeof(T) * 5 - 3);
+
+ /* seed for random inputs */
+ srand (time(NULL));
+
+ /* generate inputs and expected values */
+ for(uint32_t gid = 0; gid < wg_global_size; gid += wg_local_size)
+ {
+#if DEBUG_STDOUT
+ cout << endl << "IN: " << endl;
+#endif
+
+ /* input values */
+ for(uint32_t lid = 0; lid < wg_local_size; lid++)
+ {
+ /* initially 0, augment after */
+ input[gid + lid] = 0;
+
+ /* check all data types, test ideal for QWORD types */
+ input[gid + lid] += ((rand() % 2 - 1) * base_val);
+ /* add trailing random bits, tests GENERAL cases */
+ input[gid + lid] += (rand() % 112);
+
+#if DEBUG_STDOUT
+ /* output generated input */
+ cout << setw(4) << input[gid + lid] << ", " ;
+ if((lid + 1) % 8 == 0)
+ cout << endl;
+#endif
+ }
+
+ /* expected values */
+ compute_expected(wg_broadcast, input + gid, expected + gid, wg_global_size, wg_local_size);
+
+#if DEBUG_STDOUT
+ /* output expected input */
+ cout << endl << "EXP: " << endl;
+ for(uint32_t lid = 0; lid < wg_local_size; lid++){
+ cout << setw(4) << expected[gid + lid] << ", " ;
+ if((lid + 1) % 8 == 0)
+ cout << endl;
+ }
+#endif
+
+ }
+}
+
+/*
+ * Generic workgroup utest function for op BROADCAST type
+ * and any variable type
+ */
+template<class T>
+static void workgroup_generic(WG_BROADCAST wg_broadcast,
+ T* input,
+ T* expected)
{
- const size_t n0 = 32;
- const size_t n1 = 16;
- const size_t n = n0 * n1;
- uint32_t src[n];
+ uint32_t wg_global_size = 0;
+ uint32_t wg_local_size = 0;
- // Setup kernel and buffers
- OCL_CREATE_KERNEL("compiler_workgroup_broadcast");
- OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
- OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ cl_uint wg_local_x = WG_LOCAL_X;
+ cl_uint wg_local_y = WG_LOCAL_Y;
+ cl_uint wg_local_z = WG_LOCAL_Z;
+
+ /* input and expected data */
+ generate_data(wg_broadcast, input, expected, wg_global_size, wg_local_size);
+
+ /* prepare input for datatype */
+ OCL_CREATE_BUFFER(buf[0], 0, wg_global_size * sizeof(T), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, wg_global_size * sizeof(T), NULL);
OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
- globals[0] = n0;
- globals[1] = n1;
- locals[0] = 16;
- locals[1] = 16;
+ OCL_SET_ARG(2, sizeof(cl_uint), &wg_local_x);
+ OCL_SET_ARG(3, sizeof(cl_uint), &wg_local_y);
+ OCL_SET_ARG(4, sizeof(cl_uint), &wg_local_z);
- for (int32_t i = 0; i < (int32_t) n; ++i) {
- src[i] = i;
- }
+ /* set input data for GPU */
OCL_MAP_BUFFER(0);
- memcpy(buf_data[0], src, sizeof(src));
+ memcpy(buf_data[0], input, wg_global_size * sizeof(T));
OCL_UNMAP_BUFFER(0);
- // Run the kernel on GPU
- OCL_NDRANGE(2);
-
- // Compare
- OCL_MAP_BUFFER(1);
- for (int32_t i = 0; i < (int32_t) n/2; ++i) {
-// printf("%u ", ((uint32_t *)buf_data[1])[i]);
- OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 56);
+ /* run the kernel on GPU */
+ if(wg_broadcast == WG_BROADCAST_1D)
+ {
+ globals[0] = WG_GLOBAL_SIZE_X;
+ locals[0] = WG_LOCAL_SIZE_X;
+ OCL_NDRANGE(1);
+ }
+ else if(wg_broadcast == WG_BROADCAST_2D)
+ {
+ globals[0] = WG_GLOBAL_SIZE_X;
+ locals[0] = WG_LOCAL_SIZE_X;
+ globals[1] = WG_GLOBAL_SIZE_Y;
+ locals[1] = WG_LOCAL_SIZE_Y;
+ OCL_NDRANGE(2);
}
- for (int32_t i = n/2; i < (int32_t) n; ++i) {
- // printf("%u ", ((uint32_t *)buf_data[1])[i]);
- OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 312);
+ else if(wg_broadcast == WG_BROADCAST_3D)
+ {
+ globals[0] = WG_GLOBAL_SIZE_X;
+ locals[0] = WG_LOCAL_SIZE_X;
+ globals[1] = WG_GLOBAL_SIZE_Y;
+ locals[1] = WG_LOCAL_SIZE_Y;
+ globals[2] = WG_GLOBAL_SIZE_Z;
+ locals[2] = WG_LOCAL_SIZE_Y;
+ OCL_NDRANGE(3);
}
+
+ /* check if mismatch */
+ OCL_MAP_BUFFER(1);
+ uint32_t mismatches = 0;
+
+ for (uint32_t i = 0; i < wg_global_size; i++)
+ if(((T *)buf_data[1])[i] != *(expected + i))
+ {
+ /* found mismatch, increment */
+ mismatches++;
+
+#if DEBUG_STDOUT
+ /* output mismatch */
+ cout << "Err at " << i << ", " <<
+ ((T *)buf_data[1])[i] << " != " << *(expected + i) << endl;
+#endif
+ }
+
+#if DEBUG_STDOUT
+ /* output mismatch count */
+ cout << "mismatches " << mismatches << endl;
+#endif
+
OCL_UNMAP_BUFFER(1);
+
+ OCL_ASSERT(mismatches == 0);
+}
+
+/*
+ * Workgroup broadcast 1D functions
+ */
+void compiler_workgroup_broadcast_1D_int(void)
+{
+ cl_int *input = NULL;
+ cl_int *expected = NULL;
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+ "compiler_workgroup_broadcast_1D_int");
+ workgroup_generic(WG_BROADCAST_1D, input, expected);
}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_int);
-MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast);
+void compiler_workgroup_broadcast_1D_long(void)
+{
+ cl_long *input = NULL;
+ cl_long *expected = NULL;
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+ "compiler_workgroup_broadcast_1D_long");
+ workgroup_generic(WG_BROADCAST_1D, input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_long);
+
+/*
+ * Workgroup broadcast 2D functions
+ */
+void compiler_workgroup_broadcast_2D_int(void)
+{
+ cl_int *input = NULL;
+ cl_int *expected = NULL;
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+ "compiler_workgroup_broadcast_2D_int");
+ workgroup_generic(WG_BROADCAST_2D, input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_int);
+
+void compiler_workgroup_broadcast_2D_long(void)
+{
+ cl_long *input = NULL;
+ cl_long *expected = NULL;
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+ "compiler_workgroup_broadcast_2D_long");
+ workgroup_generic(WG_BROADCAST_2D, input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_long);
+
+
+/*
+ * Workgroup broadcast 3D functions
+ */
+void compiler_workgroup_broadcast_3D_int(void)
+{
+ cl_int *input = NULL;
+ cl_int *expected = NULL;
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+ "compiler_workgroup_broadcast_3D_int");
+ workgroup_generic(WG_BROADCAST_3D, input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_int);
+
+void compiler_workgroup_broadcast_3D_long(void)
+{
+ cl_long *input = NULL;
+ cl_long *expected = NULL;
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
+ "compiler_workgroup_broadcast_3D_long");
+ workgroup_generic(WG_BROADCAST_3D, input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_long);