summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@intel.com>2014-08-27 10:33:42 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-09-03 12:29:01 +0800
commit44929d4847b7b5d0cf5929a3155b6193091c26b7 (patch)
tree66f660a0bfaf670889e3ffe65ff1e8aa3e7ac426
parentbfa6135c9e7a53d2aab21c8c3257e85eb61b1212 (diff)
Add new vload benchmark/test case.
v2: refine the benchmark case and don't mix it with normal unit test cases. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com> Reviewed-by: "Song, Ruiling" <ruiling.song@intel.com>
-rw-r--r--benchmark/CMakeLists.txt5
-rw-r--r--benchmark/benchmark_run.cpp2
-rw-r--r--kernels/vload_bench.cl33
-rw-r--r--utests/CMakeLists.txt1
-rw-r--r--utests/utest.cpp18
-rw-r--r--utests/utest.hpp21
-rw-r--r--utests/utest_helper.cpp5
-rw-r--r--utests/utest_run.cpp11
-rw-r--r--utests/vload_bench.cpp98
9 files changed, 183 insertions, 11 deletions
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index d96a2e02..0a959c88 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -10,8 +10,13 @@ set (benchmark_sources
../utests/utest.cpp
../utests/utest_file_map.cpp
../utests/utest_helper.cpp
+ ../utests/vload_bench.cpp
enqueue_copy_buf.cpp)
+
+SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}")
+SET(CMAKE_C_FLAGS "-DBUILD_BENCHMARK ${CMAKE_C_FLAGS}")
+
ADD_LIBRARY(benchmarks SHARED ${ADDMATHFUNC} ${benchmark_sources})
#TARGET_LINK_LIBRARIES(benchmarks cl m ${OPENGL_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
diff --git a/benchmark/benchmark_run.cpp b/benchmark/benchmark_run.cpp
index b29ccc32..e5c70576 100644
--- a/benchmark/benchmark_run.cpp
+++ b/benchmark/benchmark_run.cpp
@@ -98,7 +98,7 @@ int main(int argc, char *argv[])
case 'n':
try {
- UTest::runAllNoIssue();
+ UTest::runAllBenchMark();
}
catch (Exception e){
std::cout << " " << e.what() << " [SUCCESS]" << std::endl;
diff --git a/kernels/vload_bench.cl b/kernels/vload_bench.cl
new file mode 100644
index 00000000..c906c752
--- /dev/null
+++ b/kernels/vload_bench.cl
@@ -0,0 +1,33 @@
+#define VLOAD_BENCH(T, N, M) \
+__kernel void \
+vload_bench_##M ##T ##N(__global T* src, __global uint* dst, uint offset) \
+{ \
+ int id = (int)get_global_id(0); \
+ uint ##N srcV = 0; \
+ for(int i = 0; i < M; i++) \
+ { \
+ srcV += convert_uint ##N(vload ##N(id + (i & 0xFFFF), src + offset)); \
+ } \
+ vstore ##N(srcV, id, dst);\
+ /*if (id < 16)*/ \
+ /*printf("id %d %d %d\n", id, srcV.s0, srcV.s1);*/ \
+}
+
+#define VLOAD_BENCH_ALL_VECTOR(T, N_ITERATIONS) \
+ VLOAD_BENCH(T, 2, N_ITERATIONS) \
+ VLOAD_BENCH(T, 3, N_ITERATIONS) \
+ VLOAD_BENCH(T, 4, N_ITERATIONS) \
+ VLOAD_BENCH(T, 8, N_ITERATIONS) \
+ VLOAD_BENCH(T, 16, N_ITERATIONS)
+
+#define VLOAD_BENCH_ALL_TYPES(N_ITERATIONS) \
+ VLOAD_BENCH_ALL_VECTOR(uchar, N_ITERATIONS) \
+ VLOAD_BENCH_ALL_VECTOR(char, N_ITERATIONS) \
+ VLOAD_BENCH_ALL_VECTOR(ushort, N_ITERATIONS) \
+ VLOAD_BENCH_ALL_VECTOR(short, N_ITERATIONS) \
+ VLOAD_BENCH_ALL_VECTOR(uint, N_ITERATIONS) \
+ VLOAD_BENCH_ALL_VECTOR(int, N_ITERATIONS) \
+ VLOAD_BENCH_ALL_VECTOR(float, N_ITERATIONS)
+
+VLOAD_BENCH_ALL_TYPES(1)
+VLOAD_BENCH_ALL_TYPES(10000)
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 721e6f7e..b30e6f96 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -184,6 +184,7 @@ set (utests_sources
image_1D_buffer.cpp
compare_image_2d_and_1d_array.cpp
compiler_constant_expr.cpp
+ vload_bench.cpp
utest_assert.cpp
utest.cpp
utest_file_map.cpp
diff --git a/utests/utest.cpp b/utests/utest.cpp
index b491caea..d06dedc8 100644
--- a/utests/utest.cpp
+++ b/utests/utest.cpp
@@ -106,8 +106,8 @@ void catch_signal(void){
}
}
-UTest::UTest(Function fn, const char *name, bool haveIssue, bool needDestroyProgram)
- : fn(fn), name(name), haveIssue(haveIssue), needDestroyProgram(needDestroyProgram) {
+UTest::UTest(Function fn, const char *name, bool isBenchMark, bool haveIssue, bool needDestroyProgram)
+ : fn(fn), name(name), isBenchMark(isBenchMark), haveIssue(haveIssue), needDestroyProgram(needDestroyProgram) {
if (utestList == NULL) {
utestList = new vector<UTest>;
@@ -165,7 +165,19 @@ void UTest::runAllNoIssue(void) {
for (; retStatistics.finishrun < utestList->size(); ++retStatistics.finishrun) {
const UTest &utest = (*utestList)[retStatistics.finishrun];
- if (utest.fn == NULL || utest.haveIssue) continue;
+ if (utest.fn == NULL || utest.haveIssue || utest.isBenchMark) continue;
+ do_run(utest);
+ cl_kernel_destroy(utest.needDestroyProgram);
+ cl_buffer_destroy();
+ }
+}
+
+void UTest::runAllBenchMark(void) {
+ if (utestList == NULL) return;
+
+ for (; retStatistics.finishrun < utestList->size(); ++retStatistics.finishrun) {
+ const UTest &utest = (*utestList)[retStatistics.finishrun];
+ if (utest.fn == NULL || utest.haveIssue || !utest.isBenchMark) continue;
do_run(utest);
cl_kernel_destroy(utest.needDestroyProgram);
cl_buffer_destroy();
diff --git a/utests/utest.hpp b/utests/utest.hpp
index 375ef702..26ce6f8c 100644
--- a/utests/utest.hpp
+++ b/utests/utest.hpp
@@ -47,11 +47,13 @@ struct UTest
/*! Empty test */
UTest(void);
/*! Build a new unit test and append it to the unit test list */
- UTest(Function fn, const char *name, bool haveIssue = false, bool needDestroyProgram = true);
+ UTest(Function fn, const char *name, bool isBenchMark = false, bool haveIssue = false, bool needDestroyProgram = true);
/*! Function to execute */
Function fn;
/*! Name of the test */
const char *name;
+ /*! Whether this case is a benchmark. */
+ bool isBenchMark;
/*! Indicate whether current test cases has issue to be fixes */
bool haveIssue;
/*! Indicate whether destroy kernels/program. */
@@ -62,6 +64,8 @@ struct UTest
static void run(const char *name);
/*! Run all the tests without known issue*/
static void runAllNoIssue(void);
+ /*! Run all the benchmarks. */
+ static void runAllBenchMark(void);
/*! Run all the tests */
static void runAll(void);
/*! List all test cases */
@@ -77,7 +81,7 @@ struct UTest
#define MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(FN, KEEP_PROGRAM) \
static void __ANON__##FN##__(void) { UTEST_EXPECT_SUCCESS(FN()); } \
- static const UTest __##FN##__(__ANON__##FN##__, #FN, false, !(KEEP_PROGRAM));
+ static const UTest __##FN##__(__ANON__##FN##__, #FN, false, false, !(KEEP_PROGRAM));
/*! Turn a function into a unit test */
@@ -91,9 +95,14 @@ struct UTest
static const UTest __##FN##__(__ANON__##FN##__, #FN, true);
/*! Turn a function into a unit performance test */
+#define MAKE_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(FN, KEEP_PROGRAM) \
+ static void __ANON__##FN##__(void) { BENCHMARK(FN()); } \
+ static const UTest __##FN##__(__ANON__##FN##__, #FN, true, false, !(KEEP_PROGRAM));
+
#define MAKE_BENCHMARK_FROM_FUNCTION(FN) \
static void __ANON__##FN##__(void) { BENCHMARK(FN()); } \
- static const UTest __##FN##__(__ANON__##FN##__, #FN);
+ static const UTest __##FN##__(__ANON__##FN##__, #FN, true);
+
/*! No assert is expected */
#define UTEST_EXPECT_SUCCESS(EXPR) \
@@ -125,14 +134,16 @@ struct UTest
#define BENCHMARK(EXPR) \
do { \
- int ret = 0; \
+ int ret = 0;\
try { \
ret = EXPR; \
- printf(" %s [SUCCESS] [Result: %d]\n", #EXPR, ret);\
+ std::cout << " [Result: " << ret << "] [SUCCESS]" << std::endl; \
+ UTest::retStatistics.passCount += 1; \
} \
catch (Exception e) { \
std::cout << " " << #EXPR << " [FAILED]" << std::endl; \
std::cout << " " << e.what() << std::endl; \
+ UTest::retStatistics.failCount++; \
} \
} while (0)
#endif /* __UTEST_UTEST_HPP__ */
diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp
index b57b8dc0..0925dafb 100644
--- a/utests/utest_helper.cpp
+++ b/utests/utest_helper.cpp
@@ -262,9 +262,10 @@ cl_kernel_init(const char *file_name, const char *kernel_name, int format, const
goto error;
}
prevFileName = file_name;
+
+ /* OCL requires building the program even if it is created from a binary */
+ OCL_CALL (clBuildProgram, program, 1, &device, build_opt, NULL, NULL);
}
- /* OCL requires to build the program even if it is created from a binary */
- OCL_CALL (clBuildProgram, program, 1, &device, build_opt, NULL, NULL);
/* Create a kernel from the program */
if (kernel)
diff --git a/utests/utest_run.cpp b/utests/utest_run.cpp
index cd4356a9..8883ca88 100644
--- a/utests/utest_run.cpp
+++ b/utests/utest_run.cpp
@@ -106,6 +106,17 @@ int main(int argc, char *argv[])
break;
+ case 'b':
+ try {
+ UTest::runAllBenchMark();
+ }
+ catch (Exception e){
+ std::cout << " " << e.what() << " [SUCCESS]" << std::endl;
+ }
+
+ break;
+
+
case 'h':
default:
usage();
diff --git a/utests/vload_bench.cpp b/utests/vload_bench.cpp
new file mode 100644
index 00000000..37659961
--- /dev/null
+++ b/utests/vload_bench.cpp
@@ -0,0 +1,98 @@
+#include "utest_helper.hpp"
+#include <sys/time.h>
+
+#define N_ITERATIONS 10000
+
+#define T uint8_t
+template <typename T>
+static double vload_bench(const char *kernelFunc, uint32_t N, uint32_t offset, bool benchMode)
+{
+ const size_t n = benchMode ? (512 * 1024) : (8 * 1024);
+ struct timeval start, end;
+
+ // Setup kernel and buffers
+ std::string kernelName = kernelFunc + std::to_string(N);
+ OCL_CALL (cl_kernel_init, "vload_bench.cl", kernelName.c_str(), SOURCE, NULL);
+ //OCL_CREATE_KERNEL("compiler_array");
+ buf_data[0] = (T*) malloc(sizeof(T) * n);
+ for (uint32_t i = 0; i < n; ++i) ((T*)buf_data[0])[i] = i; //rand() & ((1LL << N) - 1);
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(T), buf_data[0]);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ free(buf_data[0]);
+ buf_data[0] = NULL;
+
+ // Run the kernel
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(uint32_t), &offset);
+ globals[0] = n / ((N + 1) & ~0x1);
+ locals[0] = 256;
+ if (benchMode)
+ gettimeofday(&start, NULL);
+ OCL_NDRANGE(1);
+ if (benchMode) {
+ OCL_FINISH();
+ gettimeofday(&end, NULL);
+ double elapsed = (end.tv_sec - start.tv_sec) * 1e6 + (end.tv_usec - start.tv_usec);
+ double bandwidth = (globals[0] * (N_ITERATIONS) * sizeof(T) * N) / elapsed;
+ printf("\t%2.1fGB/S\n", bandwidth/1000.);
+ return bandwidth;
+ } else {
+ // Check result
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < globals[0]; ++i) {
+ OCL_ASSERT(((T*)buf_data[0])[i + offset] == ((uint32_t*)buf_data[1])[i]);
+ }
+ return 0;
+ }
+}
+
+#define VLOAD_TEST(T, kT) \
+static void vload_test_ ##kT(void) \
+{ \
+ uint8_t vectorSize[] = {2, 3, 4, 8, 16}; \
+ for(uint32_t i = 0; i < sizeof(vectorSize); i++) { \
+ for(uint32_t offset = 0; offset < vectorSize[i]; offset++) {\
+ (void)vload_bench<T>("vload_bench_1" #kT, vectorSize[i], offset, false); \
+ }\
+ } \
+}\
+MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(vload_test_ ##kT, true)
+
+#ifndef BUILD_BENCHMARK
+VLOAD_TEST(uint8_t, uchar)
+VLOAD_TEST(int8_t, char)
+VLOAD_TEST(uint16_t, ushort)
+VLOAD_TEST(int16_t, short)
+VLOAD_TEST(uint32_t, uint)
+VLOAD_TEST(int32_t, int)
+VLOAD_TEST(float, float)
+#endif
+
+#define VLOAD_BENCH(T, kT) \
+static int vload_bench_ ##kT(void) \
+{ \
+ uint8_t vectorSize[] = {2, 3, 4, 8, 16}; \
+ double totBandwidth = 0; \
+ unsigned int j = 0;\
+ printf("\n");\
+ for(uint32_t i = 0; i < sizeof(vectorSize); i++, j++) { \
+ printf(" Vector size %d:\n", vectorSize[i]); \
+ uint32_t k = 0;\
+ double bandwidthForOneSize = 0;\
+ for(uint32_t offset = 0; offset < vectorSize[i]; offset++, k++) {\
+ printf("\tOffset %d :", offset); \
+ bandwidthForOneSize += vload_bench<T>("vload_bench_10000" #kT, vectorSize[i], offset, true); \
+ }\
+ totBandwidth += bandwidthForOneSize / k;\
+ } \
+ return totBandwidth/j;\
+}\
+MAKE_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(vload_bench_ ##kT, true)
+
+#ifdef BUILD_BENCHMARK
+VLOAD_BENCH(uint8_t, uchar)
+VLOAD_BENCH(uint16_t, ushort)
+VLOAD_BENCH(uint32_t, uint)
+#endif