diff options
author | Chuanbo Weng <chuanbo.weng@intel.com> | 2015-02-03 10:27:30 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-02-06 12:46:24 +0800 |
commit | 546738c1c6320f9732ff5ce9fdab223592fef890 (patch) | |
tree | 16c0a760e1ed5a0d7d0388f1d75c72814e82b8a3 /benchmark | |
parent | 25a6a17a4ec46427ac29a0f0b4ce765c440864c7 (diff) |
Refine copy_buf benchmark and rename the file.
Some refinements for the copy_buf benchmark:
1. We should measure only the execution time of clEnqueueCopyBuffer (buffer
creation and initialization time should not be included).
2. Add clFinish before gettimeofday.
3. Rename the file in order to keep the name format the same as
other benchmarks.
v2: Change output measurement from time to bandwidth.
Signed-off-by: Chuanbo Weng <chuanbo.weng@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'benchmark')
-rw-r--r-- | benchmark/CMakeLists.txt | 2 | ||||
-rw-r--r-- | benchmark/benchmark_copy_buf.cpp (renamed from benchmark/enqueue_copy_buf.cpp) | 38 |
2 files changed, 21 insertions, 19 deletions
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 9a2bd776..73dbe853 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -11,7 +11,7 @@ set (benchmark_sources ../utests/utest_file_map.cpp ../utests/utest_helper.cpp ../utests/vload_bench.cpp - enqueue_copy_buf.cpp + benchmark_copy_buf.cpp benchmark_use_host_ptr_buffer.cpp benchmark_read_buffer.cpp benchmark_read_image.cpp) diff --git a/benchmark/enqueue_copy_buf.cpp b/benchmark/benchmark_copy_buf.cpp index 549c8b16..e21c936f 100644 --- a/benchmark/enqueue_copy_buf.cpp +++ b/benchmark/benchmark_copy_buf.cpp @@ -1,9 +1,14 @@ #include "utests/utest_helper.hpp" #include <sys/time.h> -void test_copy_buf(size_t sz, size_t src_off, size_t dst_off, size_t cb) +double benchmark_copy_buf(void) { - unsigned int i; + size_t i; + const size_t sz = 127 *1023 * 1023; + const size_t cb = sz; + size_t src_off =0, dst_off = 0; + struct timeval start,stop; + cl_char* buf0; OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(char), NULL); @@ -18,32 +23,29 @@ void test_copy_buf(size_t sz, size_t src_off, size_t dst_off, size_t cb) clEnqueueUnmapMemObject(queue, buf[0], buf0, 0, NULL, NULL); if (src_off + cb > sz || dst_off + cb > sz) { - /* Expect Error. */ + /* Expect Error. */ OCL_ASSERT(clEnqueueCopyBuffer(queue, buf[0], buf[1], - src_off, dst_off, cb*sizeof(char), 0, NULL, NULL)); - return; + src_off, dst_off, cb*sizeof(char), 0, NULL, NULL)); } + /* Internal kernel will be built for the first time of calling + * clEnqueueCopyBuffer, so the first execution time of clEnqueueCopyBuffer + * will be much longer. It should not be added to benchmark time. 
*/ OCL_ASSERT(CL_SUCCESS == clEnqueueCopyBuffer(queue, buf[0], buf[1], - src_off, dst_off, cb*sizeof(char), 0, NULL, NULL)); -} - -double enqueue_copy_buf(void) -{ - size_t i; - const size_t sz = 127 *1023 * 1023; - struct timeval start,stop; - + src_off, dst_off, cb*sizeof(char), 0, NULL, NULL)); + OCL_FINISH(); gettimeofday(&start,0); - for (i=0; i<10; i++) { - test_copy_buf(sz, 0, 0, sz); + for (i=0; i<100; i++) { + OCL_ASSERT(CL_SUCCESS == clEnqueueCopyBuffer(queue, buf[0], buf[1], + src_off, dst_off, cb*sizeof(char), 0, NULL, NULL)); } + OCL_FINISH(); gettimeofday(&stop,0); double elapsed = time_subtract(&stop, &start, 0); - return BANDWIDTH(sz * sizeof(char) * 10, elapsed); + return BANDWIDTH(sz * sizeof(char) * 100, elapsed); } -MAKE_BENCHMARK_FROM_FUNCTION(enqueue_copy_buf); +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_copy_buf); |