summaryrefslogtreecommitdiff
path: root/benchmark
diff options
context:
space:
mode:
author: Chuanbo Weng <chuanbo.weng@intel.com> 2015-02-03 10:27:30 +0800
committer: Zhigang Gong <zhigang.gong@intel.com> 2015-02-06 12:46:24 +0800
commit: 546738c1c6320f9732ff5ce9fdab223592fef890 (patch)
tree: 16c0a760e1ed5a0d7d0388f1d75c72814e82b8a3 /benchmark
parent: 25a6a17a4ec46427ac29a0f0b4ce765c440864c7 (diff)
Refine copy_buf benchmark and rename the file.
Some refinements to the copy_buf benchmark:
1. Measure only the execution time of clEnqueueCopyBuffer (buffer creation and initialization time should not be included).
2. Add clFinish before gettimeofday.
3. Rename the file so that its name follows the same format as the other benchmarks.

v2: Change the output measurement from time to bandwidth.

Signed-off-by: Chuanbo Weng <chuanbo.weng@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'benchmark')
-rw-r--r--benchmark/CMakeLists.txt2
-rw-r--r--benchmark/benchmark_copy_buf.cpp (renamed from benchmark/enqueue_copy_buf.cpp)38
2 files changed, 21 insertions, 19 deletions
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index 9a2bd776..73dbe853 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -11,7 +11,7 @@ set (benchmark_sources
../utests/utest_file_map.cpp
../utests/utest_helper.cpp
../utests/vload_bench.cpp
- enqueue_copy_buf.cpp
+ benchmark_copy_buf.cpp
benchmark_use_host_ptr_buffer.cpp
benchmark_read_buffer.cpp
benchmark_read_image.cpp)
diff --git a/benchmark/enqueue_copy_buf.cpp b/benchmark/benchmark_copy_buf.cpp
index 549c8b16..e21c936f 100644
--- a/benchmark/enqueue_copy_buf.cpp
+++ b/benchmark/benchmark_copy_buf.cpp
@@ -1,9 +1,14 @@
#include "utests/utest_helper.hpp"
#include <sys/time.h>
-void test_copy_buf(size_t sz, size_t src_off, size_t dst_off, size_t cb)
+double benchmark_copy_buf(void)
{
- unsigned int i;
+ size_t i;
+ const size_t sz = 127 *1023 * 1023;
+ const size_t cb = sz;
+ size_t src_off =0, dst_off = 0;
+ struct timeval start,stop;
+
cl_char* buf0;
OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(char), NULL);
@@ -18,32 +23,29 @@ void test_copy_buf(size_t sz, size_t src_off, size_t dst_off, size_t cb)
clEnqueueUnmapMemObject(queue, buf[0], buf0, 0, NULL, NULL);
if (src_off + cb > sz || dst_off + cb > sz) {
- /* Expect Error. */
+ /* Expect Error. */
OCL_ASSERT(clEnqueueCopyBuffer(queue, buf[0], buf[1],
- src_off, dst_off, cb*sizeof(char), 0, NULL, NULL));
- return;
+ src_off, dst_off, cb*sizeof(char), 0, NULL, NULL));
}
+ /* Internal kernel will be built for the first time of calling
+ * clEnqueueCopyBuffer, so the first execution time of clEnqueueCopyBuffer
+ * will be much longer. It should not be added to benchmark time. */
OCL_ASSERT(CL_SUCCESS == clEnqueueCopyBuffer(queue, buf[0], buf[1],
- src_off, dst_off, cb*sizeof(char), 0, NULL, NULL));
-}
-
-double enqueue_copy_buf(void)
-{
- size_t i;
- const size_t sz = 127 *1023 * 1023;
- struct timeval start,stop;
-
+ src_off, dst_off, cb*sizeof(char), 0, NULL, NULL));
+ OCL_FINISH();
gettimeofday(&start,0);
- for (i=0; i<10; i++) {
- test_copy_buf(sz, 0, 0, sz);
+ for (i=0; i<100; i++) {
+ OCL_ASSERT(CL_SUCCESS == clEnqueueCopyBuffer(queue, buf[0], buf[1],
+ src_off, dst_off, cb*sizeof(char), 0, NULL, NULL));
}
+ OCL_FINISH();
gettimeofday(&stop,0);
double elapsed = time_subtract(&stop, &start, 0);
- return BANDWIDTH(sz * sizeof(char) * 10, elapsed);
+ return BANDWIDTH(sz * sizeof(char) * 100, elapsed);
}
-MAKE_BENCHMARK_FROM_FUNCTION(enqueue_copy_buf);
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_copy_buf);