diff options
author | Yang Rong <rong.r.yang@intel.com> | 2015-01-09 09:38:47 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-01-09 10:25:58 +0800 |
commit | 17fae996979c86e76e91f1ff79950562c3f986e0 (patch) | |
tree | 034ae4835b836d02c3a4494ec92f7889845c0efd /benchmark | |
parent | 8a51c74d2e17e5461a3e0b0d00ac2c6d58767654 (diff) |
Add read buffer/image benchmark.
Add these two benchmarks to compare the buffer and image performance.
V2: Initialize the coord before reading the image.
V3: Correct the image's width and buffer's read index.
Signed-off-by: Yang Rong <rong.r.yang@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'benchmark')
-rw-r--r-- | benchmark/CMakeLists.txt | 4 | ||||
-rw-r--r-- | benchmark/benchmark_read_buffer.cpp | 49 | ||||
-rw-r--r-- | benchmark/benchmark_read_image.cpp | 67 |
3 files changed, 119 insertions, 1 deletions
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index ac2d8aad..9a2bd776 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -12,7 +12,9 @@ set (benchmark_sources ../utests/utest_helper.cpp ../utests/vload_bench.cpp enqueue_copy_buf.cpp - benchmark_use_host_ptr_buffer.cpp) + benchmark_use_host_ptr_buffer.cpp + benchmark_read_buffer.cpp + benchmark_read_image.cpp) SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}") diff --git a/benchmark/benchmark_read_buffer.cpp b/benchmark/benchmark_read_buffer.cpp new file mode 100644 index 00000000..31a1f599 --- /dev/null +++ b/benchmark/benchmark_read_buffer.cpp @@ -0,0 +1,49 @@ +#include "utests/utest_helper.hpp" +#include <sys/time.h> + +int benchmark_read_buffer(void) +{ + struct timeval start,stop; + + const size_t n = 1024 * 1024; + int count = 16; + const size_t sz = 4 * n * count; + + OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(float), NULL); + OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(float), NULL); + OCL_CREATE_BUFFER(buf[2], 0, sz * sizeof(float), NULL); + + OCL_CREATE_KERNEL("compiler_read_buffer"); + + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + for (size_t i = 0; i < sz; i ++) { + ((float *)(buf_data[0]))[i] = rand(); + ((float *)(buf_data[1]))[i] = rand(); + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + + // Setup kernel and buffers + globals[0] = n; + locals[0] = 256; + + gettimeofday(&start,0); + for (size_t i=0; i<100; i++) { + OCL_NDRANGE(1); + } + OCL_FINISH(); + gettimeofday(&stop,0); + + clReleaseMemObject(buf[0]); + free(buf_data[0]); + buf_data[0] = NULL; + + return time_subtract(&stop, &start, 0); +} + +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_read_buffer); diff --git a/benchmark/benchmark_read_image.cpp b/benchmark/benchmark_read_image.cpp new file mode 100644 index 00000000..48aa9871 --- /dev/null +++ 
b/benchmark/benchmark_read_image.cpp @@ -0,0 +1,67 @@ +#include <string.h> +#include "utests/utest_helper.hpp" +#include <sys/time.h> + +int benchmark_read_image(void) +{ + struct timeval start,stop; + + const size_t x_count = 4; + const size_t y_count = 4; + const size_t w = 1024; + const size_t h = 1024; + const size_t sz = 4 * x_count * y_count * w * h; + cl_image_format format; + cl_image_desc desc; + + memset(&desc, 0x0, sizeof(cl_image_desc)); + memset(&format, 0x0, sizeof(cl_image_format)); + + // Setup kernel and images + OCL_CREATE_KERNEL("compiler_read_image"); + buf_data[0] = (uint32_t*) malloc(sizeof(float) * sz); + buf_data[1] = (uint32_t*) malloc(sizeof(float) * sz); + for (uint32_t i = 0; i < sz; ++i) { + ((float*)buf_data[0])[i] = rand(); + ((float*)buf_data[1])[i] = rand(); + } + + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_FLOAT; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = w * x_count; + desc.image_height = h * y_count; + desc.image_row_pitch = desc.image_width * sizeof(float) * 4; + OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]); + OCL_CREATE_IMAGE(buf[1], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[1]); + OCL_CREATE_BUFFER(buf[2], 0, sz * sizeof(float), NULL); + + free(buf_data[0]); + buf_data[0] = NULL; + free(buf_data[1]); + buf_data[1] = NULL; + + // Run the kernel + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + globals[0] = w; + globals[1] = h; + locals[0] = 16; + locals[1] = 16; + + gettimeofday(&start,0); + for (size_t i=0; i<100; i++) { + OCL_NDRANGE(2); + } + OCL_FINISH(); + gettimeofday(&stop,0); + + clReleaseMemObject(buf[0]); + free(buf_data[0]); + buf_data[0] = NULL; + + return time_subtract(&stop, &start, 0); +} + +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_read_image); |