summaryrefslogtreecommitdiff
path: root/kernels
diff options
context:
space:
mode:
authorYang Rong <rong.r.yang@intel.com>2015-01-09 09:38:47 +0800
committerZhigang Gong <zhigang.gong@intel.com>2015-01-09 14:40:02 +0800
commit5e1020028a8142353a5ee83285724d1c38c4ef3d (patch)
treefcb41296990708721c087a3371d8cceaa7d48954 /kernels
parentbf7cbf44814534bbf2f0718f603a0759290bf4b8 (diff)
Add read buffer/image benchmark.
Add these two benchmarks to compare buffer and image read performance. V2: initialize the coord before reading the image. V3: correct the image's width and the buffer's read index. Signed-off-by: Yang Rong <rong.r.yang@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'kernels')
-rw-r--r--kernels/compiler_read_buffer.cl15
-rw-r--r--kernels/compiler_read_image.cl25
2 files changed, 40 insertions, 0 deletions
diff --git a/kernels/compiler_read_buffer.cl b/kernels/compiler_read_buffer.cl
new file mode 100644
index 00000000..4d3183a1
--- /dev/null
+++ b/kernels/compiler_read_buffer.cl
@@ -0,0 +1,15 @@
+#define COUNT 16 /* number of float4 elements each work-item reads from each source buffer */
+/* Per work-item: sum COUNT elements of src0 and COUNT elements of src1, then store the total in dst[id]. */
+__kernel void
+compiler_read_buffer(__global float4* src0, __global float4* src1, __global float4* dst)
+{
+ float4 sum = 0; /* running total of everything read by this work-item */
+ int offset = 0, i = 0;
+ int id = (int)get_global_id(0);
+ int sz = (int)get_global_size(0);
+ for(i=0; i<COUNT; i++) {
+ sum = sum + src0[offset + id] + src1[offset + id]; /* highest index read is (COUNT-1)*sz + id */
+ offset += sz; /* step by the whole global size, so reads land on id, id+sz, id+2*sz, ... */
+ }
+ dst[id] = sum; /* one float4 result per work-item */
+}
diff --git a/kernels/compiler_read_image.cl b/kernels/compiler_read_image.cl
new file mode 100644
index 00000000..f059743f
--- /dev/null
+++ b/kernels/compiler_read_image.cl
@@ -0,0 +1,25 @@
+#define X_COUNT 4 /* reads per work-item along x */
+#define Y_COUNT 4 /* reads per work-item along y */
+/* Per work-item: sum X_COUNT*Y_COUNT texels read from src0 and from src1, then store the total in dst. */
+__kernel void
+compiler_read_image(__read_only image2d_t src0, __read_only image2d_t src1, __global float4* dst)
+{
+ float4 sum = 0; /* running total of everything read by this work-item */
+ int2 coord;
+ int x_sz = (int)get_global_size(0);
+ int y_sz = (int)get_global_size(1);
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE| CLK_ADDRESS_CLAMP| CLK_FILTER_NEAREST; /* unnormalized coordinates, clamp addressing, nearest filtering */
+ int i, j;
+
+ int x = (int)get_global_id(0);
+ int y = (int)get_global_id(1);
+/* Read coordinates are (x + i*x_sz, y + j*y_sz): spaced by the global size in each dimension. */
+ for(i=0; i<X_COUNT; i++) {
+ coord.x = x + i * x_sz;
+ for(j=0; j<Y_COUNT; j++) {
+ coord.y = y + j * y_sz; /* coord.x was set by the outer loop, so coord is fully initialized before the read */
+ sum = sum + read_imagef(src0, sampler, coord) + read_imagef(src1, sampler, coord);
+ }
+ }
+ dst[y * x_sz + x] = sum; /* row-major index using the global x size as the row width */
+}