summaryrefslogtreecommitdiff
path: root/src/kernels
diff options
context:
space:
mode:
authorLuo <xionghu.luo@intel.com>2014-05-12 12:56:26 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-05-22 18:07:54 +0800
commit4a3e69d6df6141777dd67e97aff2a451bc01aa00 (patch)
tree8de3ee4380cab942ad24ad1e23fef6d9012a8c10 /src/kernels
parentdacaf74130c60502481f896f1a86e6c622d1f128 (diff)
move enqueue_copy_image kernels outside of runtime code.
seperate the kernel code from host code to make it clean; build the kernels offline by gbe_bin_generator to improve the performance. v2: fix the image base issue with the standalone compiler. Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com> Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Diffstat (limited to 'src/kernels')
-rw-r--r--src/kernels/cl_internal_copy_buf_align1.cl8
-rw-r--r--src/kernels/cl_internal_copy_buf_align16.cl2
-rw-r--r--src/kernels/cl_internal_copy_buf_align4.cl2
-rw-r--r--src/kernels/cl_internal_copy_buf_rect.cl15
-rw-r--r--src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl2
-rw-r--r--src/kernels/cl_internal_copy_buf_unalign_same_offset.cl2
-rw-r--r--src/kernels/cl_internal_copy_buf_unalign_src_offset.cl2
-rw-r--r--src/kernels/cl_internal_copy_buffer_to_image_2d.cl18
-rw-r--r--src/kernels/cl_internal_copy_buffer_to_image_3d.cl19
-rw-r--r--src/kernels/cl_internal_copy_image_2d_to_2d.cl21
-rw-r--r--src/kernels/cl_internal_copy_image_2d_to_3d.cl22
-rw-r--r--src/kernels/cl_internal_copy_image_2d_to_buffer.cl19
-rw-r--r--src/kernels/cl_internal_copy_image_3d_to_2d.cl22
-rw-r--r--src/kernels/cl_internal_copy_image_3d_to_3d.cl23
-rw-r--r--src/kernels/cl_internal_copy_image_3d_to_buffer.cl22
15 files changed, 186 insertions, 13 deletions
diff --git a/src/kernels/cl_internal_copy_buf_align1.cl b/src/kernels/cl_internal_copy_buf_align1.cl
deleted file mode 100644
index cd3ec7bf..00000000
--- a/src/kernels/cl_internal_copy_buf_align1.cl
+++ /dev/null
@@ -1,8 +0,0 @@
-kernel void __cl_cpy_region_align1 ( global char* src, unsigned int src_offset,
- global char* dst, unsigned int dst_offset,
- unsigned int size)
-{
- int i = get_global_id(0);
- if (i < size)
- dst[i+dst_offset] = src[i+src_offset];
-}
diff --git a/src/kernels/cl_internal_copy_buf_align16.cl b/src/kernels/cl_internal_copy_buf_align16.cl
index 75b1a4a1..1abb4e97 100644
--- a/src/kernels/cl_internal_copy_buf_align16.cl
+++ b/src/kernels/cl_internal_copy_buf_align16.cl
@@ -1,4 +1,4 @@
-kernel void __cl_cpy_region_align16 ( global float* src, unsigned int src_offset,
+kernel void __cl_copy_region_align16 ( global float* src, unsigned int src_offset,
global float* dst, unsigned int dst_offset,
unsigned int size)
{
diff --git a/src/kernels/cl_internal_copy_buf_align4.cl b/src/kernels/cl_internal_copy_buf_align4.cl
index 44a0f81d..27174ca5 100644
--- a/src/kernels/cl_internal_copy_buf_align4.cl
+++ b/src/kernels/cl_internal_copy_buf_align4.cl
@@ -1,4 +1,4 @@
-kernel void __cl_cpy_region_align4 ( global float* src, unsigned int src_offset,
+kernel void __cl_copy_region_align4 ( global float* src, unsigned int src_offset,
global float* dst, unsigned int dst_offset,
unsigned int size)
{
diff --git a/src/kernels/cl_internal_copy_buf_rect.cl b/src/kernels/cl_internal_copy_buf_rect.cl
new file mode 100644
index 00000000..71e7484e
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_rect.cl
@@ -0,0 +1,15 @@
+kernel void __cl_copy_buffer_rect ( global char* src, global char* dst,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_offset, unsigned int dst_offset,
+ unsigned int src_row_pitch, unsigned int src_slice_pitch,
+ unsigned int dst_row_pitch, unsigned int dst_slice_pitch)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_offset += k * src_slice_pitch + j * src_row_pitch + i;
+ dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i;
+ dst[dst_offset] = src[src_offset];
+}
diff --git a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
index 13f41626..e02d0e5b 100644
--- a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
+++ b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
@@ -1,4 +1,4 @@
-kernel void __cl_cpy_region_unalign_dst_offset ( global int* src, unsigned int src_offset,
+kernel void __cl_copy_region_unalign_dst_offset ( global int* src, unsigned int src_offset,
global int* dst, unsigned int dst_offset,
unsigned int size,
unsigned int first_mask, unsigned int last_mask,
diff --git a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl
index 85102461..83b6e976 100644
--- a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl
+++ b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl
@@ -1,4 +1,4 @@
-kernel void __cl_cpy_region_unalign_same_offset ( global int* src, unsigned int src_offset,
+kernel void __cl_copy_region_unalign_same_offset ( global int* src, unsigned int src_offset,
global int* dst, unsigned int dst_offset,
unsigned int size,
unsigned int first_mask, unsigned int last_mask)
diff --git a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl
index f98368ac..ce0aa1db 100644
--- a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl
+++ b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl
@@ -1,4 +1,4 @@
-kernel void __cl_cpy_region_unalign_src_offset ( global int* src, unsigned int src_offset,
+kernel void __cl_copy_region_unalign_src_offset ( global int* src, unsigned int src_offset,
global int* dst, unsigned int dst_offset,
unsigned int size,
unsigned int first_mask, unsigned int last_mask,
diff --git a/src/kernels/cl_internal_copy_buffer_to_image_2d.cl b/src/kernels/cl_internal_copy_buffer_to_image_2d.cl
new file mode 100644
index 00000000..a218b58a
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buffer_to_image_2d.cl
@@ -0,0 +1,18 @@
+kernel void __cl_copy_buffer_to_image_2d(__read_only image2d_t image, global uchar* buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2,
+ unsigned int src_offset)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color = (uint4)(0);
+ int2 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ src_offset += (k * region1 + j) * region0 + i;
+ color.x = buffer[src_offset];
+ write_imageui(image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d.cl b/src/kernels/cl_internal_copy_buffer_to_image_3d.cl
new file mode 100644
index 00000000..84d3b278
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buffer_to_image_3d.cl
@@ -0,0 +1,19 @@
+kernel void __cl_copy_buffer_to_image_3d(__read_only image3d_t image, global uchar* buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2,
+ unsigned int src_offset)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color = (uint4)(0);
+ int4 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2 + k;
+ src_offset += (k * region1 + j) * region0 + i;
+ color.x = buffer[src_offset];
+ write_imageui(image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_2d_to_2d.cl b/src/kernels/cl_internal_copy_image_2d_to_2d.cl
new file mode 100644
index 00000000..c5eaab12
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_2d_to_2d.cl
@@ -0,0 +1,21 @@
+kernel void __cl_copy_image_2d_to_2d(__read_only image2d_t src_image, __write_only image2d_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int2 src_coord;
+ int2 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_2d_to_3d.cl b/src/kernels/cl_internal_copy_image_2d_to_3d.cl
new file mode 100644
index 00000000..4c73a745
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_2d_to_3d.cl
@@ -0,0 +1,22 @@
+kernel void __cl_copy_image_2d_to_3d(__read_only image2d_t src_image, __write_only image3d_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int2 src_coord;
+ int4 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2 + k;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_2d_to_buffer.cl b/src/kernels/cl_internal_copy_image_2d_to_buffer.cl
new file mode 100644
index 00000000..b6c352ec
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_2d_to_buffer.cl
@@ -0,0 +1,19 @@
+kernel void __cl_copy_image_2d_to_buffer( __read_only image2d_t image, global uchar* buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_offset)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int2 src_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ color = read_imageui(image, sampler, src_coord);
+ dst_offset += (k * region1 + j) * region0 + i;
+ buffer[dst_offset] = color.x;
+}
diff --git a/src/kernels/cl_internal_copy_image_3d_to_2d.cl b/src/kernels/cl_internal_copy_image_3d_to_2d.cl
new file mode 100644
index 00000000..e0effa0b
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_3d_to_2d.cl
@@ -0,0 +1,22 @@
+kernel void __cl_copy_image_3d_to_2d(__read_only image3d_t src_image, __write_only image2d_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int4 src_coord;
+ int2 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2 + k;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_3d_to_3d.cl b/src/kernels/cl_internal_copy_image_3d_to_3d.cl
new file mode 100644
index 00000000..de80a0a9
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_3d_to_3d.cl
@@ -0,0 +1,23 @@
+kernel void __cl_copy_image_3d_to_3d(__read_only image3d_t src_image, __write_only image3d_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int4 src_coord;
+ int4 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2 + k;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2 + k;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer.cl b/src/kernels/cl_internal_copy_image_3d_to_buffer.cl
new file mode 100644
index 00000000..dcfc8a24
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_3d_to_buffer.cl
@@ -0,0 +1,22 @@
+#define IMAGE_TYPE image3d_t
+#define COORD_TYPE int4
+kernel void __cl_copy_image_3d_to_buffer ( __read_only IMAGE_TYPE image, global uchar* buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_offset)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ COORD_TYPE src_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2 + k;
+ color = read_imageui(image, sampler, src_coord);
+ dst_offset += (k * region1 + j) * region0 + i;
+ buffer[dst_offset] = color.x;
+}