diff options
author | Luo <xionghu.luo@intel.com> | 2014-05-12 12:56:26 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-05-22 18:07:54 +0800 |
commit | 4a3e69d6df6141777dd67e97aff2a451bc01aa00 (patch) | |
tree | 8de3ee4380cab942ad24ad1e23fef6d9012a8c10 /src/kernels | |
parent | dacaf74130c60502481f896f1a86e6c622d1f128 (diff) |
move enqueue_copy_image kernels outside of runtime code.
seperate the kernel code from host code to make it clean; build the
kernels offline by gbe_bin_generator to improve the performance.
v2:
fix the image base issue with the standalone compiler.
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/cl_internal_copy_buf_align1.cl | 8 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_buf_align16.cl | 2 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_buf_align4.cl | 2 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_buf_rect.cl | 15 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl | 2 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_buf_unalign_same_offset.cl | 2 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_buf_unalign_src_offset.cl | 2 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_buffer_to_image_2d.cl | 18 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_buffer_to_image_3d.cl | 19 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_image_2d_to_2d.cl | 21 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_image_2d_to_3d.cl | 22 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_image_2d_to_buffer.cl | 19 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_image_3d_to_2d.cl | 22 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_image_3d_to_3d.cl | 23 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_image_3d_to_buffer.cl | 22 |
15 files changed, 186 insertions, 13 deletions
diff --git a/src/kernels/cl_internal_copy_buf_align1.cl b/src/kernels/cl_internal_copy_buf_align1.cl deleted file mode 100644 index cd3ec7bf..00000000 --- a/src/kernels/cl_internal_copy_buf_align1.cl +++ /dev/null @@ -1,8 +0,0 @@ -kernel void __cl_cpy_region_align1 ( global char* src, unsigned int src_offset, - global char* dst, unsigned int dst_offset, - unsigned int size) -{ - int i = get_global_id(0); - if (i < size) - dst[i+dst_offset] = src[i+src_offset]; -} diff --git a/src/kernels/cl_internal_copy_buf_align16.cl b/src/kernels/cl_internal_copy_buf_align16.cl index 75b1a4a1..1abb4e97 100644 --- a/src/kernels/cl_internal_copy_buf_align16.cl +++ b/src/kernels/cl_internal_copy_buf_align16.cl @@ -1,4 +1,4 @@ -kernel void __cl_cpy_region_align16 ( global float* src, unsigned int src_offset, +kernel void __cl_copy_region_align16 ( global float* src, unsigned int src_offset, global float* dst, unsigned int dst_offset, unsigned int size) { diff --git a/src/kernels/cl_internal_copy_buf_align4.cl b/src/kernels/cl_internal_copy_buf_align4.cl index 44a0f81d..27174ca5 100644 --- a/src/kernels/cl_internal_copy_buf_align4.cl +++ b/src/kernels/cl_internal_copy_buf_align4.cl @@ -1,4 +1,4 @@ -kernel void __cl_cpy_region_align4 ( global float* src, unsigned int src_offset, +kernel void __cl_copy_region_align4 ( global float* src, unsigned int src_offset, global float* dst, unsigned int dst_offset, unsigned int size) { diff --git a/src/kernels/cl_internal_copy_buf_rect.cl b/src/kernels/cl_internal_copy_buf_rect.cl new file mode 100644 index 00000000..71e7484e --- /dev/null +++ b/src/kernels/cl_internal_copy_buf_rect.cl @@ -0,0 +1,15 @@ +kernel void __cl_copy_buffer_rect ( global char* src, global char* dst, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_offset, unsigned int dst_offset, + unsigned int src_row_pitch, unsigned int src_slice_pitch, + unsigned int dst_row_pitch, unsigned int dst_slice_pitch) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_offset += k * src_slice_pitch + j * src_row_pitch + i; + dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; + dst[dst_offset] = src[src_offset]; +} diff --git a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl index 13f41626..e02d0e5b 100644 --- a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl +++ b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl @@ -1,4 +1,4 @@ -kernel void __cl_cpy_region_unalign_dst_offset ( global int* src, unsigned int src_offset, +kernel void __cl_copy_region_unalign_dst_offset ( global int* src, unsigned int src_offset, global int* dst, unsigned int dst_offset, unsigned int size, unsigned int first_mask, unsigned int last_mask, diff --git a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl index 85102461..83b6e976 100644 --- a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl +++ b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl @@ -1,4 +1,4 @@ -kernel void __cl_cpy_region_unalign_same_offset ( global int* src, unsigned int src_offset, +kernel void __cl_copy_region_unalign_same_offset ( global int* src, unsigned int src_offset, global int* dst, unsigned int dst_offset, unsigned int size, unsigned int first_mask, unsigned int last_mask) diff --git a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl index f98368ac..ce0aa1db 100644 --- a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl +++ b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl @@ -1,4 +1,4 @@ -kernel void __cl_cpy_region_unalign_src_offset ( global int* src, unsigned int src_offset, +kernel void __cl_copy_region_unalign_src_offset ( global int* src, unsigned int src_offset, global int* dst, unsigned int dst_offset, unsigned int size, unsigned int first_mask, unsigned int last_mask, diff --git a/src/kernels/cl_internal_copy_buffer_to_image_2d.cl b/src/kernels/cl_internal_copy_buffer_to_image_2d.cl new file mode 100644 index 00000000..a218b58a --- /dev/null +++ b/src/kernels/cl_internal_copy_buffer_to_image_2d.cl @@ -0,0 +1,18 @@ +kernel void __cl_copy_buffer_to_image_2d(__read_only image2d_t image, global uchar* buffer, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, + unsigned int src_offset) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + uint4 color = (uint4)(0); + int2 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + src_offset += (k * region1 + j) * region0 + i; + color.x = buffer[src_offset]; + write_imageui(image, dst_coord, color); +} diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d.cl b/src/kernels/cl_internal_copy_buffer_to_image_3d.cl new file mode 100644 index 00000000..84d3b278 --- /dev/null +++ b/src/kernels/cl_internal_copy_buffer_to_image_3d.cl @@ -0,0 +1,19 @@ +kernel void __cl_copy_buffer_to_image_3d(__read_only image3d_t image, global uchar* buffer, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, + unsigned int src_offset) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + uint4 color = (uint4)(0); + int4 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + dst_coord.z = dst_origin2 + k; + src_offset += (k * region1 + j) * region0 + i; + color.x = buffer[src_offset]; + write_imageui(image, dst_coord, color); +} diff --git a/src/kernels/cl_internal_copy_image_2d_to_2d.cl b/src/kernels/cl_internal_copy_image_2d_to_2d.cl new file mode 100644 index 00000000..c5eaab12 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_2d_to_2d.cl @@ -0,0 +1,21 @@ +kernel void __cl_copy_image_2d_to_2d(__read_only image2d_t src_image, __write_only image2d_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int2 src_coord; + int2 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); +} diff --git a/src/kernels/cl_internal_copy_image_2d_to_3d.cl b/src/kernels/cl_internal_copy_image_2d_to_3d.cl new file mode 100644 index 00000000..4c73a745 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_2d_to_3d.cl @@ -0,0 +1,22 @@ +kernel void __cl_copy_image_2d_to_3d(__read_only image2d_t src_image, __write_only image3d_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int2 src_coord; + int4 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + dst_coord.z = dst_origin2 + k; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); +} diff --git a/src/kernels/cl_internal_copy_image_2d_to_buffer.cl b/src/kernels/cl_internal_copy_image_2d_to_buffer.cl new file mode 100644 index 00000000..b6c352ec --- /dev/null +++ b/src/kernels/cl_internal_copy_image_2d_to_buffer.cl @@ -0,0 +1,19 @@ +kernel void __cl_copy_image_2d_to_buffer( __read_only image2d_t image, global uchar* buffer, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_offset) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + uint4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int2 src_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + color = read_imageui(image, sampler, src_coord); + dst_offset += (k * region1 + j) * region0 + i; + buffer[dst_offset] = color.x; +} diff --git a/src/kernels/cl_internal_copy_image_3d_to_2d.cl b/src/kernels/cl_internal_copy_image_3d_to_2d.cl new file mode 100644 index 00000000..e0effa0b --- /dev/null +++ b/src/kernels/cl_internal_copy_image_3d_to_2d.cl @@ -0,0 +1,22 @@ +kernel void __cl_copy_image_3d_to_2d(__read_only image3d_t src_image, __write_only image2d_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int4 src_coord; + int2 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + src_coord.z = src_origin2 + k; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); +} diff --git a/src/kernels/cl_internal_copy_image_3d_to_3d.cl b/src/kernels/cl_internal_copy_image_3d_to_3d.cl new file mode 100644 index 00000000..de80a0a9 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_3d_to_3d.cl @@ -0,0 +1,23 @@ +kernel void __cl_copy_image_3d_to_3d(__read_only image3d_t src_image, __write_only image3d_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int4 src_coord; + int4 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + src_coord.z = src_origin2 + k; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + dst_coord.z = dst_origin2 + k; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); +} diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer.cl b/src/kernels/cl_internal_copy_image_3d_to_buffer.cl new file mode 100644 index 00000000..dcfc8a24 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_3d_to_buffer.cl @@ -0,0 +1,22 @@ +#define IMAGE_TYPE image3d_t +#define COORD_TYPE int4 +kernel void __cl_copy_image_3d_to_buffer ( __read_only IMAGE_TYPE image, global uchar* buffer, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_offset) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + uint4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + COORD_TYPE src_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + src_coord.z = src_origin2 + k; + color = read_imageui(image, sampler, src_coord); + dst_offset += (k * region1 + j) * region0 + i; + buffer[dst_offset] = color.x; +} |