From 65187362d44bda5e2c6e059676c5a16b4aa70647 Mon Sep 17 00:00:00 2001 From: Chuanbo Weng Date: Fri, 6 Feb 2015 11:52:30 +0800 Subject: Implement 1D/2D image array related cl_mem_kernel_copy_image in cl way instead of cpu way. Before this patch, cl_mem_kernel_copy_image do cpu memory copy in order to copy image array objects. This is very slow for large image size. This patch implement image array copy in cl way, which dramatically accelerate image array related clEnqueueCopyImage. clCopyImage case in OpenCL conformance test will not be blocked anymore. Signed-off-by: Chuanbo Weng Reviewed-by: Zhigang Gong --- src/CMakeLists.txt | 3 ++ src/cl_context.h | 6 +++ src/cl_mem.c | 43 ++++++++++++++++------ .../cl_internal_copy_image_1d_array_to_1d_array.cl | 21 +++++++++++ .../cl_internal_copy_image_2d_array_to_2d.cl | 21 +++++++++++ .../cl_internal_copy_image_2d_array_to_2d_array.cl | 23 ++++++++++++ .../cl_internal_copy_image_2d_array_to_3d.cl | 23 ++++++++++++ .../cl_internal_copy_image_2d_to_2d_array.cl | 21 +++++++++++ .../cl_internal_copy_image_3d_to_2d_array.cl | 23 ++++++++++++ 9 files changed, 172 insertions(+), 12 deletions(-) create mode 100644 src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_2d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_3d.cl create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d_array.cl create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d_array.cl diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a55f84d0..939f58da 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -46,6 +46,9 @@ cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset cl_internal_copy_buf_rect cl_internal_copy_buf_rect_align4 cl_internal_copy_image_1d_to_1d cl_internal_copy_image_2d_to_2d cl_internal_copy_image_3d_to_2d cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d +cl_internal_copy_image_2d_to_2d_array cl_internal_copy_image_1d_array_to_1d_array +cl_internal_copy_image_2d_array_to_2d_array cl_internal_copy_image_2d_array_to_2d +cl_internal_copy_image_2d_array_to_3d cl_internal_copy_image_3d_to_2d_array cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 diff --git a/src/cl_context.h b/src/cl_context.h index 38ad2fd4..2ea0a73a 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -53,6 +53,12 @@ enum _cl_internal_ker_type { CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d + CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, //copy image 2d to image 2d array + CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, //copy image 1d array to image 1d array + CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, //copy image 2d array to image 2d array + CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, //copy image 2d array to image 2d + CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, //copy image 2d array to image 3d + CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, //copy image 3d to image 2d array CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d diff --git a/src/cl_mem.c b/src/cl_mem.c index 99554f3b..5243efec 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -1612,27 +1612,43 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL); } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + extern char cl_internal_copy_image_2d_to_2d_array_str[]; + extern size_t cl_internal_copy_image_2d_to_2d_array_str_size; - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); - return CL_SUCCESS; + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, + cl_internal_copy_image_2d_to_2d_array_str, (size_t)cl_internal_copy_image_2d_to_2d_array_str_size, NULL); } } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + extern char cl_internal_copy_image_1d_array_to_1d_array_str[]; + extern size_t cl_internal_copy_image_1d_array_to_1d_array_str_size; - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); - return CL_SUCCESS; + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, + cl_internal_copy_image_1d_array_to_1d_array_str, + (size_t)cl_internal_copy_image_1d_array_to_1d_array_str_size, NULL); } } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + extern char cl_internal_copy_image_2d_array_to_2d_array_str[]; + extern size_t cl_internal_copy_image_2d_array_to_2d_array_str_size; - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); - return CL_SUCCESS; + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, + cl_internal_copy_image_2d_array_to_2d_array_str, + (size_t)cl_internal_copy_image_2d_array_to_2d_array_str_size, NULL); } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); - return CL_SUCCESS; + extern char cl_internal_copy_image_2d_array_to_2d_str[]; + extern size_t cl_internal_copy_image_2d_array_to_2d_str_size; + + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, + cl_internal_copy_image_2d_array_to_2d_str, + (size_t)cl_internal_copy_image_2d_array_to_2d_str_size, NULL); } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); - return CL_SUCCESS; + extern char cl_internal_copy_image_2d_array_to_3d_str[]; + extern size_t cl_internal_copy_image_2d_array_to_3d_str_size; + + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, + cl_internal_copy_image_2d_array_to_3d_str, + (size_t)cl_internal_copy_image_2d_array_to_3d_str_size, NULL); } } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { @@ -1648,8 +1664,11 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, cl_internal_copy_image_3d_to_3d_str, (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL); } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); - return CL_SUCCESS; + extern char cl_internal_copy_image_3d_to_2d_array_str[]; + extern size_t cl_internal_copy_image_3d_to_2d_array_str_size; + + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, + cl_internal_copy_image_3d_to_2d_array_str, (size_t)cl_internal_copy_image_3d_to_2d_array_str_size, NULL); } } diff --git a/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl b/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl new file mode 100644 index 00000000..0c7c6e2b --- /dev/null +++ b/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl @@ -0,0 +1,21 @@ +kernel void __cl_copy_image_1d_array_to_1d_array(__read_only image1d_array_t src_image, __write_only image1d_array_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) +{ + int i = get_global_id(0); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int2 src_coord; + int2 dst_coord; + if((i >= region0) || (k>=region2)) + return; + + src_coord.x = src_origin0 + i; + src_coord.y = src_origin2 + k; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin2 + k; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); +} diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl b/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl new file mode 100644 index 00000000..89e36c07 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl @@ -0,0 +1,21 @@ +kernel void __cl_copy_image_2d_array_to_2d(__read_only image2d_array_t src_image, __write_only image2d_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int4 src_coord; + int2 dst_coord; + if((i >= region0) || (j>= region1)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + src_coord.z = src_origin2; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); +} diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl b/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl new file mode 100644 index 00000000..36536605 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl @@ -0,0 +1,23 @@ +kernel void __cl_copy_image_2d_array_to_2d_array(__read_only image2d_array_t src_image, __write_only image2d_array_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int4 src_coord; + int4 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + src_coord.z = src_origin2 + k; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + dst_coord.z = dst_origin2 + k; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); +} diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl b/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl new file mode 100644 index 00000000..424f6b5e --- /dev/null +++ b/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl @@ -0,0 +1,23 @@ +kernel void __cl_copy_image_2d_array_to_3d(__read_only image2d_array_t src_image, __write_only image3d_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int4 src_coord; + int4 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + src_coord.z = src_origin2 + k; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + dst_coord.z = dst_origin2 + k; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); +} diff --git a/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl b/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl new file mode 100644 index 00000000..4384f010 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl @@ -0,0 +1,21 @@ +kernel void __cl_copy_image_2d_to_2d_array(__read_only image2d_t src_image, __write_only image2d_array_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int2 src_coord; + int4 dst_coord; + if((i >= region0) || (j>= region1)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + dst_coord.z = dst_origin2; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); +} diff --git a/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl b/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl new file mode 100644 index 00000000..8041a320 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl @@ -0,0 +1,23 @@ +kernel void __cl_copy_image_3d_to_2d_array(__read_only image3d_t src_image, __write_only image2d_array_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int4 src_coord; + int4 dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + src_coord.z = src_origin2 + k; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + dst_coord.z = dst_origin2 + k; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); +} -- cgit v1.2.3