diff options
author | Yan Wang <yan.wang@linux.intel.com> | 2017-05-25 15:10:19 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-05-25 17:56:55 +0800 |
commit | 6804cca263edd11fd03b2d7f5b7ba034d9a013c3 (patch) | |
tree | cf901363c4be6e3603877e2cf727b19355e6313c | |
parent | c443e7a817d5d5dd8c12c5e57f453e00e187d102 (diff) |
Fix bug of clEnqueueCopyBufferToImage and clEnqueueCopyImageToBuffer.
"imagedim_non_pow_2" cases of basic modudle of confrmance shows
regression after use TILE_Y mode for large image by previous patch.
This bug comes from the non-align16 kernel of clEnqueueCopyBufferToImage
and clEnqueueCopyImageToBuffer.
It will force CL_RGBA/CL_UNORM_INT8/8191x8192 image of conformance test
to CL_R/CL_UNSIGNED_INT8/32764x8192 image for copying.
So it makes width as 8191 x 4 = 32764 and its width will exceed the maximum
width (16 x 1024 = 16384) of GEN surface state structure which only has 14 bits.
So use align4 copy kernel to avoid this bug.
Signed-off-by: Yan Wang <yan.wang@linux.intel.com>
-rw-r--r-- | src/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/cl_context.h | 2 | ||||
-rw-r--r-- | src/cl_mem.c | 78 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl | 18 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl | 18 |
5 files changed, 89 insertions, 28 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f87a6376..87ad48bb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -53,6 +53,7 @@ cl_internal_copy_image_2d_array_to_2d_array cl_internal_copy_image_2d_array_to_2 cl_internal_copy_image_2d_array_to_3d cl_internal_copy_image_3d_to_2d_array cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_2d_to_buffer_align16 cl_internal_copy_image_3d_to_buffer cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_2d_align16 cl_internal_copy_buffer_to_image_3d +cl_internal_copy_buffer_to_image_2d_align4 cl_internal_copy_image_2d_to_buffer_align4 cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign cl_internal_fill_buf_align128 cl_internal_fill_image_1d diff --git a/src/cl_context.h b/src/cl_context.h index 8ba499f7..75bf8952 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -62,9 +62,11 @@ enum _cl_internal_ker_type { CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, //copy image 3d to image 2d array CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16, + CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16, + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, //copy buffer to image 3d CL_ENQUEUE_FILL_BUFFER_UNALIGN, //fill buffer with 1 aligne pattern, pattern size=1 CL_ENQUEUE_FILL_BUFFER_ALIGN2, //fill buffer with 2 aligne pattern, pattern size=2 diff --git a/src/cl_mem.c b/src/cl_mem.c index 0c49c3d7..a8543c9e 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -2146,6 +2146,36 @@ fail: return ret; } +#define ALIGN16 16 +#define ALIGN4 4 +#define ALIGN1 1 + +static size_t +get_align_size_for_copy_kernel(struct _cl_mem_image* image, const size_t origin0, const size_t region0, + const size_t offset, cl_image_format *fmt) { + size_t align_size = 0; + + if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % ALIGN16 == 0) && + ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0) && (offset % ALIGN16 == 0)){ + fmt->image_channel_order = CL_RGBA; + fmt->image_channel_data_type = CL_UNSIGNED_INT32; + align_size = ALIGN16; + } + else if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % ALIGN4 == 0) && + ((origin0 * image->bpp) % ALIGN4 == 0) && (region0 % ALIGN4 == 0) && (offset % ALIGN4 == 0)){ + fmt->image_channel_order = CL_R; + fmt->image_channel_data_type = CL_UNSIGNED_INT32; + align_size = ALIGN4; + } + else{ + fmt->image_channel_order = CL_R; + fmt->image_channel_data_type = CL_UNSIGNED_INT8; + align_size = ALIGN1; + } + + return align_size; +} + LOCAL cl_int cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_mem_image* image, cl_mem buffer, const size_t *src_origin, const size_t dst_offset, const size_t *region) { @@ -2158,7 +2188,6 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m cl_image_format fmt; size_t origin0, region0; size_t kn_dst_offset; - int align16 = 0; size_t align_size = 1; size_t w_saved; @@ -2176,18 +2205,7 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m w_saved = image->w; region0 = region[0] * bpp; kn_dst_offset = dst_offset; - if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % 16 == 0) && - ((src_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && (dst_offset % 16 == 0)){ - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNSIGNED_INT32; - align16 = 1; - align_size = 16; - } - else{ - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNSIGNED_INT8; - align_size = 1; - } + align_size = get_align_size_for_copy_kernel(image, src_origin[0], region0, dst_offset, &fmt); image->intel_fmt = cl_image_get_intel_format(&fmt); image->w = (image->w * image->bpp) / align_size; image->bpp = align_size; @@ -2198,7 +2216,7 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m /* setup the kernel and run. */ if(image->image_type == CL_MEM_OBJECT_IMAGE2D) { - if(align16){ + if(align_size == ALIGN16){ extern char cl_internal_copy_image_2d_to_buffer_align16_str[]; extern size_t cl_internal_copy_image_2d_to_buffer_align16_str_size; @@ -2206,6 +2224,14 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m cl_internal_copy_image_2d_to_buffer_align16_str, (size_t)cl_internal_copy_image_2d_to_buffer_align16_str_size, NULL); } + else if(align_size == ALIGN4){ + extern char cl_internal_copy_image_2d_to_buffer_align4_str[]; + extern size_t cl_internal_copy_image_2d_to_buffer_align4_str_size; + + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4, + cl_internal_copy_image_2d_to_buffer_align4_str, + (size_t)cl_internal_copy_image_2d_to_buffer_align4_str_size, NULL); + } else{ extern char cl_internal_copy_image_2d_to_buffer_str[]; extern size_t cl_internal_copy_image_2d_to_buffer_str_size; @@ -2262,7 +2288,6 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe cl_image_format fmt; size_t origin0, region0; size_t kn_src_offset; - int align16 = 0; size_t align_size = 1; size_t w_saved = 0; @@ -2280,18 +2305,7 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe w_saved = image->w; region0 = region[0] * bpp; kn_src_offset = src_offset; - if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % 16 == 0) && - ((dst_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && (src_offset % 16 == 0)){ - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNSIGNED_INT32; - align16 = 1; - align_size = 16; - } - else{ - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNSIGNED_INT8; - align_size = 1; - } + align_size = get_align_size_for_copy_kernel(image, dst_origin[0], region0, src_offset, &fmt); image->intel_fmt = cl_image_get_intel_format(&fmt); image->w = (image->w * image->bpp) / align_size; image->bpp = align_size; @@ -2302,7 +2316,7 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe /* setup the kernel and run. */ if(image->image_type == CL_MEM_OBJECT_IMAGE2D) { - if(align16){ + if(align_size == ALIGN16){ extern char cl_internal_copy_buffer_to_image_2d_align16_str[]; extern size_t cl_internal_copy_buffer_to_image_2d_align16_str_size; @@ -2310,6 +2324,14 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe cl_internal_copy_buffer_to_image_2d_align16_str, (size_t)cl_internal_copy_buffer_to_image_2d_align16_str_size, NULL); } + else if(align_size == ALIGN4){ + extern char cl_internal_copy_buffer_to_image_2d_align4_str[]; + extern size_t cl_internal_copy_buffer_to_image_2d_align4_str_size; + + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4, + cl_internal_copy_buffer_to_image_2d_align4_str, + (size_t)cl_internal_copy_buffer_to_image_2d_align4_str_size, NULL); + } else{ extern char cl_internal_copy_buffer_to_image_2d_str[]; extern size_t cl_internal_copy_buffer_to_image_2d_str_size; diff --git a/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl b/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl new file mode 100644 index 00000000..79a3d8cd --- /dev/null +++ b/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl @@ -0,0 +1,18 @@ +kernel void __cl_copy_buffer_to_image_2d_align4(__write_only image2d_t image, global uint* buffer, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, + unsigned int src_offset) +{ + int i = get_global_id(0); + int j = get_global_id(1); + uint4 color = (uint4)(0); + int2 dst_coord; + if((i >= region0) || (j>= region1)) + return; + dst_coord.x = dst_origin0 + i; + dst_coord.y = dst_origin1 + j; + src_offset += j * region0 + i; + color.x = buffer[src_offset]; + write_imageui(image, dst_coord, color.x); +} + diff --git a/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl b/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl new file mode 100644 index 00000000..dc76e024 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl @@ -0,0 +1,18 @@ +kernel void __cl_copy_image_2d_to_buffer_align4( __read_only image2d_t image, global uint* buffer, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_offset) +{ + + int i = get_global_id(0); + int j = get_global_id(1); + if((i >= region0) || (j>= region1)) + return; + uint4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int2 src_coord; + src_coord.x = src_origin0 + i; + src_coord.y = src_origin1 + j; + color = read_imageui(image, sampler, src_coord); + *(buffer + dst_offset + region0*j + i) = color.x; +} |