summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYan Wang <yan.wang@linux.intel.com>2017-06-13 16:31:42 +0800
committerYang Rong <rong.r.yang@intel.com>2017-06-14 17:28:14 +0800
commit7f1c190c1a419d9eff946018638dfdc57b207799 (patch)
treef6e6b8a271f1fcdd72e332e76e86cf127b09f7a2
parent38ca78d9397cbe1a2da81a3f80b76ab4c1b4c689 (diff)
Use aligned16 and aligne4 kernel to copy for large 3D image with TILE_Y.
It is similar with 2D image for avoiding extended image width truncated. Signed-off-by: Yan Wang <yan.wang@linux.intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r--src/CMakeLists.txt2
-rw-r--r--src/cl_context.h60
-rw-r--r--src/cl_mem.c50
-rw-r--r--src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl18
-rw-r--r--src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl18
-rw-r--r--src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl19
-rw-r--r--src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl19
7 files changed, 149 insertions, 37 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 87ad48bb..ecb98b96 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -54,6 +54,8 @@ cl_internal_copy_image_2d_array_to_3d cl_internal_copy_image_3d_to_2d_array
cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_2d_to_buffer_align16 cl_internal_copy_image_3d_to_buffer
cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_2d_align16 cl_internal_copy_buffer_to_image_3d
cl_internal_copy_buffer_to_image_2d_align4 cl_internal_copy_image_2d_to_buffer_align4
+cl_internal_copy_buffer_to_image_3d_align4 cl_internal_copy_image_3d_to_buffer_align4
+cl_internal_copy_buffer_to_image_3d_align16 cl_internal_copy_image_3d_to_buffer_align16
cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign
cl_internal_fill_buf_align128 cl_internal_fill_image_1d
diff --git a/src/cl_context.h b/src/cl_context.h
index 75bf8952..3a2e13be 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -49,38 +49,42 @@ enum _cl_internal_ker_type {
CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
CL_ENQUEUE_COPY_BUFFER_RECT,
CL_ENQUEUE_COPY_BUFFER_RECT_ALIGN4,
- CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, //copy image 1d to image 1d
- CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d
- CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d
- CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d
- CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d
- CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, //copy image 2d to image 2d array
- CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, //copy image 1d array to image 1d array
- CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, //copy image 2d array to image 2d array
- CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, //copy image 2d array to image 2d
- CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, //copy image 2d array to image 3d
- CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, //copy image 3d to image 2d array
- CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer
+ CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, // copy image 1d to image 1d
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, // copy image 2d to image 2d
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, // copy image 3d to image 2d
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, // copy image 2d to image 3d
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, // copy image 3d to image 3d
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, // copy image 2d to image 2d array
+ CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, // copy image 1d array to image 1d array
+ CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, // copy image 2d array to image 2d array
+ CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, // copy image 2d array to image 2d
+ CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, // copy image 2d array to image 3d
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, // copy image 3d to image 2d array
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, // copy image 2d to buffer
CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16,
CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4,
- CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer
- CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, // copy image 3d tobuffer
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16,
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4,
+ CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, // copy buffer to image 2d
CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16,
CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4,
- CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, //copy buffer to image 3d
- CL_ENQUEUE_FILL_BUFFER_UNALIGN, //fill buffer with 1 aligne pattern, pattern size=1
- CL_ENQUEUE_FILL_BUFFER_ALIGN2, //fill buffer with 2 aligne pattern, pattern size=2
- CL_ENQUEUE_FILL_BUFFER_ALIGN4, //fill buffer with 4 aligne pattern, pattern size=4
- CL_ENQUEUE_FILL_BUFFER_ALIGN8_8, //fill buffer with 8 aligne pattern, pattern size=8
- CL_ENQUEUE_FILL_BUFFER_ALIGN8_16, //fill buffer with 16 aligne pattern, pattern size=16
- CL_ENQUEUE_FILL_BUFFER_ALIGN8_32, //fill buffer with 16 aligne pattern, pattern size=32
- CL_ENQUEUE_FILL_BUFFER_ALIGN8_64, //fill buffer with 16 aligne pattern, pattern size=64
- CL_ENQUEUE_FILL_BUFFER_ALIGN128, //fill buffer with 128 aligne pattern, pattern size=128
- CL_ENQUEUE_FILL_IMAGE_1D, //fill image 1d
- CL_ENQUEUE_FILL_IMAGE_1D_ARRAY, //fill image 1d array
- CL_ENQUEUE_FILL_IMAGE_2D, //fill image 2d
- CL_ENQUEUE_FILL_IMAGE_2D_ARRAY, //fill image 2d array
- CL_ENQUEUE_FILL_IMAGE_3D, //fill image 3d
+ CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, // copy buffer to image 3d
+ CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16,
+ CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4,
+ CL_ENQUEUE_FILL_BUFFER_UNALIGN, // fill buffer with 1 aligne pattern, pattern size=1
+ CL_ENQUEUE_FILL_BUFFER_ALIGN2, // fill buffer with 2 aligne pattern, pattern size=2
+ CL_ENQUEUE_FILL_BUFFER_ALIGN4, // fill buffer with 4 aligne pattern, pattern size=4
+ CL_ENQUEUE_FILL_BUFFER_ALIGN8_8, // fill buffer with 8 aligne pattern, pattern size=8
+ CL_ENQUEUE_FILL_BUFFER_ALIGN8_16, // fill buffer with 16 aligne pattern, pattern size=16
+ CL_ENQUEUE_FILL_BUFFER_ALIGN8_32, // fill buffer with 16 aligne pattern, pattern size=32
+ CL_ENQUEUE_FILL_BUFFER_ALIGN8_64, // fill buffer with 16 aligne pattern, pattern size=64
+ CL_ENQUEUE_FILL_BUFFER_ALIGN128, // fill buffer with 128 aligne pattern, pattern size=128
+ CL_ENQUEUE_FILL_IMAGE_1D, // fill image 1d
+ CL_ENQUEUE_FILL_IMAGE_1D_ARRAY, // fill image 1d array
+ CL_ENQUEUE_FILL_IMAGE_2D, // fill image 2d
+ CL_ENQUEUE_FILL_IMAGE_2D_ARRAY, // fill image 2d array
+ CL_ENQUEUE_FILL_IMAGE_3D, // fill image 3d
CL_INTERNAL_KERNEL_MAX
};
diff --git a/src/cl_mem.c b/src/cl_mem.c
index b6dce3f3..ad92234b 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -2162,14 +2162,13 @@ get_align_size_for_copy_kernel(struct _cl_mem_image* image, const size_t origin0
const size_t offset, cl_image_format *fmt) {
size_t align_size = 0;
- if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % ALIGN16 == 0) &&
- ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0) && (offset % ALIGN16 == 0)){
+ if (((image->w * image->bpp) % ALIGN16 == 0) && ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0) &&
+ (offset % ALIGN16 == 0)) {
fmt->image_channel_order = CL_RGBA;
fmt->image_channel_data_type = CL_UNSIGNED_INT32;
align_size = ALIGN16;
- }
- else if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % ALIGN4 == 0) &&
- ((origin0 * image->bpp) % ALIGN4 == 0) && (region0 % ALIGN4 == 0) && (offset % ALIGN4 == 0)){
+ } else if (((image->w * image->bpp) % ALIGN4 == 0) && ((origin0 * image->bpp) % ALIGN4 == 0) &&
+ (region0 % ALIGN4 == 0) && (offset % ALIGN4 == 0)) {
fmt->image_channel_order = CL_R;
fmt->image_channel_data_type = CL_UNSIGNED_INT32;
align_size = ALIGN4;
@@ -2247,11 +2246,28 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m
cl_internal_copy_image_2d_to_buffer_str, (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL);
}
}else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
- extern char cl_internal_copy_image_3d_to_buffer_str[];
- extern size_t cl_internal_copy_image_3d_to_buffer_str_size;
+ if (align_size == ALIGN16) {
+ extern char cl_internal_copy_image_3d_to_buffer_align16_str[];
+ extern size_t cl_internal_copy_image_3d_to_buffer_align16_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16,
+ cl_internal_copy_image_3d_to_buffer_align16_str,
+ (size_t)cl_internal_copy_image_3d_to_buffer_align16_str_size, NULL);
+ } else if (align_size == ALIGN4) {
+ extern char cl_internal_copy_image_3d_to_buffer_align4_str[];
+ extern size_t cl_internal_copy_image_3d_to_buffer_align4_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4,
+ cl_internal_copy_image_3d_to_buffer_align4_str,
+ (size_t)cl_internal_copy_image_3d_to_buffer_align4_str_size, NULL);
+ } else {
+ extern char cl_internal_copy_image_3d_to_buffer_str[];
+ extern size_t cl_internal_copy_image_3d_to_buffer_str_size;
- ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
- cl_internal_copy_image_3d_to_buffer_str, (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL);
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
+ cl_internal_copy_image_3d_to_buffer_str,
+ (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL);
+ }
}
if (!ker) {
@@ -2347,11 +2363,27 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe
cl_internal_copy_buffer_to_image_2d_str, (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL);
}
}else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+ if (align_size == ALIGN16) {
+ extern char cl_internal_copy_buffer_to_image_3d_align16_str[];
+ extern size_t cl_internal_copy_buffer_to_image_3d_align16_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16,
+ cl_internal_copy_buffer_to_image_3d_align16_str,
+ (size_t)cl_internal_copy_buffer_to_image_3d_align16_str_size, NULL);
+ } else if (align_size == ALIGN4) {
+ extern char cl_internal_copy_buffer_to_image_3d_align4_str[];
+ extern size_t cl_internal_copy_buffer_to_image_3d_align4_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4,
+ cl_internal_copy_buffer_to_image_3d_align4_str,
+ (size_t)cl_internal_copy_buffer_to_image_3d_align4_str_size, NULL);
+ } else {
extern char cl_internal_copy_buffer_to_image_3d_str[];
extern size_t cl_internal_copy_buffer_to_image_3d_str_size;
ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,
cl_internal_copy_buffer_to_image_3d_str, (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL);
+ }
}
if (!ker)
return CL_OUT_OF_RESOURCES;
diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl
new file mode 100644
index 00000000..b57b4878
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl
@@ -0,0 +1,18 @@
+kernel void __cl_copy_buffer_to_image_3d_align16(__write_only image3d_t image, global uint4 *buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int dst_origin0, unsigned int dst_origin1,
+ unsigned int dst_origin2, unsigned int src_offset) {
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color = (uint4)(0);
+ int4 dst_coord;
+ if ((i >= region0) || (j >= region1) || (k >= region2))
+ return;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2 + k;
+ src_offset += (k * region1 + j) * region0 + i;
+ color = buffer[src_offset];
+ write_imageui(image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl
new file mode 100644
index 00000000..717af979
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl
@@ -0,0 +1,18 @@
+kernel void __cl_copy_buffer_to_image_3d_align4(__write_only image3d_t image, global uint *buffer, unsigned int region0,
+ unsigned int region1, unsigned int region2, unsigned int dst_origin0,
+ unsigned int dst_origin1, unsigned int dst_origin2,
+ unsigned int src_offset) {
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color = (uint4)(0);
+ int4 dst_coord;
+ if ((i >= region0) || (j >= region1) || (k >= region2))
+ return;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2 + k;
+ src_offset += (k * region1 + j) * region0 + i;
+ color.x = buffer[src_offset];
+ write_imageui(image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl
new file mode 100644
index 00000000..a7a3c2e3
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl
@@ -0,0 +1,19 @@
+kernel void __cl_copy_image_3d_to_buffer_align16(__read_only image3d_t image, global uint4 *buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1,
+ unsigned int src_origin2, unsigned int dst_offset) {
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int4 src_coord;
+ if ((i >= region0) || (j >= region1) || (k >= region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2 + k;
+ color = read_imageui(image, sampler, src_coord);
+ dst_offset += (k * region1 + j) * region0 + i;
+ *(buffer + dst_offset) = color;
+}
diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl
new file mode 100644
index 00000000..bb001afd
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl
@@ -0,0 +1,19 @@
+kernel void __cl_copy_image_3d_to_buffer_align4(__read_only image3d_t image, global uint *buffer, unsigned int region0,
+ unsigned int region1, unsigned int region2, unsigned int src_origin0,
+ unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_offset) {
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int4 src_coord;
+ if ((i >= region0) || (j >= region1) || (k >= region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2 + k;
+ color = read_imageui(image, sampler, src_coord);
+ dst_offset += (k * region1 + j) * region0 + i;
+ buffer[dst_offset] = color.x;
+}