summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--backend/src/gbe_bin_generater.cpp2
-rw-r--r--src/CMakeLists.txt23
-rw-r--r--src/cl_context.h16
-rw-r--r--src/cl_mem.c214
-rw-r--r--src/kernels/cl_internal_copy_buf_align1.cl8
-rw-r--r--src/kernels/cl_internal_copy_buf_align16.cl2
-rw-r--r--src/kernels/cl_internal_copy_buf_align4.cl2
-rw-r--r--src/kernels/cl_internal_copy_buf_rect.cl15
-rw-r--r--src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl2
-rw-r--r--src/kernels/cl_internal_copy_buf_unalign_same_offset.cl2
-rw-r--r--src/kernels/cl_internal_copy_buf_unalign_src_offset.cl2
-rw-r--r--src/kernels/cl_internal_copy_buffer_to_image_2d.cl18
-rw-r--r--src/kernels/cl_internal_copy_buffer_to_image_3d.cl19
-rw-r--r--src/kernels/cl_internal_copy_image_2d_to_2d.cl21
-rw-r--r--src/kernels/cl_internal_copy_image_2d_to_3d.cl22
-rw-r--r--src/kernels/cl_internal_copy_image_2d_to_buffer.cl19
-rw-r--r--src/kernels/cl_internal_copy_image_3d_to_2d.cl22
-rw-r--r--src/kernels/cl_internal_copy_image_3d_to_3d.cl23
-rw-r--r--src/kernels/cl_internal_copy_image_3d_to_buffer.cl22
19 files changed, 280 insertions, 174 deletions
diff --git a/backend/src/gbe_bin_generater.cpp b/backend/src/gbe_bin_generater.cpp
index a8af0dae..50020b5b 100644
--- a/backend/src/gbe_bin_generater.cpp
+++ b/backend/src/gbe_bin_generater.cpp
@@ -298,6 +298,8 @@ int main (int argc, const char **argv)
gen_pci_id = (s[0] - '0') << 12 | (s[1] - '0') << 8 | (s[2] - '0') << 4 | (s[3] - '0');
used_index[optind-1] = 1;
+ // We must set the image base index here, as we invoke the backend in a non-standard way.
+ gbe_set_image_base_index(3);
break;
}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f93ddcd5..20e1a4c6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -23,11 +23,30 @@ foreach (KF ${KERNEL_FILES})
endforeach (KF)
endmacro (MakeKernelBinStr)
+macro (MakeBuiltInKernelStr KERNEL_PATH KERNEL_FILES)
+ set (output_file ${KERNEL_PATH}/${BUILT_IN_NAME}.cl)
+ set (file_content)
+ file (REMOVE ${output_file})
+ foreach (KF ${KERNEL_NAMES})
+ set (input_file ${KERNEL_PATH}/${KF}.cl)
+ file(READ ${input_file} file_content )
+ STRING(REGEX REPLACE ";" "\\\\;" file_content "${file_content}")
+ file(APPEND ${output_file} ${file_content})
+ endforeach (KF)
+endmacro (MakeBuiltInKernelStr)
+
set (KERNEL_STR_FILES)
-set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4
+set (KERNEL_NAMES cl_internal_copy_buf_align4
cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset
-cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset)
+cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset
+cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d cl_internal_copy_image_3d_to_2d
+cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d
+cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
+cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d)
+set (BUILT_IN_NAME cl_internal_built_in_kernel)
+MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
+MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${BUILT_IN_NAME}")
set(OPENCL_SRC
${KERNEL_STR_FILES}
diff --git a/src/cl_context.h b/src/cl_context.h
index 782a9af3..82d3217a 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -46,14 +46,14 @@ enum _cl_internal_ker_type {
CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
CL_ENQUEUE_COPY_BUFFER_RECT,
- CL_ENQUEUE_COPY_IMAGE_0, //copy image 2d to image 2d
- CL_ENQUEUE_COPY_IMAGE_1, //copy image 3d to image 2d
- CL_ENQUEUE_COPY_IMAGE_2, //copy image 2d to image 3d
- CL_ENQUEUE_COPY_IMAGE_3, //copy image 3d to image 3d
- CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0, //copy image 2d to buffer
- CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1, //copy image 3d tobuffer
- CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0, //copy buffer to image 2d
- CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1, //copy buffer to image 3d
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer
+ CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d
+ CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, //copy buffer to image 3d
CL_INTERNAL_KERNEL_MAX
};
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 7092385a..87ea317b 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -952,33 +952,19 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1];
global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2];
- cl_int index = CL_ENQUEUE_COPY_BUFFER_RECT;
cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0];
cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0];
- static const char *str_kernel =
- "kernel void __cl_cpy_buffer_rect ( \n"
- " global char* src, global char* dst, \n"
- " unsigned int region0, unsigned int region1, unsigned int region2, \n"
- " unsigned int src_offset, unsigned int dst_offset, \n"
- " unsigned int src_row_pitch, unsigned int src_slice_pitch, \n"
- " unsigned int dst_row_pitch, unsigned int dst_slice_pitch) { \n"
- " int i = get_global_id(0); \n"
- " int j = get_global_id(1); \n"
- " int k = get_global_id(2); \n"
- " if((i >= region0) || (j>= region1) || (k>=region2)) \n"
- " return; \n"
- " src_offset += k * src_slice_pitch + j * src_row_pitch + i; \n"
- " dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; \n"
- " dst[dst_offset] = src[src_offset]; \n"
- "}";
-
-
/* We use one kernel to copy the data. The kernel is lazily created. */
assert(src_buf->ctx == dst_buf->ctx);
/* setup the kernel and run. */
- ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, NULL);
+ extern char cl_internal_copy_buf_rect_str[];
+ extern int cl_internal_copy_buf_rect_str_size;
+
+ ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_RECT,
+ cl_internal_copy_buf_rect_str, (size_t)cl_internal_copy_buf_rect_str_size, NULL);
+
if (!ker)
return CL_OUT_OF_RESOURCES;
@@ -1007,8 +993,6 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
size_t global_off[] = {0,0,0};
size_t global_sz[] = {1,1,1};
size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2};
- cl_int index = CL_ENQUEUE_COPY_IMAGE_0;
- char option[40] = "";
uint32_t fixupDataType;
uint32_t savedIntelFmt;
@@ -1018,15 +1002,6 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1];
global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2];
- if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
- strcat(option, "-D SRC_IMAGE_3D");
- index += 1;
- }
- if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
- strcat(option, " -D DST_IMAGE_3D");
- index += 2;
- }
-
switch (src_image->fmt.image_channel_data_type) {
case CL_SNORM_INT8:
case CL_UNORM_INT8: fixupDataType = CL_UNSIGNED_INT8; break;
@@ -1049,54 +1024,41 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
src_image->intel_fmt = cl_image_get_intel_format(&fmt);
dst_image->intel_fmt = src_image->intel_fmt;
}
- static const char *str_kernel =
- "#ifdef SRC_IMAGE_3D \n"
- " #define SRC_IMAGE_TYPE image3d_t \n"
- " #define SRC_COORD_TYPE int4 \n"
- "#else \n"
- " #define SRC_IMAGE_TYPE image2d_t \n"
- " #define SRC_COORD_TYPE int2 \n"
- "#endif \n"
- "#ifdef DST_IMAGE_3D \n"
- " #define DST_IMAGE_TYPE image3d_t \n"
- " #define DST_COORD_TYPE int4 \n"
- "#else \n"
- " #define DST_IMAGE_TYPE image2d_t \n"
- " #define DST_COORD_TYPE int2 \n"
- "#endif \n"
- "kernel void __cl_copy_image ( \n"
- " __read_only SRC_IMAGE_TYPE src_image, __write_only DST_IMAGE_TYPE dst_image, \n"
- " unsigned int region0, unsigned int region1, unsigned int region2, \n"
- " unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, \n"
- " unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { \n"
- " int i = get_global_id(0); \n"
- " int j = get_global_id(1); \n"
- " int k = get_global_id(2); \n"
- " int4 color; \n"
- " const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; \n"
- " SRC_COORD_TYPE src_coord; \n"
- " DST_COORD_TYPE dst_coord; \n"
- " if((i >= region0) || (j>= region1) || (k>=region2)) \n"
- " return; \n"
- " src_coord.x = src_origin0 + i; \n"
- " src_coord.y = src_origin1 + j; \n"
- "#ifdef SRC_IMAGE_3D \n"
- " src_coord.z = src_origin2 + k; \n"
- "#endif \n"
- " dst_coord.x = dst_origin0 + i; \n"
- " dst_coord.y = dst_origin1 + j; \n"
- "#ifdef DST_IMAGE_3D \n"
- " dst_coord.z = dst_origin2 + k; \n"
- "#endif \n"
- " color = read_imagei(src_image, sampler, src_coord); \n"
- " write_imagei(dst_image, dst_coord, color); \n"
- "}";
/* We use one kernel to copy the data. The kernel is lazily created. */
assert(src_image->base.ctx == dst_image->base.ctx);
/* setup the kernel and run. */
- ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option);
+ if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
+ if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
+ extern char cl_internal_copy_image_2d_to_2d_str[];
+ extern int cl_internal_copy_image_2d_to_2d_str_size;
+
+ ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D,
+ cl_internal_copy_image_2d_to_2d_str, (size_t)cl_internal_copy_image_2d_to_2d_str_size, NULL);
+ }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+ extern char cl_internal_copy_image_2d_to_3d_str[];
+ extern int cl_internal_copy_image_2d_to_3d_str_size;
+
+ ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D,
+ cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL);
+ }
+ }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+ if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
+ extern char cl_internal_copy_image_3d_to_2d_str[];
+ extern int cl_internal_copy_image_3d_to_2d_str_size;
+
+ ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D,
+ cl_internal_copy_image_3d_to_2d_str, (size_t)cl_internal_copy_image_3d_to_2d_str_size, NULL);
+ }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+ extern char cl_internal_copy_image_3d_to_3d_str[];
+ extern int cl_internal_copy_image_3d_to_3d_str_size;
+
+ ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D,
+ cl_internal_copy_image_3d_to_3d_str, (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL);
+ }
+ }
+
if (!ker) {
ret = CL_OUT_OF_RESOURCES;
goto fail;
@@ -1132,8 +1094,6 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image,
size_t global_off[] = {0,0,0};
size_t global_sz[] = {1,1,1};
size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2};
- cl_int index = CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0;
- char option[40] = "";
uint32_t intel_fmt, bpp;
cl_image_format fmt;
size_t origin0, region0;
@@ -1144,42 +1104,6 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image,
global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1];
global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2];
- if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
- strcat(option, "-D IMAGE_3D");
- index += 1;
- }
-
- static const char *str_kernel =
- "#ifdef IMAGE_3D \n"
- " #define IMAGE_TYPE image3d_t \n"
- " #define COORD_TYPE int4 \n"
- "#else \n"
- " #define IMAGE_TYPE image2d_t \n"
- " #define COORD_TYPE int2 \n"
- "#endif \n"
- "kernel void __cl_copy_image_to_buffer ( \n"
- " __read_only IMAGE_TYPE image, global uchar* buffer, \n"
- " unsigned int region0, unsigned int region1, unsigned int region2, \n"
- " unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, \n"
- " unsigned int dst_offset) { \n"
- " int i = get_global_id(0); \n"
- " int j = get_global_id(1); \n"
- " int k = get_global_id(2); \n"
- " uint4 color; \n"
- " const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; \n"
- " COORD_TYPE src_coord; \n"
- " if((i >= region0) || (j>= region1) || (k>=region2)) \n"
- " return; \n"
- " src_coord.x = src_origin0 + i; \n"
- " src_coord.y = src_origin1 + j; \n"
- "#ifdef IMAGE_3D \n"
- " src_coord.z = src_origin2 + k; \n"
- "#endif \n"
- " color = read_imageui(image, sampler, src_coord); \n"
- " dst_offset += (k * region1 + j) * region0 + i; \n"
- " buffer[dst_offset] = color.x; \n"
- "}";
-
/* We use one kernel to copy the data. The kernel is lazily created. */
assert(image->base.ctx == buffer->ctx);
@@ -1195,7 +1119,20 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image,
global_sz[0] = ((region0 + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
/* setup the kernel and run. */
- ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option);
+ if(image->image_type == CL_MEM_OBJECT_IMAGE2D) {
+ extern char cl_internal_copy_image_2d_to_buffer_str[];
+ extern int cl_internal_copy_image_2d_to_buffer_str_size;
+
+ ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER,
+ cl_internal_copy_image_2d_to_buffer_str, (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL);
+ }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+ extern char cl_internal_copy_image_3d_to_buffer_str[];
+ extern int cl_internal_copy_image_3d_to_buffer_str_size;
+
+ ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
+ cl_internal_copy_image_3d_to_buffer_str, (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL);
+ }
+
if (!ker) {
ret = CL_OUT_OF_RESOURCES;
goto fail;
@@ -1231,8 +1168,6 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_me
size_t global_off[] = {0,0,0};
size_t global_sz[] = {1,1,1};
size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2};
- cl_int index = CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0;
- char option[40] = "";
uint32_t intel_fmt, bpp;
cl_image_format fmt;
size_t origin0, region0;
@@ -1243,41 +1178,6 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_me
global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1];
global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2];
- if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
- strcat(option, "-D IMAGE_3D");
- index += 1;
- }
-
- static const char *str_kernel =
- "#ifdef IMAGE_3D \n"
- " #define IMAGE_TYPE image3d_t \n"
- " #define COORD_TYPE int4 \n"
- "#else \n"
- " #define IMAGE_TYPE image2d_t \n"
- " #define COORD_TYPE int2 \n"
- "#endif \n"
- "kernel void __cl_copy_image_to_buffer ( \n"
- " __read_only IMAGE_TYPE image, global uchar* buffer, \n"
- " unsigned int region0, unsigned int region1, unsigned int region2, \n"
- " unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, \n"
- " unsigned int src_offset) { \n"
- " int i = get_global_id(0); \n"
- " int j = get_global_id(1); \n"
- " int k = get_global_id(2); \n"
- " uint4 color = (uint4)(0); \n"
- " COORD_TYPE dst_coord; \n"
- " if((i >= region0) || (j>= region1) || (k>=region2)) \n"
- " return; \n"
- " dst_coord.x = dst_origin0 + i; \n"
- " dst_coord.y = dst_origin1 + j; \n"
- "#ifdef IMAGE_3D \n"
- " dst_coord.z = dst_origin2 + k; \n"
- "#endif \n"
- " src_offset += (k * region1 + j) * region0 + i; \n"
- " color.x = buffer[src_offset]; \n"
- " write_imageui(image, dst_coord, color); \n"
- "}";
-
/* We use one kernel to copy the data. The kernel is lazily created. */
assert(image->base.ctx == buffer->ctx);
@@ -1293,7 +1193,19 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_me
global_sz[0] = ((region0 + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
/* setup the kernel and run. */
- ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option);
+ if(image->image_type == CL_MEM_OBJECT_IMAGE2D) {
+ extern char cl_internal_copy_buffer_to_image_2d_str[];
+ extern int cl_internal_copy_buffer_to_image_2d_str_size;
+
+ ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,
+ cl_internal_copy_buffer_to_image_2d_str, (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL);
+ }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+ extern char cl_internal_copy_buffer_to_image_3d_str[];
+ extern int cl_internal_copy_buffer_to_image_3d_str_size;
+
+ ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,
+ cl_internal_copy_buffer_to_image_3d_str, (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL);
+ }
if (!ker)
return CL_OUT_OF_RESOURCES;
diff --git a/src/kernels/cl_internal_copy_buf_align1.cl b/src/kernels/cl_internal_copy_buf_align1.cl
deleted file mode 100644
index cd3ec7bf..00000000
--- a/src/kernels/cl_internal_copy_buf_align1.cl
+++ /dev/null
@@ -1,8 +0,0 @@
-kernel void __cl_cpy_region_align1 ( global char* src, unsigned int src_offset,
- global char* dst, unsigned int dst_offset,
- unsigned int size)
-{
- int i = get_global_id(0);
- if (i < size)
- dst[i+dst_offset] = src[i+src_offset];
-}
diff --git a/src/kernels/cl_internal_copy_buf_align16.cl b/src/kernels/cl_internal_copy_buf_align16.cl
index 75b1a4a1..1abb4e97 100644
--- a/src/kernels/cl_internal_copy_buf_align16.cl
+++ b/src/kernels/cl_internal_copy_buf_align16.cl
@@ -1,4 +1,4 @@
-kernel void __cl_cpy_region_align16 ( global float* src, unsigned int src_offset,
+kernel void __cl_copy_region_align16 ( global float* src, unsigned int src_offset,
global float* dst, unsigned int dst_offset,
unsigned int size)
{
diff --git a/src/kernels/cl_internal_copy_buf_align4.cl b/src/kernels/cl_internal_copy_buf_align4.cl
index 44a0f81d..27174ca5 100644
--- a/src/kernels/cl_internal_copy_buf_align4.cl
+++ b/src/kernels/cl_internal_copy_buf_align4.cl
@@ -1,4 +1,4 @@
-kernel void __cl_cpy_region_align4 ( global float* src, unsigned int src_offset,
+kernel void __cl_copy_region_align4 ( global float* src, unsigned int src_offset,
global float* dst, unsigned int dst_offset,
unsigned int size)
{
diff --git a/src/kernels/cl_internal_copy_buf_rect.cl b/src/kernels/cl_internal_copy_buf_rect.cl
new file mode 100644
index 00000000..71e7484e
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_rect.cl
@@ -0,0 +1,15 @@
+kernel void __cl_copy_buffer_rect ( global char* src, global char* dst,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_offset, unsigned int dst_offset,
+ unsigned int src_row_pitch, unsigned int src_slice_pitch,
+ unsigned int dst_row_pitch, unsigned int dst_slice_pitch)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_offset += k * src_slice_pitch + j * src_row_pitch + i;
+ dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i;
+ dst[dst_offset] = src[src_offset];
+}
diff --git a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
index 13f41626..e02d0e5b 100644
--- a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
+++ b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
@@ -1,4 +1,4 @@
-kernel void __cl_cpy_region_unalign_dst_offset ( global int* src, unsigned int src_offset,
+kernel void __cl_copy_region_unalign_dst_offset ( global int* src, unsigned int src_offset,
global int* dst, unsigned int dst_offset,
unsigned int size,
unsigned int first_mask, unsigned int last_mask,
diff --git a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl
index 85102461..83b6e976 100644
--- a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl
+++ b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl
@@ -1,4 +1,4 @@
-kernel void __cl_cpy_region_unalign_same_offset ( global int* src, unsigned int src_offset,
+kernel void __cl_copy_region_unalign_same_offset ( global int* src, unsigned int src_offset,
global int* dst, unsigned int dst_offset,
unsigned int size,
unsigned int first_mask, unsigned int last_mask)
diff --git a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl
index f98368ac..ce0aa1db 100644
--- a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl
+++ b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl
@@ -1,4 +1,4 @@
-kernel void __cl_cpy_region_unalign_src_offset ( global int* src, unsigned int src_offset,
+kernel void __cl_copy_region_unalign_src_offset ( global int* src, unsigned int src_offset,
global int* dst, unsigned int dst_offset,
unsigned int size,
unsigned int first_mask, unsigned int last_mask,
diff --git a/src/kernels/cl_internal_copy_buffer_to_image_2d.cl b/src/kernels/cl_internal_copy_buffer_to_image_2d.cl
new file mode 100644
index 00000000..a218b58a
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buffer_to_image_2d.cl
@@ -0,0 +1,18 @@
+kernel void __cl_copy_buffer_to_image_2d(__read_only image2d_t image, global uchar* buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2,
+ unsigned int src_offset)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color = (uint4)(0);
+ int2 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ src_offset += (k * region1 + j) * region0 + i;
+ color.x = buffer[src_offset];
+ write_imageui(image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d.cl b/src/kernels/cl_internal_copy_buffer_to_image_3d.cl
new file mode 100644
index 00000000..84d3b278
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buffer_to_image_3d.cl
@@ -0,0 +1,19 @@
+kernel void __cl_copy_buffer_to_image_3d(__read_only image3d_t image, global uchar* buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2,
+ unsigned int src_offset)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color = (uint4)(0);
+ int4 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2 + k;
+ src_offset += (k * region1 + j) * region0 + i;
+ color.x = buffer[src_offset];
+ write_imageui(image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_2d_to_2d.cl b/src/kernels/cl_internal_copy_image_2d_to_2d.cl
new file mode 100644
index 00000000..c5eaab12
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_2d_to_2d.cl
@@ -0,0 +1,21 @@
+kernel void __cl_copy_image_2d_to_2d(__read_only image2d_t src_image, __write_only image2d_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int2 src_coord;
+ int2 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_2d_to_3d.cl b/src/kernels/cl_internal_copy_image_2d_to_3d.cl
new file mode 100644
index 00000000..4c73a745
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_2d_to_3d.cl
@@ -0,0 +1,22 @@
+kernel void __cl_copy_image_2d_to_3d(__read_only image2d_t src_image, __write_only image3d_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int2 src_coord;
+ int4 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2 + k;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_2d_to_buffer.cl b/src/kernels/cl_internal_copy_image_2d_to_buffer.cl
new file mode 100644
index 00000000..b6c352ec
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_2d_to_buffer.cl
@@ -0,0 +1,19 @@
+kernel void __cl_copy_image_2d_to_buffer( __read_only image2d_t image, global uchar* buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_offset)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int2 src_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ color = read_imageui(image, sampler, src_coord);
+ dst_offset += (k * region1 + j) * region0 + i;
+ buffer[dst_offset] = color.x;
+}
diff --git a/src/kernels/cl_internal_copy_image_3d_to_2d.cl b/src/kernels/cl_internal_copy_image_3d_to_2d.cl
new file mode 100644
index 00000000..e0effa0b
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_3d_to_2d.cl
@@ -0,0 +1,22 @@
+kernel void __cl_copy_image_3d_to_2d(__read_only image3d_t src_image, __write_only image2d_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int4 src_coord;
+ int2 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2 + k;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_3d_to_3d.cl b/src/kernels/cl_internal_copy_image_3d_to_3d.cl
new file mode 100644
index 00000000..de80a0a9
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_3d_to_3d.cl
@@ -0,0 +1,23 @@
+kernel void __cl_copy_image_3d_to_3d(__read_only image3d_t src_image, __write_only image3d_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int4 src_coord;
+ int4 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2 + k;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2 + k;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer.cl b/src/kernels/cl_internal_copy_image_3d_to_buffer.cl
new file mode 100644
index 00000000..dcfc8a24
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_3d_to_buffer.cl
@@ -0,0 +1,22 @@
+#define IMAGE_TYPE image3d_t
+#define COORD_TYPE int4
+kernel void __cl_copy_image_3d_to_buffer ( __read_only IMAGE_TYPE image, global uchar* buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_offset)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ uint4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ COORD_TYPE src_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2 + k;
+ color = read_imageui(image, sampler, src_coord);
+ dst_offset += (k * region1 + j) * region0 + i;
+ buffer[dst_offset] = color.x;
+}