diff options
-rw-r--r-- | src/CMakeLists.txt | 4 | ||||
-rw-r--r-- | src/cl_api.c | 78 | ||||
-rw-r--r-- | src/cl_context.c | 2 | ||||
-rw-r--r-- | src/cl_context.h | 7 | ||||
-rw-r--r-- | src/cl_enqueue.c | 1 | ||||
-rw-r--r-- | src/cl_enqueue.h | 1 | ||||
-rw-r--r-- | src/cl_gt_device.h | 7 | ||||
-rw-r--r-- | src/cl_khr_icd.c | 2 | ||||
-rw-r--r-- | src/cl_mem.c | 106 | ||||
-rw-r--r-- | src/cl_mem.h | 3 | ||||
-rw-r--r-- | src/kernels/cl_internal_fill_image_1d.cl | 14 | ||||
-rw-r--r-- | src/kernels/cl_internal_fill_image_1d_array.cl | 15 | ||||
-rw-r--r-- | src/kernels/cl_internal_fill_image_2d.cl | 15 | ||||
-rw-r--r-- | src/kernels/cl_internal_fill_image_2d_array.cl | 16 | ||||
-rw-r--r-- | src/kernels/cl_internal_fill_image_3d.cl | 16 |
15 files changed, 261 insertions, 26 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8651af6c..5c89e55b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -47,7 +47,9 @@ cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign -cl_internal_fill_buf_align128) +cl_internal_fill_buf_align128 cl_internal_fill_image_1d +cl_internal_fill_image_1d_array cl_internal_fill_image_2d +cl_internal_fill_image_2d_array cl_internal_fill_image_3d) set (BUILT_IN_NAME cl_internal_built_in_kernel) MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") diff --git a/src/cl_api.c b/src/cl_api.c index 32f91d72..90422432 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -1812,6 +1812,79 @@ error: } cl_int +clEnqueueFillImage(cl_command_queue command_queue, + cl_mem image, + const void * fill_color, + const size_t * porigin, + const size_t * pregion, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) +{ + cl_int err = CL_SUCCESS; + enqueue_data *data, no_wait_data = { 0 }; + + CHECK_QUEUE(command_queue); + CHECK_IMAGE(image, src_image); + FIXUP_IMAGE_REGION(src_image, pregion, region); + FIXUP_IMAGE_ORIGIN(src_image, porigin, origin); + + if (command_queue->ctx != image->ctx) { + err = CL_INVALID_CONTEXT; + goto error; + } + + if (fill_color == NULL) { + err = CL_INVALID_VALUE; + goto error; + } + + if (!origin || !region || origin[0] + region[0] > src_image->w || origin[1] + region[1] > src_image->h || origin[2] + region[2] > src_image->depth) { + err = CL_INVALID_VALUE; + goto error; + } + + if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (origin[2] != 0 || region[2] != 1)){ + err = CL_INVALID_VALUE; + goto error; + } + + if (src_image->image_type == CL_MEM_OBJECT_IMAGE1D && (origin[2] != 0 ||origin[1] != 0 || region[2] != 1 || region[1] != 1)){ + err = CL_INVALID_VALUE; + goto error; + } + + err = cl_image_fill(command_queue, fill_color, src_image, origin, region); + if (err) { + goto error; + } + + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx); + + data = &no_wait_data; + data->type = EnqueueFillImage; + data->queue = command_queue; + + if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, + event, data, CL_COMMAND_FILL_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { + if (event && (*event)->type != CL_COMMAND_USER + && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { + cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); + } + + err = cl_command_queue_flush(command_queue); + } + + if(b_output_kernel_perf) + time_end(command_queue->ctx, "beignet internal kernel : cl_fill_image", "", command_queue); + + return 0; + + error: + return err; +} + +cl_int clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer, const void * pattern, @@ -2637,9 +2710,12 @@ clEnqueueMapImage(cl_command_queue command_queue, goto error; } - *image_row_pitch = image->row_pitch; if (image_slice_pitch) *image_slice_pitch = image->slice_pitch; + if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) + *image_row_pitch = image->slice_pitch; + else + *image_row_pitch = image->row_pitch; if ((map_flags & CL_MAP_READ && mem->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) || diff --git a/src/cl_context.c b/src/cl_context.c index 8f42a585..152faf32 100644 --- a/src/cl_context.c +++ b/src/cl_context.c @@ -319,7 +319,7 @@ cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kern } cl_kernel -cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index, +cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index, const char * str_kernel, size_t size, const char * str_option) { cl_int ret; diff --git a/src/cl_context.h b/src/cl_context.h index cba0a0aa..0e4db734 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -63,6 +63,11 @@ enum _cl_internal_ker_type { CL_ENQUEUE_FILL_BUFFER_ALIGN8_32, //fill buffer with 16 aligne pattern, pattern size=32 CL_ENQUEUE_FILL_BUFFER_ALIGN8_64, //fill buffer with 16 aligne pattern, pattern size=64 CL_ENQUEUE_FILL_BUFFER_ALIGN128, //fill buffer with 128 aligne pattern, pattern size=128 + CL_ENQUEUE_FILL_IMAGE_1D, //fill image 1d + CL_ENQUEUE_FILL_IMAGE_1D_ARRAY, //fill image 1d array + CL_ENQUEUE_FILL_IMAGE_2D, //fill image 2d + CL_ENQUEUE_FILL_IMAGE_2D_ARRAY, //fill image 2d array + CL_ENQUEUE_FILL_IMAGE_3D, //fill image 3d CL_INTERNAL_KERNEL_MAX }; @@ -153,7 +158,7 @@ extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx); extern cl_kernel cl_context_get_static_kernel(cl_context ctx, cl_int index, const char *str_kernel, const char * str_option); /* Get the internal used kernel from binary*/ -extern cl_kernel cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index, +extern cl_kernel cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index, const char * str_kernel, size_t size, const char * str_option); #endif /* __CL_CONTEXT_H__ */ diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index bc0ca2c6..52c824d3 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -432,6 +432,7 @@ cl_int cl_enqueue_handle(cl_event event, enqueue_data* data) case EnqueueCopyImageToBuffer: case EnqueueNDRangeKernel: case EnqueueFillBuffer: + case EnqueueFillImage: cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); return CL_SUCCESS; case EnqueueNativeKernel: diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h index 65276027..a9b36017 100644 --- a/src/cl_enqueue.h +++ b/src/cl_enqueue.h @@ -43,6 +43,7 @@ typedef enum { EnqueueMarker, EnqueueBarrier, EnqueueFillBuffer, + EnqueueFillImage, EnqueueMigrateMemObj, EnqueueInvalid } enqueue_type; diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index ba7d66cd..d7855cdc 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -102,7 +102,12 @@ DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;" "__cl_fill_region_align8_4;" "__cl_fill_region_align8_8;" "__cl_fill_region_align8_16;" - "__cl_fill_region_align128;") + "__cl_fill_region_align128;" + "__cl_fill_image_1d;" + "__cl_fill_image_1d_array;" + "__cl_fill_image_2d;" + "__cl_fill_image_2d_array;" + "__cl_fill_image_3d;") DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING) #undef DECL_INFO_STRING diff --git a/src/cl_khr_icd.c b/src/cl_khr_icd.c index b23c29d2..6d49db03 100644 --- a/src/cl_khr_icd.c +++ b/src/cl_khr_icd.c @@ -150,7 +150,7 @@ struct _cl_icd_dispatch const cl_khr_icd_dispatch = { clUnloadPlatformCompiler, clGetKernelArgInfo, clEnqueueFillBuffer, - CL_1_2_NOTYET(clEnqueueFillImage), + clEnqueueFillImage, clEnqueueMigrateMemObjects, clEnqueueMarkerWithWaitList, clEnqueueBarrierWithWaitList, diff --git a/src/cl_mem.c b/src/cl_mem.c index 46d9af1a..f860b385 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -1051,7 +1051,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, extern char cl_internal_copy_buf_align16_str[]; extern int cl_internal_copy_buf_align16_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN16, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN16, cl_internal_copy_buf_align16_str, (size_t)cl_internal_copy_buf_align16_str_size, NULL); cb = cb/16; aligned = 1; @@ -1059,7 +1059,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, extern char cl_internal_copy_buf_align4_str[]; extern int cl_internal_copy_buf_align4_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN4, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN4, cl_internal_copy_buf_align4_str, (size_t)cl_internal_copy_buf_align4_str_size, NULL); cb = cb/4; aligned = 1; @@ -1106,7 +1106,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, extern char cl_internal_copy_buf_unalign_same_offset_str[]; extern int cl_internal_copy_buf_unalign_same_offset_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET, cl_internal_copy_buf_unalign_same_offset_str, (size_t)cl_internal_copy_buf_unalign_same_offset_str_size, NULL); @@ -1133,7 +1133,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, unsigned int dw_mask = masks[align_diff]; int shift = align_diff * 8; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, cl_internal_copy_buf_unalign_dst_offset_str, (size_t)cl_internal_copy_buf_unalign_dst_offset_str_size, NULL); @@ -1163,7 +1163,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, int shift = align_diff * 8; int src_less = !(src_offset % 4) && !((src_offset + cb) % 4); - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, cl_internal_copy_buf_unalign_src_offset_str, (size_t)cl_internal_copy_buf_unalign_src_offset_str_size, NULL); @@ -1188,6 +1188,72 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, } LOCAL cl_int +cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image* src_image, + const size_t * origin, const size_t * region) +{ + cl_int ret = CL_SUCCESS; + cl_kernel ker = NULL; + size_t global_off[] = {0,0,0}; + size_t global_sz[] = {1,1,1}; + size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2}; + + if(region[1] == 1) local_sz[1] = 1; + if(region[2] == 1) local_sz[2] = 1; + global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; + global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; + global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; + + if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D) { + extern char cl_internal_fill_image_1d_str[]; + extern int cl_internal_fill_image_1d_str_size; + + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_1D, + cl_internal_fill_image_1d_str, (size_t)cl_internal_fill_image_1d_str_size, NULL); + }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + extern char cl_internal_fill_image_1d_array_str[]; + extern int cl_internal_fill_image_1d_array_str_size; + + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_1D_ARRAY, + cl_internal_fill_image_1d_array_str, (size_t)cl_internal_fill_image_1d_array_str_size, NULL); + }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) { + extern char cl_internal_fill_image_2d_str[]; + extern int cl_internal_fill_image_2d_str_size; + + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_2D, + cl_internal_fill_image_2d_str, (size_t)cl_internal_fill_image_2d_str_size, NULL); + }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + extern char cl_internal_fill_image_2d_array_str[]; + extern int cl_internal_fill_image_2d_array_str_size; + + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_2D_ARRAY, + cl_internal_fill_image_2d_array_str, (size_t)cl_internal_fill_image_2d_array_str_size, NULL); + }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { + extern char cl_internal_fill_image_3d_str[]; + extern int cl_internal_fill_image_3d_str_size; + + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_3D, + cl_internal_fill_image_3d_str, (size_t)cl_internal_fill_image_3d_str_size, NULL); + }else{ + return CL_IMAGE_FORMAT_NOT_SUPPORTED; + } + + if (!ker) + return CL_OUT_OF_RESOURCES; + + cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_image); + cl_kernel_set_arg(ker, 1, sizeof(float)*4, pattern); + cl_kernel_set_arg(ker, 2, sizeof(cl_int), ®ion[0]); + cl_kernel_set_arg(ker, 3, sizeof(cl_int), ®ion[1]); + cl_kernel_set_arg(ker, 4, sizeof(cl_int), ®ion[2]); + cl_kernel_set_arg(ker, 5, sizeof(cl_int), &origin[0]); + cl_kernel_set_arg(ker, 6, sizeof(cl_int), &origin[1]); + cl_kernel_set_arg(ker, 7, sizeof(cl_int), &origin[2]); + + ret = cl_command_queue_ND_range(queue, ker, 3, global_off, global_sz, local_sz); + return ret; +} + +LOCAL cl_int cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size, cl_mem buffer, size_t offset, size_t size) { @@ -1212,7 +1278,7 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size, extern char cl_internal_fill_buf_align128_str[]; extern int cl_internal_fill_buf_align128_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN128, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN128, cl_internal_fill_buf_align128_str, (size_t)cl_internal_fill_buf_align128_str_size, NULL); is_128 = 1; pattern_size = pattern_size / 2; @@ -1223,13 +1289,13 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size, extern int cl_internal_fill_buf_align8_str_size; int order = ffs(pattern_size / 8) - 1; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 + order, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 + order, cl_internal_fill_buf_align8_str, (size_t)cl_internal_fill_buf_align8_str_size, NULL); } else if (pattern_size == 4) { extern char cl_internal_fill_buf_align4_str[]; extern int cl_internal_fill_buf_align4_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4, cl_internal_fill_buf_align4_str, (size_t)cl_internal_fill_buf_align4_str_size, NULL); } else if (size >= 4 && size % 4 == 0 && offset % 4 == 0) { /* The unaligned case. But if copy size and offset are aligned to 4, we can fake @@ -1246,7 +1312,7 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size, = pattern_comb[3] = *(char *)pattern; } - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4, cl_internal_fill_buf_align4_str, (size_t)cl_internal_fill_buf_align4_str_size, NULL); pattern_size = 4; pattern = pattern_comb; @@ -1256,12 +1322,12 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size, else if (pattern_size == 2) { extern char cl_internal_fill_buf_align2_str[]; extern int cl_internal_fill_buf_align2_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN2, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN2, cl_internal_fill_buf_align2_str, (size_t)cl_internal_fill_buf_align2_str_size, NULL); } else if (pattern_size == 1) { extern char cl_internal_fill_buf_unalign_str[]; extern int cl_internal_fill_buf_unalign_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_UNALIGN, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_UNALIGN, cl_internal_fill_buf_unalign_str, (size_t)cl_internal_fill_buf_unalign_str_size, NULL); } else assert(0); @@ -1314,7 +1380,7 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, extern char cl_internal_copy_buf_rect_str[]; extern int cl_internal_copy_buf_rect_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_RECT, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_RECT, cl_internal_copy_buf_rect_str, (size_t)cl_internal_copy_buf_rect_str_size, NULL); if (!ker) @@ -1386,13 +1452,13 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image extern char cl_internal_copy_image_2d_to_2d_str[]; extern int cl_internal_copy_image_2d_to_2d_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, cl_internal_copy_image_2d_to_2d_str, (size_t)cl_internal_copy_image_2d_to_2d_str_size, NULL); }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { extern char cl_internal_copy_image_2d_to_3d_str[]; extern int cl_internal_copy_image_2d_to_3d_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL); } }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { @@ -1400,13 +1466,13 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image extern char cl_internal_copy_image_3d_to_2d_str[]; extern int cl_internal_copy_image_3d_to_2d_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, cl_internal_copy_image_3d_to_2d_str, (size_t)cl_internal_copy_image_3d_to_2d_str_size, NULL); }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { extern char cl_internal_copy_image_3d_to_3d_str[]; extern int cl_internal_copy_image_3d_to_3d_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, cl_internal_copy_image_3d_to_3d_str, (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL); } } @@ -1475,13 +1541,13 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image, extern char cl_internal_copy_image_2d_to_buffer_str[]; extern int cl_internal_copy_image_2d_to_buffer_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, cl_internal_copy_image_2d_to_buffer_str, (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL); }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { extern char cl_internal_copy_image_3d_to_buffer_str[]; extern int cl_internal_copy_image_3d_to_buffer_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, cl_internal_copy_image_3d_to_buffer_str, (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL); } @@ -1549,13 +1615,13 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_me extern char cl_internal_copy_buffer_to_image_2d_str[]; extern int cl_internal_copy_buffer_to_image_2d_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, cl_internal_copy_buffer_to_image_2d_str, (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL); }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { extern char cl_internal_copy_buffer_to_image_3d_str[]; extern int cl_internal_copy_buffer_to_image_3d_str_size; - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, cl_internal_copy_buffer_to_image_3d_str, (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL); } if (!ker) diff --git a/src/cl_mem.h b/src/cl_mem.h index d5890930..8ed8e2d5 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -205,6 +205,9 @@ extern cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf extern cl_int cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size, cl_mem buffer, size_t offset, size_t size); +extern cl_int cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image*, + const size_t *, const size_t *); + /* api clEnqueueCopyBufferRect help function */ extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem, const size_t *, const size_t *, const size_t *, diff --git a/src/kernels/cl_internal_fill_image_1d.cl b/src/kernels/cl_internal_fill_image_1d.cl new file mode 100644 index 00000000..b3b0cbf3 --- /dev/null +++ b/src/kernels/cl_internal_fill_image_1d.cl @@ -0,0 +1,14 @@ +kernel void __cl_fill_image_1d( __write_only image1d_t image, float4 pattern, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int origin0, unsigned int origin1, unsigned int origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + coord = origin0 + i; + write_imagef(image, coord, pattern); + +} diff --git a/src/kernels/cl_internal_fill_image_1d_array.cl b/src/kernels/cl_internal_fill_image_1d_array.cl new file mode 100644 index 00000000..f1eb2412 --- /dev/null +++ b/src/kernels/cl_internal_fill_image_1d_array.cl @@ -0,0 +1,15 @@ +kernel void __cl_fill_image_1d_array( __write_only image1d_array_t image, float4 pattern, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int origin0, unsigned int origin1, unsigned int origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int2 coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + coord.x = origin0 + i; + coord.y = origin2 + k; + write_imagef(image, coord, pattern); + +} diff --git a/src/kernels/cl_internal_fill_image_2d.cl b/src/kernels/cl_internal_fill_image_2d.cl new file mode 100644 index 00000000..0e29f3e1 --- /dev/null +++ b/src/kernels/cl_internal_fill_image_2d.cl @@ -0,0 +1,15 @@ +kernel void __cl_fill_image_2d( __write_only image2d_t image, float4 pattern, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int origin0, unsigned int origin1, unsigned int origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int2 coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + coord.x = origin0 + i; + coord.y = origin1 + j; + write_imagef(image, coord, pattern); + +} diff --git a/src/kernels/cl_internal_fill_image_2d_array.cl b/src/kernels/cl_internal_fill_image_2d_array.cl new file mode 100644 index 00000000..f29c9e76 --- /dev/null +++ b/src/kernels/cl_internal_fill_image_2d_array.cl @@ -0,0 +1,16 @@ +kernel void __cl_fill_image_2d_array( __write_only image2d_array_t image, float4 pattern, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int origin0, unsigned int origin1, unsigned int origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + coord.x = origin0 + i; + coord.y = origin1 + j; + coord.z = origin2 + k; + write_imagef(image, coord, pattern); + +} diff --git a/src/kernels/cl_internal_fill_image_3d.cl b/src/kernels/cl_internal_fill_image_3d.cl new file mode 100644 index 00000000..042b8ab2 --- /dev/null +++ b/src/kernels/cl_internal_fill_image_3d.cl @@ -0,0 +1,16 @@ +kernel void __cl_fill_image_3d( __write_only image3d_t image, float4 pattern, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int origin0, unsigned int origin1, unsigned int origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + coord.x = origin0 + i; + coord.y = origin1 + j; + coord.z = origin2 + k; + write_imagef(image, coord, pattern); + +} |