/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "cl_platform_id.h" #include "cl_device_id.h" #include "cl_context.h" #include "cl_command_queue.h" #include "cl_enqueue.h" #include "cl_event.h" #include "cl_program.h" #include "cl_kernel.h" #include "cl_mem.h" #include "cl_image.h" #include "cl_sampler.h" #include "cl_alloc.h" #include "cl_utils.h" #include "CL/cl.h" #include "CL/cl_ext.h" #include "CL/cl_intel.h" #include #include #include #include #include "performance.h" #ifndef CL_VERSION_1_2 #define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) #define CL_DEVICE_TYPE_CUSTOM (1 << 4) #define CL_MEM_HOST_WRITE_ONLY (1 << 7) #define CL_MEM_HOST_READ_ONLY (1 << 8) #define CL_MEM_HOST_NO_ACCESS (1 << 9) typedef intptr_t cl_device_partition_property; #endif #define FILL_GETINFO_RET(TYPE, ELT, VAL, RET) \ do { \ if (param_value && param_value_size < sizeof(TYPE)*ELT) \ return CL_INVALID_VALUE; \ if (param_value) { \ memcpy(param_value, (VAL), sizeof(TYPE)*ELT); \ } \ \ if (param_value_size_ret) \ *param_value_size_ret = sizeof(TYPE)*ELT; \ return RET; \ } while(0) inline cl_int handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list, cl_event* event, enqueue_data* data, cl_command_type type) { cl_int status = cl_event_wait_events(num, wait_list, queue); cl_event e = NULL; if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) { e = cl_event_new(queue->ctx, queue, type, event!=NULL); /* if need profiling, add the submit timestamp here. */ if (e->type != CL_COMMAND_USER && e->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(e, CL_PROFILING_COMMAND_QUEUED); } if(event != NULL) *event = e; if(status == CL_ENQUEUE_EXECUTE_DEFER) { cl_event_new_enqueue_callback(e, data, num, wait_list); } } queue->current_event = e; return status; } /* The following code checking overlap is from Appendix of openCL spec 1.1 */ cl_bool check_copy_overlap(const size_t src_offset[3], const size_t dst_offset[3], const size_t region[3], size_t row_pitch, size_t slice_pitch) { const size_t src_min[] = {src_offset[0], src_offset[1], src_offset[2]}; const size_t src_max[] = {src_offset[0] + region[0], src_offset[1] + region[1], src_offset[2] + region[2]}; const size_t dst_min[] = {dst_offset[0], dst_offset[1], dst_offset[2]}; const size_t dst_max[] = {dst_offset[0] + region[0], dst_offset[1] + region[1], dst_offset[2] + region[2]}; // Check for overlap cl_bool overlap = CL_TRUE; unsigned i; size_t dst_start = dst_offset[2] * slice_pitch + dst_offset[1] * row_pitch + dst_offset[0]; size_t dst_end = dst_start + (region[2] * slice_pitch + region[1] * row_pitch + region[0]); size_t src_start = src_offset[2] * slice_pitch + src_offset[1] * row_pitch + src_offset[0]; size_t src_end = src_start + (region[2] * slice_pitch + region[1] * row_pitch + region[0]); for (i=0; i != 3; ++i) { overlap = overlap && (src_min[i] < dst_max[i]) && (src_max[i] > dst_min[i]); } if (!overlap) { size_t delta_src_x = (src_offset[0] + region[0] > row_pitch) ? src_offset[0] + region[0] - row_pitch : 0; size_t delta_dst_x = (dst_offset[0] + region[0] > row_pitch) ? dst_offset[0] + region[0] - row_pitch : 0; if ( (delta_src_x > 0 && delta_src_x > dst_offset[0]) || (delta_dst_x > 0 && delta_dst_x > src_offset[0]) ) { if ( (src_start <= dst_start && dst_start < src_end) || (dst_start <= src_start && src_start < dst_end) ) overlap = CL_TRUE; } if (region[2] > 1) { size_t src_height = slice_pitch / row_pitch; size_t dst_height = slice_pitch / row_pitch; size_t delta_src_y = (src_offset[1] + region[1] > src_height) ? src_offset[1] + region[1] - src_height : 0; size_t delta_dst_y = (dst_offset[1] + region[1] > dst_height) ? dst_offset[1] + region[1] - dst_height : 0; if ( (delta_src_y > 0 && delta_src_y > dst_offset[1]) || (delta_dst_y > 0 && delta_dst_y > src_offset[1]) ) { if ( (src_start <= dst_start && dst_start < src_end) || (dst_start <= src_start && src_start < dst_end) ) overlap = CL_TRUE; } } } return overlap; } static cl_int cl_check_device_type(cl_device_type device_type) { const cl_device_type valid = CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_DEFAULT | CL_DEVICE_TYPE_CUSTOM; if( (device_type & valid) == 0) { return CL_INVALID_DEVICE_TYPE; } if(UNLIKELY(!(device_type & CL_DEVICE_TYPE_DEFAULT) && !(device_type & CL_DEVICE_TYPE_GPU))) return CL_DEVICE_NOT_FOUND; return CL_SUCCESS; } static cl_int cl_device_id_is_ok(const cl_device_id device) { if(UNLIKELY(device == NULL)) return CL_FALSE; return device != cl_get_gt_device() ? CL_FALSE : CL_TRUE; } cl_int clGetPlatformIDs(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms) { if(UNLIKELY(platforms == NULL && num_platforms == NULL)) return CL_INVALID_VALUE; if(UNLIKELY(num_entries == 0 && platforms != NULL)) return CL_INVALID_VALUE; return cl_get_platform_ids(num_entries, platforms, num_platforms); } cl_int clGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { /* Only one platform. This is easy */ if (UNLIKELY(platform != NULL && platform != intel_platform)) return CL_INVALID_PLATFORM; return cl_get_platform_info(platform, param_name, param_value_size, param_value, param_value_size_ret); } cl_int clGetDeviceIDs(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) { cl_int err = CL_SUCCESS; /* Check parameter consistency */ if (UNLIKELY(devices == NULL && num_devices == NULL)) return CL_INVALID_VALUE; if (UNLIKELY(platform && platform != intel_platform)) return CL_INVALID_PLATFORM; if (UNLIKELY(devices && num_entries == 0)) return CL_INVALID_VALUE; err = cl_check_device_type(device_type); if(err != CL_SUCCESS) return err; return cl_get_device_ids(platform, device_type, num_entries, devices, num_devices); } cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { return cl_get_device_info(device, param_name, param_value_size, param_value, param_value_size_ret); } cl_int clCreateSubDevices(cl_device_id in_device, const cl_device_partition_property * properties, cl_uint num_devices, cl_device_id * out_devices, cl_uint * num_devices_ret) { /* Check parameter consistency */ if (UNLIKELY(out_devices == NULL && num_devices_ret == NULL)) return CL_INVALID_VALUE; if (UNLIKELY(in_device == NULL && properties == NULL)) return CL_INVALID_VALUE; *num_devices_ret = 0; return CL_INVALID_DEVICE_PARTITION_COUNT; } cl_int clRetainDevice(cl_device_id device) { // XXX stub for C++ Bindings return CL_SUCCESS; } cl_int clReleaseDevice(cl_device_id device) { // XXX stub for C++ Bindings return CL_SUCCESS; } cl_context clCreateContext(const cl_context_properties * properties, cl_uint num_devices, const cl_device_id * devices, void (* pfn_notify) (const char*, const void*, size_t, void*), void * user_data, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; cl_context context = NULL; /* Assert parameters correctness */ INVALID_VALUE_IF (devices == NULL); INVALID_VALUE_IF (num_devices == 0); INVALID_VALUE_IF (pfn_notify == NULL && user_data != NULL); /* Now check if the user is asking for the right device */ INVALID_DEVICE_IF (cl_device_id_is_ok(*devices) == CL_FALSE); context = cl_create_context(properties, num_devices, devices, pfn_notify, user_data, &err); initialize_env_var(); error: if (errcode_ret) *errcode_ret = err; return context; } cl_context clCreateContextFromType(const cl_context_properties * properties, cl_device_type device_type, void (CL_CALLBACK *pfn_notify) (const char *, const void *, size_t, void *), void * user_data, cl_int * errcode_ret) { cl_context context = NULL; cl_int err = CL_SUCCESS; cl_device_id devices[1]; cl_uint num_devices = 1; INVALID_VALUE_IF (pfn_notify == NULL && user_data != NULL); err = cl_check_device_type(device_type); if(err != CL_SUCCESS) { goto error; } err = cl_get_device_ids(NULL, device_type, 1, &devices[0], &num_devices); if (err != CL_SUCCESS) { goto error; } context = cl_create_context(properties, num_devices, devices, pfn_notify, user_data, &err); error: if (errcode_ret) *errcode_ret = err; return context; } cl_int clRetainContext(cl_context context) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); cl_context_add_ref(context); error: return err; } cl_int clReleaseContext(cl_context context) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); cl_context_delete(context); error: return err; } cl_int clGetContextInfo(cl_context context, cl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); if (param_name == CL_CONTEXT_DEVICES) { FILL_GETINFO_RET (cl_device_id, 1, &context->device, CL_SUCCESS); } else if (param_name == CL_CONTEXT_NUM_DEVICES) { cl_uint n = 1; FILL_GETINFO_RET (cl_uint, 1, &n, CL_SUCCESS); } else if (param_name == CL_CONTEXT_REFERENCE_COUNT) { cl_uint ref = context->ref_n; FILL_GETINFO_RET (cl_uint, 1, &ref, CL_SUCCESS); } else if (param_name == CL_CONTEXT_PROPERTIES) { if(context->prop_len > 0) { FILL_GETINFO_RET (cl_context_properties, context->prop_len, context->prop_user, CL_SUCCESS); } else { cl_context_properties n = 0; FILL_GETINFO_RET (cl_context_properties, 1, &n, CL_SUCCESS); } } else { return CL_INVALID_VALUE; } error: return err; } cl_command_queue clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int * errcode_ret) { cl_command_queue queue = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); INVALID_DEVICE_IF (device != context->device); INVALID_VALUE_IF (properties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE)); if(properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {/*not supported now.*/ err = CL_INVALID_QUEUE_PROPERTIES; goto error; } queue = cl_context_create_queue(context, device, properties, &err); error: if (errcode_ret) *errcode_ret = err; return queue; } cl_int clRetainCommandQueue(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE (command_queue); cl_command_queue_add_ref(command_queue); error: return err; } cl_int clReleaseCommandQueue(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE (command_queue); cl_command_queue_delete(command_queue); error: return err; } cl_int clGetCommandQueueInfo(cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_QUEUE (command_queue); if (param_name == CL_QUEUE_CONTEXT) { FILL_GETINFO_RET (cl_context, 1, &command_queue->ctx, CL_SUCCESS); } else if (param_name == CL_QUEUE_DEVICE) { FILL_GETINFO_RET (cl_device_id, 1, &command_queue->ctx->device, CL_SUCCESS); } else if (param_name == CL_QUEUE_REFERENCE_COUNT) { cl_uint ref = command_queue->ref_n; FILL_GETINFO_RET (cl_uint, 1, &ref, CL_SUCCESS); } else if (param_name == CL_QUEUE_PROPERTIES) { FILL_GETINFO_RET (cl_command_queue_properties, 1, &command_queue->props, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_mem clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void * host_ptr, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); mem = cl_mem_new_buffer(context, flags, size, host_ptr, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateSubBuffer(cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type buffer_create_type, const void * buffer_create_info, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_MEM(buffer); mem = cl_mem_new_sub_buffer(buffer, flags, buffer_create_type, buffer_create_info, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateImage(cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); if (image_format == NULL) { err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; goto error; } if (image_format->image_channel_order < CL_R || image_format->image_channel_order > CL_RGBx) { err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; goto error; } if (image_format->image_channel_data_type < CL_SNORM_INT8 || image_format->image_channel_data_type > CL_FLOAT) { err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; goto error; } if (image_desc == NULL) { err = CL_INVALID_IMAGE_DESCRIPTOR; goto error; } if (image_desc->image_type <= CL_MEM_OBJECT_BUFFER || image_desc->image_type > CL_MEM_OBJECT_IMAGE1D_BUFFER) { err = CL_INVALID_IMAGE_DESCRIPTOR; goto error; } /* buffer refers to a valid buffer memory object if image_type is CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. */ if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER && image_desc->buffer) { err = CL_INVALID_IMAGE_DESCRIPTOR; goto error; } if (image_desc->num_mip_levels || image_desc->num_samples) { err = CL_INVALID_IMAGE_DESCRIPTOR; goto error; } /* Other details check for image_desc will leave to image create. */ mem = cl_mem_new_image(context, flags, image_format, image_desc, host_ptr, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateImage2D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, size_t image_width, size_t image_height, size_t image_row_pitch, void * host_ptr, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); cl_image_desc image_desc; memset(&image_desc, 0, sizeof(image_desc)); image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; image_desc.image_width = image_width; image_desc.image_height = image_height; image_desc.image_row_pitch = image_row_pitch; mem = cl_mem_new_image(context, flags, image_format, &image_desc, host_ptr, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateImage3D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, size_t image_width, size_t image_height, size_t image_depth, size_t image_row_pitch, size_t image_slice_pitch, void * host_ptr, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); cl_image_desc image_desc; image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; image_desc.image_width = image_width; image_desc.image_height = image_height; image_desc.image_depth = image_depth; image_desc.image_row_pitch = image_row_pitch; image_desc.image_slice_pitch = image_slice_pitch; mem = cl_mem_new_image(context, flags, image_format, &image_desc, host_ptr, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_int clRetainMemObject(cl_mem memobj) { cl_int err = CL_SUCCESS; CHECK_MEM (memobj); cl_mem_add_ref(memobj); error: return err; } cl_int clReleaseMemObject(cl_mem memobj) { cl_int err = CL_SUCCESS; CHECK_MEM (memobj); cl_mem_delete(memobj); error: return err; } cl_int clGetSupportedImageFormats(cl_context ctx, cl_mem_flags flags, cl_mem_object_type image_type, cl_uint num_entries, cl_image_format * image_formats, cl_uint * num_image_formats) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (ctx); if (UNLIKELY(num_entries == 0 && image_formats != NULL)) { err = CL_INVALID_VALUE; goto error; } if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE1D && image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY && image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY && image_type != CL_MEM_OBJECT_IMAGE2D && image_type != CL_MEM_OBJECT_IMAGE3D)) { err = CL_INVALID_VALUE; goto error; } err = cl_image_get_supported_fmt(ctx, image_type, num_entries, image_formats, num_image_formats); error: return err; } cl_int clGetMemObjectInfo(cl_mem memobj, cl_mem_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_MEM(memobj); err = cl_get_mem_object_info(memobj, param_name, param_value_size, param_value, param_value_size_ret); error: return err; } cl_int clGetImageInfo(cl_mem mem, cl_image_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { return cl_get_image_info(mem, param_name, param_value_size, param_value, param_value_size_ret); } cl_int clSetMemObjectDestructorCallback(cl_mem memobj, void (CL_CALLBACK *pfn_notify) (cl_mem, void*), void * user_data) { cl_int err = CL_SUCCESS; CHECK_MEM(memobj); INVALID_VALUE_IF (pfn_notify == 0); cl_mem_dstr_cb *cb = (cl_mem_dstr_cb*)malloc(sizeof(cl_mem_dstr_cb)); if (!cb) { err = CL_OUT_OF_HOST_MEMORY; goto error; } memset(cb, 0, sizeof(cl_mem_dstr_cb)); cb->pfn_notify = pfn_notify; cb->user_data = user_data; cb->next = memobj->dstr_cb; memobj->dstr_cb = cb; error: return err; } cl_sampler clCreateSampler(cl_context context, cl_bool normalized, cl_addressing_mode addressing, cl_filter_mode filter, cl_int * errcode_ret) { cl_sampler sampler = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); sampler = cl_sampler_new(context, normalized, addressing, filter, &err); error: if (errcode_ret) *errcode_ret = err; return sampler; } cl_int clRetainSampler(cl_sampler sampler) { cl_int err = CL_SUCCESS; CHECK_SAMPLER (sampler); cl_sampler_add_ref(sampler); error: return err; } cl_int clReleaseSampler(cl_sampler sampler) { cl_int err = CL_SUCCESS; CHECK_SAMPLER (sampler); cl_sampler_delete(sampler); error: return err; } cl_int clGetSamplerInfo(cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_SAMPLER (sampler); if (param_name == CL_SAMPLER_REFERENCE_COUNT) { FILL_GETINFO_RET (cl_uint, 1, (cl_uint*)&sampler->ref_n, CL_SUCCESS); } else if (param_name == CL_SAMPLER_CONTEXT) { FILL_GETINFO_RET (cl_context, 1, &sampler->ctx, CL_SUCCESS); } else if (param_name == CL_SAMPLER_NORMALIZED_COORDS) { FILL_GETINFO_RET (cl_bool, 1, &sampler->normalized_coords, CL_SUCCESS); } else if (param_name == CL_SAMPLER_ADDRESSING_MODE) { FILL_GETINFO_RET (cl_addressing_mode, 1, &sampler->address, CL_SUCCESS); } else if (param_name == CL_SAMPLER_FILTER_MODE ) { FILL_GETINFO_RET (cl_filter_mode, 1, &sampler->filter, CL_SUCCESS); } else{ return CL_INVALID_VALUE; } error: return err; } cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const char ** strings, const size_t * lengths, cl_int * errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; cl_uint i; CHECK_CONTEXT (context); INVALID_VALUE_IF (count == 0); INVALID_VALUE_IF (strings == NULL); for(i = 0; i < count; i++) { if(UNLIKELY(strings[i] == NULL)) { err = CL_INVALID_VALUE; goto error; } } program = cl_program_create_from_source(context, count, strings, lengths, &err); error: if (errcode_ret) *errcode_ret = err; return program; } cl_program clCreateProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id * devices, const size_t * lengths, const unsigned char ** binaries, cl_int * binary_status, cl_int * errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); program = cl_program_create_from_binary(context, num_devices, devices, lengths, binaries, binary_status, &err); error: if (errcode_ret) *errcode_ret = err; return program; } cl_program clCreateProgramWithBuiltInKernels(cl_context context, cl_uint num_devices, const cl_device_id * device_list, const char * kernel_names, cl_int * errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); INVALID_VALUE_IF (kernel_names == NULL); program = cl_program_create_with_built_in_kernles(context, num_devices, device_list, kernel_names, &err); error: if (errcode_ret) *errcode_ret = err; return program; } cl_int clRetainProgram(cl_program program) { cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); cl_program_add_ref(program); error: return err; } cl_int clReleaseProgram(cl_program program) { cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); cl_program_delete(program); error: return err; } cl_int clBuildProgram(cl_program program, cl_uint num_devices, const cl_device_id * device_list, const char * options, void (CL_CALLBACK *pfn_notify) (cl_program, void*), void * user_data) { cl_int err = CL_SUCCESS; CHECK_PROGRAM(program); INVALID_VALUE_IF (num_devices > 1); INVALID_VALUE_IF (num_devices == 0 && device_list != NULL); INVALID_VALUE_IF (num_devices != 0 && device_list == NULL); INVALID_VALUE_IF (pfn_notify == 0 && user_data != NULL); /* Everything is easy. We only support one device anyway */ if (num_devices != 0) { assert(program->ctx); INVALID_DEVICE_IF (device_list[0] != program->ctx->device); } /* TODO support create program from binary */ assert(program->source_type == FROM_LLVM || program->source_type == FROM_SOURCE || program->source_type == FROM_BINARY); if((err = cl_program_build(program, options)) != CL_SUCCESS) { goto error; } program->is_built = CL_TRUE; if (pfn_notify) pfn_notify(program, user_data); error: return err; } cl_int clCompileProgram(cl_program program , cl_uint num_devices , const cl_device_id * device_list , const char * options , cl_uint num_input_headers , const cl_program * input_headers , const char ** header_include_names , void (CL_CALLBACK * pfn_notify )(cl_program, void *), void * user_data ) { cl_int err = CL_SUCCESS; CHECK_PROGRAM(program); INVALID_VALUE_IF (num_devices > 1); INVALID_VALUE_IF (num_devices == 0 && device_list != NULL); INVALID_VALUE_IF (num_devices != 0 && device_list == NULL); INVALID_VALUE_IF (pfn_notify == 0 && user_data != NULL); INVALID_VALUE_IF (num_input_headers == 0 && input_headers != NULL); INVALID_VALUE_IF (num_input_headers != 0 && input_headers == NULL); /* Everything is easy. We only support one device anyway */ if (num_devices != 0) { assert(program->ctx); INVALID_DEVICE_IF (device_list[0] != program->ctx->device); } /* TODO support create program from binary */ assert(program->source_type == FROM_LLVM || program->source_type == FROM_SOURCE || program->source_type == FROM_BINARY); if((err = cl_program_compile(program, num_input_headers, input_headers, header_include_names, options)) != CL_SUCCESS) { goto error; } program->is_built = CL_TRUE; if (pfn_notify) pfn_notify(program, user_data); error: return err; } cl_program clLinkProgram(cl_context context, cl_uint num_devices, const cl_device_id * device_list, const char * options, cl_uint num_input_programs, const cl_program * input_programs, void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), void * user_data, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; cl_program program = NULL; CHECK_CONTEXT (context); INVALID_VALUE_IF (num_devices > 1); INVALID_VALUE_IF (num_devices == 0 && device_list != NULL); INVALID_VALUE_IF (num_devices != 0 && device_list == NULL); INVALID_VALUE_IF (pfn_notify == 0 && user_data != NULL); INVALID_VALUE_IF (num_input_programs == 0 && input_programs != NULL); INVALID_VALUE_IF (num_input_programs != 0 && input_programs == NULL); program = cl_program_link(context, num_input_programs, input_programs, options, &err); program->is_built = CL_TRUE; if (pfn_notify) pfn_notify(program, user_data); error: if (errcode_ret) *errcode_ret = err; return program; } cl_int clUnloadCompiler(void) { return CL_SUCCESS; } cl_int clUnloadPlatformCompiler(cl_platform_id platform) { return CL_SUCCESS; } cl_int clGetProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; char * ret_str = ""; CHECK_PROGRAM (program); if (param_name == CL_PROGRAM_REFERENCE_COUNT) { cl_uint ref = program->ref_n; FILL_GETINFO_RET (cl_uint, 1, (&ref), CL_SUCCESS); } else if (param_name == CL_PROGRAM_CONTEXT) { cl_context context = program->ctx; FILL_GETINFO_RET (cl_context, 1, &context, CL_SUCCESS); } else if (param_name == CL_PROGRAM_NUM_DEVICES) { cl_uint num_dev = 1; // Just 1 dev now. FILL_GETINFO_RET (cl_uint, 1, &num_dev, CL_SUCCESS); } else if (param_name == CL_PROGRAM_DEVICES) { cl_device_id dev_id = program->ctx->device; FILL_GETINFO_RET (cl_device_id, 1, &dev_id, CL_SUCCESS); } else if (param_name == CL_PROGRAM_NUM_KERNELS) { cl_uint kernels_num = program->ker_n; FILL_GETINFO_RET (cl_uint, 1, &kernels_num, CL_SUCCESS); } else if (param_name == CL_PROGRAM_SOURCE) { if (!program->source) FILL_GETINFO_RET (char, 1, &ret_str, CL_SUCCESS); FILL_GETINFO_RET (char, (strlen(program->source) + 1), program->source, CL_SUCCESS); } else if(param_name == CL_PROGRAM_KERNEL_NAMES) { cl_program_get_kernel_names(program, param_value_size, (char *)param_value, param_value_size_ret); } else if (param_name == CL_PROGRAM_BINARY_SIZES) { if (program->binary == NULL){ if( program->binary_type == CL_PROGRAM_BINARY_TYPE_EXECUTABLE) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 0); }else if( program->binary_type == CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 1); }else if( program->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 2); }else{ return CL_INVALID_BINARY; } } if (program->binary == NULL || program->binary_sz == 0) { return CL_OUT_OF_RESOURCES; } FILL_GETINFO_RET (size_t, 1, (&program->binary_sz), CL_SUCCESS); } else if (param_name == CL_PROGRAM_BINARIES) { if (param_value_size_ret) *param_value_size_ret = sizeof(void*); if (!param_value) return CL_SUCCESS; /* param_value points to an array of n pointers allocated by the caller */ if (program->binary == NULL) { if( program->binary_type == CL_PROGRAM_BINARY_TYPE_EXECUTABLE) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 0); }else if( program->binary_type == CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 1); }else if( program->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 2); }else{ return CL_INVALID_BINARY; } } if (program->binary == NULL || program->binary_sz == 0) { return CL_OUT_OF_RESOURCES; } memcpy(*((void **)param_value), program->binary, program->binary_sz); return CL_SUCCESS; } else { return CL_INVALID_VALUE; } error: return err; } cl_int clGetProgramBuildInfo(cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; char * ret_str = ""; CHECK_PROGRAM (program); INVALID_DEVICE_IF (device != program->ctx->device); if (param_name == CL_PROGRAM_BUILD_STATUS) { FILL_GETINFO_RET (cl_build_status, 1, &program->build_status, CL_SUCCESS); } else if (param_name == CL_PROGRAM_BUILD_OPTIONS) { if (program->is_built && program->build_opts) ret_str = program->build_opts; FILL_GETINFO_RET (char, (strlen(ret_str)+1), ret_str, CL_SUCCESS); } else if (param_name == CL_PROGRAM_BUILD_LOG) { FILL_GETINFO_RET (char, program->build_log_sz + 1, program->build_log, CL_SUCCESS); if (param_value_size_ret) *param_value_size_ret = program->build_log_sz + 1; }else if (param_name == CL_PROGRAM_BINARY_TYPE){ FILL_GETINFO_RET (cl_uint, 1, &program->binary_type, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_kernel clCreateKernel(cl_program program, const char * kernel_name, cl_int * errcode_ret) { cl_kernel kernel = NULL; cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); if (program->ker_n <= 0) { err = CL_INVALID_PROGRAM_EXECUTABLE; goto error; } INVALID_VALUE_IF (kernel_name == NULL); kernel = cl_program_create_kernel(program, kernel_name, &err); error: if (errcode_ret) *errcode_ret = err; return kernel; } cl_int clCreateKernelsInProgram(cl_program program, cl_uint num_kernels, cl_kernel * kernels, cl_uint * num_kernels_ret) { cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); if (program->ker_n <= 0) { err = CL_INVALID_PROGRAM_EXECUTABLE; goto error; } if (kernels && num_kernels < program->ker_n) { err = CL_INVALID_VALUE; goto error; } if(num_kernels_ret) *num_kernels_ret = program->ker_n; if(kernels) err = cl_program_create_kernels_in_program(program, kernels); error: return err; } cl_int clRetainKernel(cl_kernel kernel) { cl_int err = CL_SUCCESS; CHECK_KERNEL(kernel); cl_kernel_add_ref(kernel); error: return err; } cl_int clReleaseKernel(cl_kernel kernel) { cl_int err = CL_SUCCESS; CHECK_KERNEL(kernel); cl_kernel_delete(kernel); error: return err; } cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void * arg_value) { cl_int err = CL_SUCCESS; CHECK_KERNEL(kernel); err = cl_kernel_set_arg(kernel, arg_index, arg_size, arg_value); error: return err; } cl_int clGetKernelArgInfo(cl_kernel kernel, cl_uint arg_index, cl_kernel_arg_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_KERNEL(kernel); if (param_name != CL_KERNEL_ARG_ADDRESS_QUALIFIER && param_name != CL_KERNEL_ARG_ACCESS_QUALIFIER && param_name != CL_KERNEL_ARG_TYPE_NAME && param_name != CL_KERNEL_ARG_TYPE_QUALIFIER && param_name != CL_KERNEL_ARG_NAME) { err = CL_INVALID_VALUE; goto error; } if (arg_index >= kernel->arg_n) { err = CL_INVALID_ARG_INDEX; goto error; } err = cl_get_kernel_arg_info(kernel, arg_index, param_name, param_value_size, param_value, param_value_size_ret); error: return err; } cl_int clGetKernelInfo(cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err; CHECK_KERNEL(kernel); if (param_name == CL_KERNEL_CONTEXT) { FILL_GETINFO_RET (cl_context, 1, &kernel->program->ctx, CL_SUCCESS); } else if (param_name == CL_KERNEL_PROGRAM) { FILL_GETINFO_RET (cl_program, 1, &kernel->program, CL_SUCCESS); } else if (param_name == CL_KERNEL_NUM_ARGS) { cl_uint n = kernel->arg_n; FILL_GETINFO_RET (cl_uint, 1, &n, CL_SUCCESS); } else if (param_name == CL_KERNEL_REFERENCE_COUNT) { cl_int ref = kernel->ref_n; FILL_GETINFO_RET (cl_int, 1, &ref, CL_SUCCESS); } else if (param_name == CL_KERNEL_FUNCTION_NAME) { const char * n = cl_kernel_get_name(kernel); FILL_GETINFO_RET (cl_char, strlen(n)+1, n, CL_SUCCESS); } else if (param_name == CL_KERNEL_ATTRIBUTES) { const char * n = cl_kernel_get_attributes(kernel); FILL_GETINFO_RET (cl_char, strlen(n)+1, n, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_int clGetKernelWorkGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { return cl_get_kernel_workgroup_info(kernel, device, param_name, param_value_size, param_value, param_value_size_ret); } cl_int clWaitForEvents(cl_uint num_events, const cl_event * event_list) { cl_int err = CL_SUCCESS; cl_context ctx = NULL; if(num_events > 0 && event_list) ctx = event_list[0]->ctx; TRY(cl_event_check_waitlist, num_events, event_list, NULL, ctx); while(cl_event_wait_events(num_events, event_list, NULL) == CL_ENQUEUE_EXECUTE_DEFER) { usleep(8000); //sleep 8ms to wait other thread } error: return err; } cl_int clGetEventInfo(cl_event event, cl_event_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); if (param_name == CL_EVENT_COMMAND_QUEUE) { FILL_GETINFO_RET (cl_command_queue, 1, &event->queue, CL_SUCCESS); } else if (param_name == CL_EVENT_CONTEXT) { FILL_GETINFO_RET (cl_context, 1, &event->ctx, CL_SUCCESS); } else if (param_name == CL_EVENT_COMMAND_TYPE) { FILL_GETINFO_RET (cl_command_type, 1, &event->type, CL_SUCCESS); } else if (param_name == CL_EVENT_COMMAND_EXECUTION_STATUS) { cl_event_update_status(event, 0); FILL_GETINFO_RET (cl_int, 1, &event->status, CL_SUCCESS); } else if (param_name == CL_EVENT_REFERENCE_COUNT) { cl_uint ref = event->ref_n; FILL_GETINFO_RET (cl_int, 1, &ref, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_event clCreateUserEvent(cl_context context, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; cl_event event = NULL; CHECK_CONTEXT(context); TRY_ALLOC(event, cl_event_new(context, NULL, CL_COMMAND_USER, CL_TRUE)); error: if(errcode_ret) *errcode_ret = err; return event; } cl_int clRetainEvent(cl_event event) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); cl_event_add_ref(event); error: return err; } cl_int clReleaseEvent(cl_event event) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); cl_event_delete(event); error: return err; } cl_int clSetUserEventStatus(cl_event event, cl_int execution_status) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); if(execution_status > CL_COMPLETE) { err = CL_INVALID_VALUE; goto error; } if(event->status != CL_SUBMITTED) { err = CL_INVALID_OPERATION; goto error; } cl_event_set_status(event, execution_status); error: return err; } cl_int clSetEventCallback(cl_event event, cl_int command_exec_callback_type, void (CL_CALLBACK * pfn_notify) (cl_event, cl_int, void *), void * user_data) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); if((pfn_notify == NULL) || (command_exec_callback_type > CL_SUBMITTED) || (command_exec_callback_type < CL_COMPLETE)) { err = CL_INVALID_VALUE; goto error; } err = cl_event_set_callback(event, command_exec_callback_type, pfn_notify, user_data); error: return err; } cl_int clGetEventProfilingInfo(cl_event event, cl_profiling_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; cl_ulong ret_val; CHECK_EVENT(event); if (event->type == CL_COMMAND_USER || !(event->queue->props & CL_QUEUE_PROFILING_ENABLE) || event->status != CL_COMPLETE) { err = CL_PROFILING_INFO_NOT_AVAILABLE; goto error; } if (param_value && param_value_size < sizeof(cl_ulong)) { err = CL_INVALID_VALUE; goto error; } if (param_name == CL_PROFILING_COMMAND_QUEUED) { ret_val = event->timestamp[0]; } else if (param_name == CL_PROFILING_COMMAND_SUBMIT) { ret_val = event->timestamp[1]; } else if (param_name == CL_PROFILING_COMMAND_START) { err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_START); ret_val = event->timestamp[2]; } else if (param_name == CL_PROFILING_COMMAND_END) { err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_END); ret_val = event->timestamp[3]; } else { err = CL_INVALID_VALUE; goto error; } if (err == CL_SUCCESS) { if (param_value) *(cl_ulong*)param_value = ret_val; if (param_value_size_ret) *param_value_size_ret = sizeof(cl_ulong); } error: return err; } cl_int clFlush(cl_command_queue command_queue) { /* have nothing to do now, as currently * clEnqueueNDRangeKernel will flush at * the end of each calling. we may need * to optimize it latter.*/ return 0; } cl_int clFinish(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE (command_queue); err = cl_command_queue_finish(command_queue); error: return err; } cl_int clEnqueueReadBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t size, void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, defer_enqueue_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!ptr || !size || offset + size > buffer->size) { err = CL_INVALID_VALUE; goto error; } if (buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &defer_enqueue_data; data->type = EnqueueReadBuffer; data->mem_obj = buffer; data->ptr = ptr; data->offset = offset; data->size = size; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? *event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueReadBufferRect(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, const size_t * buffer_origin, const size_t * host_origin, const size_t * region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) { err = CL_INVALID_VALUE; goto error; } if(buffer_row_pitch == 0) buffer_row_pitch = region[0]; if(buffer_slice_pitch == 0) buffer_slice_pitch = region[1] * buffer_row_pitch; if(host_row_pitch == 0) host_row_pitch = region[0]; if(host_slice_pitch == 0) host_slice_pitch = region[1] * host_row_pitch; if (buffer_row_pitch < region[0] || host_row_pitch < region[0]) { err = CL_INVALID_VALUE; goto error; } if ((buffer_slice_pitch < region[1] * buffer_row_pitch || buffer_slice_pitch % buffer_row_pitch != 0 ) || (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch % host_row_pitch != 0 )) { err = CL_INVALID_VALUE; goto error; } if ((buffer_origin[2] + region[2] - 1) * buffer_slice_pitch + (buffer_origin[1] + region[1] - 1) * buffer_row_pitch + buffer_origin[0] + region[0] > buffer->size) { err = CL_INVALID_VALUE; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueReadBufferRect; data->mem_obj = buffer; data->ptr = ptr; data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2]; data->host_origin[0] = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = buffer_row_pitch; data->slice_pitch = buffer_slice_pitch; data->host_row_pitch = host_row_pitch; data->host_slice_pitch = host_slice_pitch; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_READ_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? *event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueWriteBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, size_t size, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!ptr || !size || offset + size > buffer->size) { err = CL_INVALID_VALUE; goto error; } if (buffer->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueWriteBuffer; data->mem_obj = buffer; data->const_ptr = ptr; data->offset = offset; data->size = size; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_WRITE_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? *event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueWriteBufferRect(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, const size_t * buffer_origin, const size_t * host_origin, const size_t * region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (buffer->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) { err = CL_INVALID_VALUE; goto error; } if(buffer_row_pitch == 0) buffer_row_pitch = region[0]; if(buffer_slice_pitch == 0) buffer_slice_pitch = region[1] * buffer_row_pitch; if(host_row_pitch == 0) host_row_pitch = region[0]; if(host_slice_pitch == 0) host_slice_pitch = region[1] * host_row_pitch; if (buffer_row_pitch < region[0] || host_row_pitch < region[0]) { err = CL_INVALID_VALUE; goto error; } if ((buffer_slice_pitch < region[1] * buffer_row_pitch || buffer_slice_pitch % buffer_row_pitch != 0 ) || (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch % host_row_pitch != 0 )) { err = CL_INVALID_VALUE; goto error; } if ((buffer_origin[2] + region[2] - 1) * buffer_slice_pitch + (buffer_origin[1] + region[1] - 1) * buffer_row_pitch + buffer_origin[0] + region[0] > buffer->size) { err = CL_INVALID_VALUE; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueWriteBufferRect; data->mem_obj = buffer; data->const_ptr = ptr; data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2]; data->host_origin[0] = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = buffer_row_pitch; data->slice_pitch = buffer_slice_pitch; data->host_row_pitch = host_row_pitch; data->host_slice_pitch = host_slice_pitch; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_WRITE_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? *event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueFillImage(cl_command_queue command_queue, cl_mem image, const void * fill_color, const size_t * porigin, const size_t * pregion, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(image, src_image); FIXUP_IMAGE_REGION(src_image, pregion, region); FIXUP_IMAGE_ORIGIN(src_image, porigin, origin); if (command_queue->ctx != image->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (fill_color == NULL) { err = CL_INVALID_VALUE; goto error; } if (!origin || !region || origin[0] + region[0] > src_image->w || origin[1] + region[1] > src_image->h || origin[2] + region[2] > src_image->depth) { err = CL_INVALID_VALUE; goto error; } if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (origin[2] != 0 || region[2] != 1)){ err = CL_INVALID_VALUE; goto error; } if (src_image->image_type == CL_MEM_OBJECT_IMAGE1D && (origin[2] != 0 ||origin[1] != 0 || region[2] != 1 || region[1] != 1)){ err = CL_INVALID_VALUE; goto error; } err = cl_image_fill(command_queue, fill_color, src_image, origin, region); if (err) { goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx); data = &no_wait_data; data->type = EnqueueFillImage; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_FILL_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) time_end(command_queue->ctx, "beignet internal kernel : cl_fill_image", "", command_queue); return 0; error: return err; } cl_int clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer, const void * pattern, size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; static size_t valid_sz[] = {1, 2, 4, 8, 16, 32, 64, 128}; int i = 0; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (offset + size > buffer->size) { err = CL_INVALID_VALUE; goto error; } if (pattern == NULL) { err = CL_INVALID_VALUE; goto error; } for (i = 0; i < sizeof(valid_sz) / sizeof(size_t); i++) { if (valid_sz[i] == pattern_size) break; } if (i == sizeof(valid_sz) / sizeof(size_t)) { err = CL_INVALID_VALUE; goto error; } if (offset % pattern_size || size % pattern_size) { err = CL_INVALID_VALUE; goto error; } err = cl_mem_fill(command_queue, pattern, pattern_size, buffer, offset, size); if (err) { goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueFillBuffer; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_FILL_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) time_end(command_queue->ctx, "beignet internal kernel : cl_fill_buffer", "", command_queue); return 0; error: return err; } cl_int clEnqueueCopyBuffer(cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(src_buffer); CHECK_MEM(dst_buffer); if (command_queue->ctx != src_buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (command_queue->ctx != dst_buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (src_offset + cb > src_buffer->size) { err = CL_INVALID_VALUE; goto error; } if (dst_offset + cb > dst_buffer->size) { err = CL_INVALID_VALUE; goto error; } /* Check overlap */ if (src_buffer == dst_buffer && (src_offset <= dst_offset && dst_offset <= src_offset + cb - 1) && (dst_offset <= src_offset && src_offset <= dst_offset + cb - 1)) { err = CL_MEM_COPY_OVERLAP; goto error; } /* Check sub overlap */ if (src_buffer->type == CL_MEM_SUBBUFFER_TYPE && dst_buffer->type == CL_MEM_SUBBUFFER_TYPE ) { struct _cl_mem_buffer* src_b = (struct _cl_mem_buffer*)src_buffer; struct _cl_mem_buffer* dst_b = (struct _cl_mem_buffer*)dst_buffer; size_t src_sub_offset = src_b->sub_offset; size_t dst_sub_offset = dst_b->sub_offset; if ((src_offset + src_sub_offset <= dst_offset + dst_sub_offset && dst_offset + dst_sub_offset <= src_offset + src_sub_offset + cb - 1) && (dst_offset + dst_sub_offset <= src_offset + src_sub_offset && src_offset + src_sub_offset <= dst_offset + dst_sub_offset + cb - 1)) { err = CL_MEM_COPY_OVERLAP; goto error; } } err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset, dst_offset, cb); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx); data = &no_wait_data; data->type = EnqueueCopyBuffer; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy", "", command_queue); return 0; error: return err; } cl_int clEnqueueCopyBufferRect(cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, const size_t * src_origin, const size_t * dst_origin, const size_t * region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(src_buffer); CHECK_MEM(dst_buffer); if ((command_queue->ctx != src_buffer->ctx) || (command_queue->ctx != dst_buffer->ctx)) { err = CL_INVALID_CONTEXT; goto error; } if (!region || region[0] == 0 || region[1] == 0 || region[2] == 0) { err = CL_INVALID_VALUE; goto error; } if(src_row_pitch == 0) src_row_pitch = region[0]; if(src_slice_pitch == 0) src_slice_pitch = region[1] * src_row_pitch; if(dst_row_pitch == 0) dst_row_pitch = region[0]; if(dst_slice_pitch == 0) dst_slice_pitch = region[1] * dst_row_pitch; if (src_row_pitch < region[0] || dst_row_pitch < region[0]) { err = CL_INVALID_VALUE; goto error; } if ((src_slice_pitch < region[1] * src_row_pitch || src_slice_pitch % src_row_pitch != 0 ) || (dst_slice_pitch < region[1] * dst_row_pitch || dst_slice_pitch % dst_row_pitch != 0 )) { err = CL_INVALID_VALUE; goto error; } if ((src_origin[2] + region[2] - 1) * src_slice_pitch + (src_origin[1] + region[1] - 1) * src_row_pitch + src_origin[0] + region[0] > src_buffer->size ||(dst_origin[2] + region[2] - 1) * dst_slice_pitch + (dst_origin[1] + region[1] - 1) * dst_row_pitch + dst_origin[0] + region[0] > dst_buffer->size) { err = CL_INVALID_VALUE; goto error; } if (src_buffer == dst_buffer && (src_row_pitch != dst_row_pitch || src_slice_pitch != dst_slice_pitch)) { err = CL_INVALID_VALUE; goto error; } if (src_buffer == dst_buffer && check_copy_overlap(src_origin, dst_origin, region, src_row_pitch, src_slice_pitch)) { err = CL_MEM_COPY_OVERLAP; goto error; } cl_mem_copy_buffer_rect(command_queue, src_buffer, dst_buffer, src_origin, dst_origin, region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx); data = &no_wait_data; data->type = EnqueueCopyBufferRect; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_buffer_rect", "", command_queue); error: return err; } cl_int clEnqueueReadImage(cl_command_queue command_queue, cl_mem mem, cl_bool blocking_read, const size_t * porigin, const size_t * pregion, size_t row_pitch, size_t slice_pitch, void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(mem, image); FIXUP_IMAGE_REGION(image, pregion, region); FIXUP_IMAGE_ORIGIN(image, porigin, origin); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + region[1] > image->h || origin[2] + region[2] > image->depth) { err = CL_INVALID_VALUE; goto error; } if (!row_pitch) row_pitch = image->bpp*region[0]; else if (row_pitch < image->bpp*region[0]) { err = CL_INVALID_VALUE; goto error; } if (image->slice_pitch) { if (!slice_pitch) slice_pitch = row_pitch*region[1]; else if (slice_pitch < row_pitch*region[1]) { err = CL_INVALID_VALUE; goto error; } } else if (slice_pitch) { err = CL_INVALID_VALUE; goto error; } if (!ptr) { err = CL_INVALID_VALUE; goto error; } if (mem->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx); data = &no_wait_data; data->type = EnqueueReadImage; data->mem_obj = mem; data->ptr = ptr; data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = row_pitch; data->slice_pitch = slice_pitch; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_READ_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? *event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueWriteImage(cl_command_queue command_queue, cl_mem mem, cl_bool blocking_write, const size_t * porigin, const size_t * pregion, size_t row_pitch, size_t slice_pitch, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(mem, image); FIXUP_IMAGE_REGION(image, pregion, region); FIXUP_IMAGE_ORIGIN(image, porigin, origin); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + region[1] > image->h || origin[2] + region[2] > image->depth) { err = CL_INVALID_VALUE; goto error; } if (!row_pitch) row_pitch = image->bpp*region[0]; else if (row_pitch < image->bpp*region[0]) { err = CL_INVALID_VALUE; goto error; } if (image->slice_pitch) { if (!slice_pitch) slice_pitch = row_pitch*region[1]; else if (slice_pitch < row_pitch*region[1]) { err = CL_INVALID_VALUE; goto error; } } else if (slice_pitch) { err = CL_INVALID_VALUE; goto error; } if (!ptr) { err = CL_INVALID_VALUE; goto error; } if (mem->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx); data = &no_wait_data; data->type = EnqueueWriteImage; data->mem_obj = mem; data->const_ptr = ptr; data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = row_pitch; data->slice_pitch = slice_pitch; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_WRITE_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? *event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueCopyImage(cl_command_queue command_queue, cl_mem src_mem, cl_mem dst_mem, const size_t * psrc_origin, const size_t * pdst_origin, const size_t * pregion, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; cl_bool overlap = CL_TRUE; cl_int i = 0; CHECK_QUEUE(command_queue); CHECK_IMAGE(src_mem, src_image); CHECK_IMAGE(dst_mem, dst_image); FIXUP_IMAGE_REGION(src_image, pregion, region); FIXUP_IMAGE_ORIGIN(src_image, psrc_origin, src_origin); FIXUP_IMAGE_ORIGIN(dst_image, pdst_origin, dst_origin); if (command_queue->ctx != src_mem->ctx || command_queue->ctx != dst_mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (src_image->fmt.image_channel_order != dst_image->fmt.image_channel_order || src_image->fmt.image_channel_data_type != dst_image->fmt.image_channel_data_type) { err = CL_IMAGE_FORMAT_MISMATCH; goto error; } if (!src_origin || !region || src_origin[0] + region[0] > src_image->w || src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] > src_image->depth) { err = CL_INVALID_VALUE; goto error; } if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w || dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] > dst_image->depth) { err = CL_INVALID_VALUE; goto error; } if ((src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 || region[2] != 1)) || (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D && (dst_origin[2] != 0 || region[2] != 1))) { err = CL_INVALID_VALUE; goto error; } if (src_image == dst_image) { for(i = 0; i < 3; i++) overlap = overlap && (src_origin[i] < dst_origin[i] + region[i]) && (dst_origin[i] < src_origin[i] + region[i]); if(overlap == CL_TRUE) { err = CL_MEM_COPY_OVERLAP; goto error; } } cl_mem_kernel_copy_image(command_queue, src_image, dst_image, src_origin, dst_origin, region); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_mem->ctx); data = &no_wait_data; data->type = EnqueueCopyImage; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) time_end(command_queue->ctx, "beignet internal kernel : cl_mem_kernel_copy_image", "", command_queue); error: return err; } cl_int clEnqueueCopyImageToBuffer(cl_command_queue command_queue, cl_mem src_mem, cl_mem dst_buffer, const size_t * psrc_origin, const size_t * pregion, size_t dst_offset, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(src_mem, src_image); CHECK_MEM(dst_buffer); FIXUP_IMAGE_REGION(src_image, pregion, region); FIXUP_IMAGE_ORIGIN(src_image, psrc_origin, src_origin); if (command_queue->ctx != src_mem->ctx || command_queue->ctx != dst_buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (dst_offset + region[0]*region[1]*region[2]*src_image->bpp > dst_buffer->size) { err = CL_INVALID_VALUE; goto error; } if (!src_origin || !region || src_origin[0] + region[0] > src_image->w || src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] > src_image->depth) { err = CL_INVALID_VALUE; goto error; } if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 || region[2] != 1)) { err = CL_INVALID_VALUE; goto error; } cl_mem_copy_image_to_buffer(command_queue, src_image, dst_buffer, src_origin, dst_offset, region); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_mem->ctx); data = &no_wait_data; data->type = EnqueueCopyImageToBuffer; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_IMAGE_TO_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_image_to_buffer", "", command_queue); error: return err; } cl_int clEnqueueCopyBufferToImage(cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_mem, size_t src_offset, const size_t * pdst_origin, const size_t * pregion, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(src_buffer); CHECK_IMAGE(dst_mem, dst_image); FIXUP_IMAGE_REGION(dst_image, pregion, region); FIXUP_IMAGE_ORIGIN(dst_image, pdst_origin, dst_origin); if (command_queue->ctx != src_buffer->ctx || command_queue->ctx != dst_mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (src_offset + region[0]*region[1]*region[2]*dst_image->bpp > src_buffer->size) { err = CL_INVALID_VALUE; goto error; } if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w || dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] > dst_image->depth) { err = CL_INVALID_VALUE; goto error; } if (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D && (dst_origin[2] != 0 || region[2] != 1)) { err = CL_INVALID_VALUE; goto error; } cl_mem_copy_buffer_to_image(command_queue, src_buffer, dst_image, src_offset, dst_origin, region); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, dst_mem->ctx); data = &no_wait_data; data->type = EnqueueCopyBufferToImage; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_BUFFER_TO_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_buffer_to_image", "", command_queue); error: return err; } static cl_int _cl_map_mem(cl_mem mem, void *ptr, void **mem_ptr, size_t offset, size_t size, const size_t *origin, const size_t *region) { cl_int slot = -1; int err = CL_SUCCESS; size_t sub_offset = 0; if(mem->type == CL_MEM_SUBBUFFER_TYPE) { struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; sub_offset = buffer->sub_offset; } ptr = (char*)ptr + offset + sub_offset; if(mem->flags & CL_MEM_USE_HOST_PTR) { assert(mem->host_ptr); //only calc ptr here, will do memcpy in enqueue *mem_ptr = (char *)mem->host_ptr + offset + sub_offset; } else { *mem_ptr = ptr; } /* Record the mapped address. */ if (!mem->mapped_ptr_sz) { mem->mapped_ptr_sz = 16; mem->mapped_ptr = (cl_mapped_ptr *)malloc( sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz); if (!mem->mapped_ptr) { cl_mem_unmap_auto(mem); err = CL_OUT_OF_HOST_MEMORY; goto error; } memset(mem->mapped_ptr, 0, mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); slot = 0; } else { int i = 0; for (; i < mem->mapped_ptr_sz; i++) { if (mem->mapped_ptr[i].ptr == NULL) { slot = i; break; } } if (i == mem->mapped_ptr_sz) { cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2); if (!new_ptr) { cl_mem_unmap_auto(mem); err = CL_OUT_OF_HOST_MEMORY; goto error; } memset(new_ptr, 0, 2 * mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); memcpy(new_ptr, mem->mapped_ptr, mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); slot = mem->mapped_ptr_sz; mem->mapped_ptr_sz *= 2; free(mem->mapped_ptr); mem->mapped_ptr = new_ptr; } } assert(slot != -1); mem->mapped_ptr[slot].ptr = *mem_ptr; mem->mapped_ptr[slot].v_ptr = ptr; mem->mapped_ptr[slot].size = size; if(origin) { assert(region); mem->mapped_ptr[slot].origin[0] = origin[0]; mem->mapped_ptr[slot].origin[1] = origin[1]; mem->mapped_ptr[slot].origin[2] = origin[2]; mem->mapped_ptr[slot].region[0] = region[0]; mem->mapped_ptr[slot].region[1] = region[1]; mem->mapped_ptr[slot].region[2] = region[2]; } mem->map_ref++; error: if (err != CL_SUCCESS) *mem_ptr = NULL; return err; } void * clEnqueueMapBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; void *ptr = NULL; void *mem_ptr = NULL; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!size || offset + size > buffer->size) { err = CL_INVALID_VALUE; goto error; } if ((map_flags & CL_MAP_READ && buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) || (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION) && buffer->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS))) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueMapBuffer; data->mem_obj = buffer; data->offset = offset; data->size = size; data->ptr = ptr; data->unsync_map = 1; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_MAP_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { data->unsync_map = 0; err = cl_enqueue_handle(event ? *event : NULL, data); if (err != CL_SUCCESS) goto error; ptr = data->ptr; if(event) cl_event_set_status(*event, CL_COMPLETE); } else { if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) { err = CL_MAP_FAILURE; goto error; } } err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size, NULL, NULL); if (err != CL_SUCCESS) goto error; error: if (errcode_ret) *errcode_ret = err; return mem_ptr; } void * clEnqueueMapImage(cl_command_queue command_queue, cl_mem mem, cl_bool blocking_map, cl_map_flags map_flags, const size_t * porigin, const size_t * pregion, size_t * image_row_pitch, size_t * image_slice_pitch, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; void *ptr = NULL; void *mem_ptr = NULL; size_t offset = 0; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(mem, image); FIXUP_IMAGE_REGION(image, pregion, region); FIXUP_IMAGE_ORIGIN(image, porigin, origin); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + region[1] > image->h || origin[2] + region[2] > image->depth) { err = CL_INVALID_VALUE; goto error; } if (!image_row_pitch || (image->slice_pitch && !image_slice_pitch)) { err = CL_INVALID_VALUE; goto error; } if ((map_flags & CL_MAP_READ && mem->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) || (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION) && mem->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS))) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx); data = &no_wait_data; data->type = EnqueueMapImage; data->mem_obj = mem; data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->ptr = ptr; data->unsync_map = 1; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_MAP_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { data->unsync_map = 0; err = cl_enqueue_handle(event ? *event : NULL, data); if (err != CL_SUCCESS) goto error; ptr = data->ptr; if(event) cl_event_set_status(*event, CL_COMPLETE); } else { if ((ptr = cl_mem_map_gtt_unsync(mem)) == NULL) { err = CL_MAP_FAILURE; goto error; } } if(mem->flags & CL_MEM_USE_HOST_PTR) { if (image_slice_pitch) *image_slice_pitch = image->host_slice_pitch; *image_row_pitch = image->host_row_pitch; offset = image->bpp*origin[0] + image->host_row_pitch*origin[1] + image->host_slice_pitch*origin[2]; } else { if (image_slice_pitch) *image_slice_pitch = image->slice_pitch; if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) *image_row_pitch = image->slice_pitch; else *image_row_pitch = image->row_pitch; offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2]; } err = _cl_map_mem(mem, ptr, &mem_ptr, offset, 0, origin, region); error: if (errcode_ret) *errcode_ret = err; return mem_ptr; //TODO: map and unmap first } cl_int clEnqueueUnmapMemObject(cl_command_queue command_queue, cl_mem memobj, void * mapped_ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(memobj); if (command_queue->ctx != memobj->ctx) { err = CL_INVALID_CONTEXT; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, memobj->ctx); data = &no_wait_data; data->type = EnqueueUnmapMemObject; data->mem_obj = memobj; data->ptr = mapped_ptr; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_UNMAP_MEM_OBJECT) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? *event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueMigrateMemObjects(cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem * mem_objects, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { /* So far, we just support 1 device and no subdevice. So all the command queues belong to the small context. There is no need to migrate the mem objects by now. */ cl_int err = CL_SUCCESS; cl_uint i = 0; enqueue_data *data, defer_enqueue_data = { 0 }; if (!flags & CL_MIGRATE_MEM_OBJECT_HOST) CHECK_QUEUE(command_queue); if (num_mem_objects == 0 || mem_objects == NULL) { err = CL_INVALID_VALUE; goto error; } if (flags && flags & ~(CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)) { err = CL_INVALID_VALUE; goto error; } for (i = 0; i < num_mem_objects; i++) { CHECK_MEM(mem_objects[i]); if (mem_objects[i]->ctx != command_queue->ctx) { err = CL_INVALID_CONTEXT; goto error; } } /* really nothing to do, fill the event. */ TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); data = &defer_enqueue_data; data->type = EnqueueMigrateMemObj; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? *event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t * global_work_offset, const size_t * global_work_size, const size_t * local_work_size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { size_t fixed_global_off[] = {0,0,0}; size_t fixed_global_sz[] = {1,1,1}; size_t fixed_local_sz[] = {1,1,1}; cl_int err = CL_SUCCESS; cl_uint i; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_KERNEL(kernel); /* Check number of dimensions we have */ if (UNLIKELY(work_dim == 0 || work_dim > 3)) { err = CL_INVALID_WORK_DIMENSION; goto error; } /* We need a work size per dimension */ if (UNLIKELY(global_work_size == NULL)) { err = CL_INVALID_GLOBAL_WORK_SIZE; goto error; } if (global_work_offset != NULL) for (i = 0; i < work_dim; ++i) { if (UNLIKELY(global_work_offset[i] + global_work_size[i] > (size_t)-1)) { err = CL_INVALID_GLOBAL_OFFSET; goto error; } } /* Local sizes must be non-null and divide global sizes */ if (local_work_size != NULL) for (i = 0; i < work_dim; ++i) if (UNLIKELY(local_work_size[i] == 0 || global_work_size[i] % local_work_size[i])) { err = CL_INVALID_WORK_GROUP_SIZE; goto error; } /* Queue and kernel must share the same context */ assert(kernel->program); if (command_queue->ctx != kernel->program->ctx) { err = CL_INVALID_CONTEXT; goto error; } /* XXX No event right now */ //FATAL_IF(num_events_in_wait_list > 0, "Events are not supported"); //FATAL_IF(event_wait_list != NULL, "Events are not supported"); //FATAL_IF(event != NULL, "Events are not supported"); if (local_work_size != NULL) { for (i = 0; i < work_dim; ++i) fixed_local_sz[i] = local_work_size[i]; } else { uint j, maxDimSize = 64 /* from 64? */, maxGroupSize = 256; //MAX_WORK_GROUP_SIZE may too large for (i = 0; i< work_dim; i++) { for (j = maxDimSize; j > 1; j--) { if (global_work_size[i] % j == 0 && j <= maxGroupSize) { fixed_local_sz[i] = j; maxGroupSize = maxGroupSize /j; maxDimSize = maxGroupSize > maxDimSize ? maxDimSize : maxGroupSize; break; //choose next work_dim } } } } if (global_work_size != NULL) for (i = 0; i < work_dim; ++i) fixed_global_sz[i] = global_work_size[i]; if (global_work_offset != NULL) for (i = 0; i < work_dim; ++i) fixed_global_off[i] = global_work_offset[i]; if (kernel->compile_wg_sz[0] || kernel->compile_wg_sz[1] || kernel->compile_wg_sz[2]) { if (fixed_local_sz[0] != kernel->compile_wg_sz[0] || fixed_local_sz[1] != kernel->compile_wg_sz[1] || fixed_local_sz[2] != kernel->compile_wg_sz[2]) { err = CL_INVALID_WORK_GROUP_SIZE; goto error; } } /* Do device specific checks are enqueue the kernel */ err = cl_command_queue_ND_range(command_queue, kernel, work_dim, fixed_global_off, fixed_global_sz, fixed_local_sz); if(err != CL_SUCCESS) goto error; data = &no_wait_data; data->type = EnqueueNDRangeKernel; data->queue = command_queue; printf("run kernel %s \n", cl_kernel_get_name(kernel)); fflush(stdout); if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_NDRANGE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } clFinish(command_queue); printf("end kernel %s \n", cl_kernel_get_name(kernel)); fflush(stdout); if(b_output_kernel_perf) { if(kernel->program->build_opts != NULL) time_end(command_queue->ctx, cl_kernel_get_name(kernel), kernel->program->build_opts, command_queue); else time_end(command_queue->ctx, cl_kernel_get_name(kernel), "", command_queue); } error: return err; } cl_int clEnqueueTask(cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { const size_t global_size[3] = {1, 0, 0}; const size_t local_size[3] = {1, 0, 0}; return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_size, local_size, num_events_in_wait_list, event_wait_list, event); } cl_int clEnqueueNativeKernel(cl_command_queue command_queue, void (*user_func)(void *), void * args, size_t cb_args, cl_uint num_mem_objects, const cl_mem * mem_list, const void ** args_mem_loc, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; void *new_args = NULL; enqueue_data *data, no_wait_data = { 0 }; cl_int i; if(user_func == NULL || (args == NULL && cb_args > 0) || (args == NULL && num_mem_objects ==0) || (args != NULL && cb_args == 0) || (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) || (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL))) { err = CL_INVALID_VALUE; goto error; } //Per spec, need copy args if (cb_args) { new_args = malloc(cb_args); if (!new_args) { err = CL_OUT_OF_HOST_MEMORY; goto error; } memcpy(new_args, args, cb_args); for (i=0; ictx); data = &no_wait_data; data->type = EnqueueNativeKernel; data->mem_list = mem_list; data->ptr = new_args; data->size = cb_args; data->offset = (size_t)num_mem_objects; data->const_ptr = args_mem_loc; data->user_func = user_func; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_NATIVE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? *event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueMarker(cl_command_queue command_queue, cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); if(event == NULL) { err = CL_INVALID_VALUE; goto error; } cl_event_marker_with_wait_list(command_queue, 0, NULL, event); error: return err; } cl_int clEnqueueMarkerWithWaitList(cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } cl_int clEnqueueWaitForEvents(cl_command_queue command_queue, cl_uint num_events, const cl_event * event_list) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); err = clWaitForEvents(num_events, event_list); error: return err; } cl_int clEnqueueBarrier(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); cl_event_barrier_with_wait_list(command_queue, 0, NULL, NULL); error: return err; } cl_int clEnqueueBarrierWithWaitList(cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); cl_event_barrier_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } #define EXTFUNC(x) \ if (strcmp(#x, func_name) == 0) \ return (void *)x; static void* internal_clGetExtensionFunctionAddress(const char *func_name) { if (func_name == NULL) return NULL; #ifdef HAS_OCLIcd /* cl_khr_icd */ EXTFUNC(clIcdGetPlatformIDsKHR) #endif EXTFUNC(clCreateProgramWithLLVMIntel) EXTFUNC(clGetGenVersionIntel) EXTFUNC(clMapBufferIntel) EXTFUNC(clUnmapBufferIntel) EXTFUNC(clMapBufferGTTIntel) EXTFUNC(clUnmapBufferGTTIntel) EXTFUNC(clPinBufferIntel) EXTFUNC(clUnpinBufferIntel) EXTFUNC(clReportUnfreedIntel) EXTFUNC(clCreateBufferFromLibvaIntel) EXTFUNC(clCreateImageFromLibvaIntel) EXTFUNC(clGetMemObjectFdIntel) return NULL; } void* clGetExtensionFunctionAddress(const char *func_name) { return internal_clGetExtensionFunctionAddress(func_name); } void* clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, const char *func_name) { if (UNLIKELY(platform != NULL && platform != intel_platform)) return NULL; return internal_clGetExtensionFunctionAddress(func_name); } #undef EXTFUNC cl_int clReportUnfreedIntel(void) { return cl_report_unfreed(); } void* clMapBufferIntel(cl_mem mem, cl_int *errcode_ret) { void *ptr = NULL; cl_int err = CL_SUCCESS; CHECK_MEM (mem); ptr = cl_mem_map(mem); error: if (errcode_ret) *errcode_ret = err; return ptr; } cl_int clUnmapBufferIntel(cl_mem mem) { cl_int err = CL_SUCCESS; CHECK_MEM (mem); err = cl_mem_unmap(mem); error: return err; } void* clMapBufferGTTIntel(cl_mem mem, cl_int *errcode_ret) { void *ptr = NULL; cl_int err = CL_SUCCESS; CHECK_MEM (mem); ptr = cl_mem_map_gtt(mem); error: if (errcode_ret) *errcode_ret = err; return ptr; } cl_int clUnmapBufferGTTIntel(cl_mem mem) { cl_int err = CL_SUCCESS; CHECK_MEM (mem); err = cl_mem_unmap_gtt(mem); error: return err; } cl_int clPinBufferIntel(cl_mem mem) { cl_int err = CL_SUCCESS; CHECK_MEM (mem); cl_mem_pin(mem); error: return err; } cl_int clUnpinBufferIntel(cl_mem mem) { cl_int err = CL_SUCCESS; CHECK_MEM (mem); cl_mem_unpin(mem); error: return err; } cl_int clGetGenVersionIntel(cl_device_id device, cl_int *ver) { return cl_device_get_version(device, ver); } cl_program clCreateProgramWithLLVMIntel(cl_context context, cl_uint num_devices, const cl_device_id * devices, const char * filename, cl_int * errcode_ret) { return cl_program_create_from_llvm(context, num_devices, devices, filename, errcode_ret); } cl_mem clCreateBufferFromLibvaIntel(cl_context context, unsigned int bo_name, cl_int *errorcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); mem = cl_mem_new_libva_buffer(context, bo_name, &err); error: if (errorcode_ret) *errorcode_ret = err; return mem; } cl_mem clCreateImageFromLibvaIntel(cl_context context, const cl_libva_image *info, cl_int *errorcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); if (!info) { err = CL_INVALID_VALUE; goto error; } mem = cl_mem_new_libva_image(context, info->bo_name, info->offset, info->width, info->height, info->fmt, info->row_pitch, &err); error: if (errorcode_ret) *errorcode_ret = err; return mem; } extern CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectFdIntel(cl_context context, cl_mem memobj, int* fd) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); CHECK_MEM (memobj); err = cl_mem_get_fd(memobj, fd); error: return err; }