/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Rong Yang */ #include #include #include #include #include "cl_enqueue.h" #include "cl_image.h" #include "cl_driver.h" #include "cl_event.h" #include "cl_command_queue.h" #include "cl_utils.h" cl_int cl_enqueue_read_buffer(enqueue_data* data) { cl_int err = CL_SUCCESS; cl_mem mem = data->mem_obj; assert(mem->type == CL_MEM_BUFFER_TYPE || mem->type == CL_MEM_SUBBUFFER_TYPE); void* src_ptr; struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) { err = CL_MAP_FAILURE; goto error; } memcpy(data->ptr, (char*)src_ptr + data->offset + buffer->sub_offset, data->size); err = cl_mem_unmap_auto(data->mem_obj); error: return err; } cl_int cl_enqueue_read_buffer_rect(enqueue_data* data) { cl_int err = CL_SUCCESS; void* src_ptr; void* dst_ptr; const size_t* origin = data->origin; const size_t* host_origin = data->host_origin; const size_t* region = data->region; if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) { err = CL_MAP_FAILURE; goto error; } size_t offset = origin[0] + data->row_pitch*origin[1] + data->slice_pitch*origin[2]; src_ptr = (char*)src_ptr + offset; offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2]; dst_ptr = (char *)data->ptr + offset; if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch && (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch))) { memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]); } else { cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src = src_ptr; char* dst = dst_ptr; for (y = 0; y < region[1]; y++) { memcpy(dst, src, region[0]); src += data->row_pitch; dst += data->host_row_pitch; } src_ptr = (char*)src_ptr + data->slice_pitch; dst_ptr = (char*)dst_ptr + data->host_slice_pitch; } } err = cl_mem_unmap_auto(data->mem_obj); error: return err; } cl_int cl_enqueue_write_buffer(enqueue_data *data) { cl_int err = CL_SUCCESS; cl_mem mem = data->mem_obj; assert(mem->type == CL_MEM_BUFFER_TYPE || mem->type == CL_MEM_SUBBUFFER_TYPE); struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; void* dst_ptr; if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) { err = CL_MAP_FAILURE; goto error; } memcpy((char*)dst_ptr + data->offset + buffer->sub_offset, data->const_ptr, data->size); err = cl_mem_unmap_auto(data->mem_obj); error: return err; } cl_int cl_enqueue_write_buffer_rect(enqueue_data *data) { cl_int err = CL_SUCCESS; void* src_ptr; void* dst_ptr; const size_t* origin = data->origin; const size_t* host_origin = data->host_origin; const size_t* region = data->region; if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) { err = CL_MAP_FAILURE; goto error; } size_t offset = origin[0] + data->row_pitch*origin[1] + data->slice_pitch*origin[2]; dst_ptr = (char *)dst_ptr + offset; offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2]; src_ptr = (char*)data->const_ptr + offset; if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch && (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch))) { memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]); } else { cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src = src_ptr; char* dst = dst_ptr; for (y = 0; y < region[1]; y++) { memcpy(dst, src, region[0]); src += data->host_row_pitch; dst += data->row_pitch; } src_ptr = (char*)src_ptr + data->host_slice_pitch; dst_ptr = (char*)dst_ptr + data->slice_pitch; } } err = cl_mem_unmap_auto(data->mem_obj); error: return err; } cl_int cl_enqueue_read_image(enqueue_data *data) { cl_int err = CL_SUCCESS; void* src_ptr; cl_mem mem = data->mem_obj; CHECK_IMAGE(mem, image); const size_t* origin = data->origin; const size_t* region = data->region; if (!(src_ptr = cl_mem_map_auto(mem))) { err = CL_MAP_FAILURE; goto error; } size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2]; src_ptr = (char*)src_ptr + offset; if (!origin[0] && region[0] == image->w && data->row_pitch == image->row_pitch && (region[2] == 1 || (!origin[1] && region[1] == image->h && data->slice_pitch == image->slice_pitch))) { memcpy(data->ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]); } else { cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src = src_ptr; char* dst = data->ptr; for (y = 0; y < region[1]; y++) { memcpy(dst, src, image->bpp*region[0]); src += image->row_pitch; dst += data->row_pitch; } src_ptr = (char*)src_ptr + image->slice_pitch; data->ptr = (char*)data->ptr + data->slice_pitch; } } err = cl_mem_unmap_auto(mem); error: return err; } cl_int cl_enqueue_write_image(enqueue_data *data) { cl_int err = CL_SUCCESS; void* dst_ptr; cl_mem mem = data->mem_obj; CHECK_IMAGE(mem, image); if (!(dst_ptr = cl_mem_map_auto(mem))) { err = CL_MAP_FAILURE; goto error; } cl_mem_copy_image_region(data->origin, data->region, dst_ptr, image->row_pitch, image->slice_pitch, data->const_ptr, data->row_pitch, data->slice_pitch, image); err = cl_mem_unmap_auto(mem); error: return err; } cl_int cl_enqueue_map_buffer(enqueue_data *data) { void *ptr = NULL; cl_int err = CL_SUCCESS; cl_mem mem = data->mem_obj; assert(mem->type == CL_MEM_BUFFER_TYPE || mem->type == CL_MEM_SUBBUFFER_TYPE); struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; if(data->unsync_map == 1) //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here ptr = cl_mem_map_gtt(mem); else ptr = cl_mem_map_auto(mem); if (ptr == NULL) { err = CL_MAP_FAILURE; goto error; } data->ptr = ptr; if(mem->flags & CL_MEM_USE_HOST_PTR) { assert(mem->host_ptr); ptr = (char*)ptr + data->offset + buffer->sub_offset; memcpy(mem->host_ptr + data->offset, ptr, data->size); } error: return err; } cl_int cl_enqueue_map_image(enqueue_data *data) { cl_int err = CL_SUCCESS; cl_mem mem = data->mem_obj; void *ptr = NULL; CHECK_IMAGE(mem, image); if(data->unsync_map == 1) //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here ptr = cl_mem_map_gtt(mem); else ptr = cl_mem_map_auto(mem); if (ptr == NULL) { err = CL_MAP_FAILURE; goto error; } data->ptr = ptr; if(mem->flags & CL_MEM_USE_HOST_PTR) { assert(mem->host_ptr); cl_mem_copy_image_region(data->origin, data->region, mem->host_ptr, image->host_row_pitch, image->host_slice_pitch, data->ptr, data->row_pitch, data->slice_pitch, image); } error: return err; } cl_int cl_enqueue_unmap_mem_object(enqueue_data *data) { cl_int err = CL_SUCCESS; int i; size_t mapped_size = 0; void * v_ptr = NULL; void * mapped_ptr = data->ptr; cl_mem memobj = data->mem_obj; assert(memobj->mapped_ptr_sz >= memobj->map_ref); INVALID_VALUE_IF(!mapped_ptr); for (i = 0; i < memobj->mapped_ptr_sz; i++) { if (memobj->mapped_ptr[i].ptr == mapped_ptr) { memobj->mapped_ptr[i].ptr = NULL; mapped_size = memobj->mapped_ptr[i].size; v_ptr = memobj->mapped_ptr[i].v_ptr; memobj->mapped_ptr[i].size = 0; memobj->mapped_ptr[i].v_ptr = NULL; memobj->map_ref--; break; } } /* can not find a mapped address? */ INVALID_VALUE_IF(i == memobj->mapped_ptr_sz); if (memobj->flags & CL_MEM_USE_HOST_PTR) { assert(mapped_ptr >= memobj->host_ptr && mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size); /* Sync the data. */ memcpy(v_ptr, mapped_ptr, mapped_size); } else { assert(v_ptr == mapped_ptr); } cl_mem_unmap_auto(memobj); /* shrink the mapped slot. */ if (memobj->mapped_ptr_sz/2 > memobj->map_ref) { int j = 0; cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2)); if (!new_ptr) { /* Just do nothing. */ goto error; } memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr)); for (i = 0; i < memobj->mapped_ptr_sz; i++) { if (memobj->mapped_ptr[i].ptr) { new_ptr[j] = memobj->mapped_ptr[i]; j++; assert(j < memobj->mapped_ptr_sz/2); } } memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2; free(memobj->mapped_ptr); memobj->mapped_ptr = new_ptr; } error: return err; } cl_int cl_enqueue_native_kernel(enqueue_data *data) { cl_int err = CL_SUCCESS; cl_uint num_mem_objects = (cl_uint)data->offset; const cl_mem *mem_list = data->mem_list; const void **args_mem_loc = (const void **)data->const_ptr; cl_uint i; for (i=0; iuser_func(data->ptr); for (i=0; iptr); error: return err; } cl_int cl_enqueue_handle(cl_event event, enqueue_data* data) { /* if need profiling, add the submit timestamp here. */ if (event && event->type != CL_COMMAND_USER && event->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(event, CL_PROFILING_COMMAND_SUBMIT); } switch(data->type) { case EnqueueReadBuffer: return cl_enqueue_read_buffer(data); case EnqueueReadBufferRect: return cl_enqueue_read_buffer_rect(data); case EnqueueWriteBuffer: return cl_enqueue_write_buffer(data); case EnqueueWriteBufferRect: return cl_enqueue_write_buffer_rect(data); case EnqueueReadImage: return cl_enqueue_read_image(data); case EnqueueWriteImage: return cl_enqueue_write_image(data); case EnqueueMapBuffer: return cl_enqueue_map_buffer(data); case EnqueueMapImage: return cl_enqueue_map_image(data); case EnqueueUnmapMemObject: return cl_enqueue_unmap_mem_object(data); case EnqueueCopyBufferRect: case EnqueueCopyBuffer: case EnqueueCopyImage: case EnqueueCopyBufferToImage: case EnqueueCopyImageToBuffer: case EnqueueNDRangeKernel: cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); return CL_SUCCESS; case EnqueueNativeKernel: return cl_enqueue_native_kernel(data); default: return CL_SUCCESS; } }