diff options
Diffstat (limited to 'backend')
-rw-r--r--  backend/src/driver/cl_gen_driver.h   |  3 +-
-rw-r--r--  backend/src/driver/cl_gen_driver.hpp |  5 +-
-rw-r--r--  backend/src/driver/cl_gen_mem.cpp    | 83 ++++----------------
3 files changed, 12 insertions(+), 79 deletions(-)
diff --git a/backend/src/driver/cl_gen_driver.h b/backend/src/driver/cl_gen_driver.h index 5e475b17..82c344e3 100644 --- a/backend/src/driver/cl_gen_driver.h +++ b/backend/src/driver/cl_gen_driver.h @@ -57,8 +57,7 @@ cl_int GenSupportImageFmt(const cl_device_id device, cl_mem_object_type image_ty cl_int GenCreateBuffer(cl_mem mem, const cl_device_id device); cl_int GenReleaseMem(cl_mem mem, const cl_device_id device); cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, cl_bool block, - cl_map_flags flags, size_t offset, size_t size, cl_uint num_events, - const cl_event *event_list, cl_event event_ret); + cl_map_flags flags, size_t offset, size_t size, cl_command_queue_work_item item); cl_int GenEnqueueUnmapMem(cl_command_queue queue, cl_mem mem, void *mapped_ptr, cl_uint num_events, const cl_event *event_list, cl_event event_ret); cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const uint32_t work_dim, diff --git a/backend/src/driver/cl_gen_driver.hpp b/backend/src/driver/cl_gen_driver.hpp index 6f880174..39607c16 100644 --- a/backend/src/driver/cl_gen_driver.hpp +++ b/backend/src/driver/cl_gen_driver.hpp @@ -254,6 +254,7 @@ struct GenGPUCommandQueue { struct GenGPUMem { drm_intel_bo *bo; GenGPUContext* gpuCtx; + bool bindUserPtr; void* alignedHostPtr; void* mappedAddr; bool writeMap; @@ -262,8 +263,8 @@ struct GenGPUMem { bool mappedGtt; volatile int mapRef; pthread_mutex_t mutex; - GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), alignedHostPtr(NULL), mappedAddr(NULL), - writeMap(false), realSize(0), tiling(GPGPU_NO_TILE), mappedGtt(false), mapRef(0) { + GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), bindUserPtr(false), alignedHostPtr(NULL), + mappedAddr(NULL), writeMap(false), realSize(0), tiling(GPGPU_NO_TILE), mappedGtt(false), mapRef(0) { pthread_mutex_init(&this->mutex, NULL); } ~GenGPUMem(void); diff --git a/backend/src/driver/cl_gen_mem.cpp b/backend/src/driver/cl_gen_mem.cpp index 
49975000..f69a9f5d 100644 --- a/backend/src/driver/cl_gen_mem.cpp +++ b/backend/src/driver/cl_gen_mem.cpp @@ -442,13 +442,17 @@ bool GenGPUWorkItemMapBuf::submit(void) extern "C" cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, cl_bool block, - cl_map_flags flags, size_t offset, size_t size, cl_uint num_events, - const cl_event *event_list, cl_event event_ret) + cl_map_flags flags, size_t offset, size_t size, cl_command_queue_work_item item) { + assert(item); + if (mem->type != CL_MEM_OBJECT_BUFFER) { return CL_INVALID_VALUE; } + GenGPUMem* genMem = reinterpret_cast<GenGPUMem*>(getGenMemPrivate(mem, queue->device)); + GenGPUCommandQueue* gpuQueue = (GenGPUCommandQueue*)getGenCommandQueuePrivate(queue); + GBE_ASSERT(gpuQueue); void* retAddr = NULL; if (genMem == NULL) @@ -458,90 +462,19 @@ cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, return CL_MEM_OBJECT_ALLOCATION_FAILURE; } - GenGPUCommandQueue* gpuQueue = (GenGPUCommandQueue*)getGenCommandQueuePrivate(queue); - GBE_ASSERT(gpuQueue); - - if (block) { - /* According to spec, when in block mode, we need to ensure all the - commands in queue are flushed. */ - gpuQueue->waitForFlush(); - - if (event_list) { // Need to wait for events. - if (gpuQueue->waitForEvents(event_list, num_events) == false) { - /* canceled or some errors. */ - return CL_MAP_FAILURE; - } - } - + if (block || item->depend_events == NULL) { // We do not need to do it async, just map it. retAddr = genDoMapBuffer(genMem, mem, flags, offset, size); if (retAddr == NULL) { - if (event_ret) { - event_ret->set_status(event_ret, -1); // Set error for that event. - } - return CL_MAP_FAILURE; - } - - if (ret_addr) - *ret_addr = retAddr; - - if (event_ret) { - event_ret->set_status(event_ret, CL_COMPLETE); - } - return CL_SUCCESS; - } else if (event_list == NULL) { - /* We do not have any events to wait, map it in sync mode. 
*/ - retAddr = genDoMapBuffer(genMem, mem, flags, offset, size); - if (retAddr == NULL) { - if (event_ret) { - event_ret->set_status(event_ret, -1); // Set error for that event. - } - return CL_MAP_FAILURE; - } - - if (ret_addr) - *ret_addr = retAddr; - - if (event_ret) { - event_ret->set_status(event_ret, CL_COMPLETE); - } - return CL_SUCCESS; - } else { - GBE_ASSERT(num_events > 0); - /* A awkward case, we have events to wait, so we can not do the map operation sync. - But the spec require us to return the mapped address immediately. We can just - allocate a host mem and return that address. After really mapped, we then - copy back the data. */ - void* addr = NULL; - if (!mem->flags & CL_MEM_USE_HOST_PTR) { - addr = GBE_MALLOC(size); - if (addr == NULL) - return CL_OUT_OF_HOST_MEMORY; - } - - GenGPUWorkItemMapBuf* mapItem = GBE_NEW(GenGPUWorkItemMapBuf, mem, genMem, addr, flags, - offset, size, event_ret, event_list, num_events); - if (mapItem == NULL) { - if (addr) - GBE_FREE(addr); - return CL_OUT_OF_HOST_MEMORY; - } - - /* Final,enqueue it in the queue worker thread. */ - if (gpuQueue->enqueueWorkItem(mapItem) == false) { - GBE_DELETE(mapItem); - if (addr) - GBE_FREE(addr); return CL_MAP_FAILURE; } if (ret_addr) *ret_addr = retAddr; + item->status = CL_COMPLETE; return CL_SUCCESS; } - // Never come to here. - GBE_ASSERT(0); return CL_MAP_FAILURE; } |