diff options
author | Junyan He <junyan.he@intel.com> | 2016-04-27 18:04:15 +0800 |
---|---|---|
committer | Junyan He <junyan.he@intel.com> | 2016-04-27 18:04:15 +0800 |
commit | 6eaaaaeed4cee6c5d9e5e345162864c79a1de751 (patch) | |
tree | 194f73ecde06ae158d19c10052d570b5e3a61e9d | |
parent | 8c7d57dad366c0ccedb2c1ace1bc61836b99b2bf (diff) |
add info
-rw-r--r-- | backend/src/driver/cl_gen_driver.h | 3 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_driver.hpp | 5 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_mem.cpp | 83 | ||||
-rw-r--r-- | include/cl_driver.h | 3 | ||||
-rw-r--r-- | include/cl_mem.h | 7 | ||||
-rw-r--r-- | libclapi/cl_kernel.c | 6 | ||||
-rw-r--r-- | libclapi/cl_mem.c | 83 | ||||
-rw-r--r-- | src/cl_mem.c | 2 |
8 files changed, 77 insertions, 115 deletions
diff --git a/backend/src/driver/cl_gen_driver.h b/backend/src/driver/cl_gen_driver.h index 5e475b17..82c344e3 100644 --- a/backend/src/driver/cl_gen_driver.h +++ b/backend/src/driver/cl_gen_driver.h @@ -57,8 +57,7 @@ cl_int GenSupportImageFmt(const cl_device_id device, cl_mem_object_type image_ty cl_int GenCreateBuffer(cl_mem mem, const cl_device_id device); cl_int GenReleaseMem(cl_mem mem, const cl_device_id device); cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, cl_bool block, - cl_map_flags flags, size_t offset, size_t size, cl_uint num_events, - const cl_event *event_list, cl_event event_ret); + cl_map_flags flags, size_t offset, size_t size, cl_command_queue_work_item item); cl_int GenEnqueueUnmapMem(cl_command_queue queue, cl_mem mem, void *mapped_ptr, cl_uint num_events, const cl_event *event_list, cl_event event_ret); cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const uint32_t work_dim, diff --git a/backend/src/driver/cl_gen_driver.hpp b/backend/src/driver/cl_gen_driver.hpp index 6f880174..39607c16 100644 --- a/backend/src/driver/cl_gen_driver.hpp +++ b/backend/src/driver/cl_gen_driver.hpp @@ -254,6 +254,7 @@ struct GenGPUCommandQueue { struct GenGPUMem { drm_intel_bo *bo; GenGPUContext* gpuCtx; + bool bindUserPtr; void* alignedHostPtr; void* mappedAddr; bool writeMap; @@ -262,8 +263,8 @@ struct GenGPUMem { bool mappedGtt; volatile int mapRef; pthread_mutex_t mutex; - GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), alignedHostPtr(NULL), mappedAddr(NULL), - writeMap(false), realSize(0), tiling(GPGPU_NO_TILE), mappedGtt(false), mapRef(0) { + GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), bindUserPtr(false), alignedHostPtr(NULL), + mappedAddr(NULL), writeMap(false), realSize(0), tiling(GPGPU_NO_TILE), mappedGtt(false), mapRef(0) { pthread_mutex_init(&this->mutex, NULL); } ~GenGPUMem(void); diff --git a/backend/src/driver/cl_gen_mem.cpp b/backend/src/driver/cl_gen_mem.cpp index 49975000..f69a9f5d 100644 --- a/backend/src/driver/cl_gen_mem.cpp +++ b/backend/src/driver/cl_gen_mem.cpp @@ -442,13 +442,17 @@ bool GenGPUWorkItemMapBuf::submit(void) extern "C" cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, cl_bool block, - cl_map_flags flags, size_t offset, size_t size, cl_uint num_events, - const cl_event *event_list, cl_event event_ret) + cl_map_flags flags, size_t offset, size_t size, cl_command_queue_work_item item) { + assert(item); + if (mem->type != CL_MEM_OBJECT_BUFFER) { return CL_INVALID_VALUE; } + GenGPUMem* genMem = reinterpret_cast<GenGPUMem*>(getGenMemPrivate(mem, queue->device)); + GenGPUCommandQueue* gpuQueue = (GenGPUCommandQueue*)getGenCommandQueuePrivate(queue); + GBE_ASSERT(gpuQueue); void* retAddr = NULL; if (genMem == NULL) @@ -458,90 +462,19 @@ cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, return CL_MEM_OBJECT_ALLOCATION_FAILURE; } - GenGPUCommandQueue* gpuQueue = (GenGPUCommandQueue*)getGenCommandQueuePrivate(queue); - GBE_ASSERT(gpuQueue); - - if (block) { - /* According to spec, when in block mode, we need to ensure all the - commands in queue are flushed. */ - gpuQueue->waitForFlush(); - - if (event_list) { // Need to wait for events. - if (gpuQueue->waitForEvents(event_list, num_events) == false) { - /* canceled or some errors. */ - return CL_MAP_FAILURE; - } - } - + if (block || item->depend_events == NULL) { // We do not need to do it async, just map it. retAddr = genDoMapBuffer(genMem, mem, flags, offset, size); if (retAddr == NULL) { - if (event_ret) { - event_ret->set_status(event_ret, -1); // Set error for that event. - } - return CL_MAP_FAILURE; - } - - if (ret_addr) - *ret_addr = retAddr; - - if (event_ret) { - event_ret->set_status(event_ret, CL_COMPLETE); - } - return CL_SUCCESS; - } else if (event_list == NULL) { - /* We do not have any events to wait, map it in sync mode. */ - retAddr = genDoMapBuffer(genMem, mem, flags, offset, size); - if (retAddr == NULL) { - if (event_ret) { - event_ret->set_status(event_ret, -1); // Set error for that event. - } - return CL_MAP_FAILURE; - } - - if (ret_addr) - *ret_addr = retAddr; - - if (event_ret) { - event_ret->set_status(event_ret, CL_COMPLETE); - } - return CL_SUCCESS; - } else { - GBE_ASSERT(num_events > 0); - /* A awkward case, we have events to wait, so we can not do the map operation sync. - But the spec require us to return the mapped address immediately. We can just - allocate a host mem and return that address. After really mapped, we then - copy back the data. */ - void* addr = NULL; - if (!mem->flags & CL_MEM_USE_HOST_PTR) { - addr = GBE_MALLOC(size); - if (addr == NULL) - return CL_OUT_OF_HOST_MEMORY; - } - - GenGPUWorkItemMapBuf* mapItem = GBE_NEW(GenGPUWorkItemMapBuf, mem, genMem, addr, flags, - offset, size, event_ret, event_list, num_events); - if (mapItem == NULL) { - if (addr) - GBE_FREE(addr); - return CL_OUT_OF_HOST_MEMORY; - } - - /* Final,enqueue it in the queue worker thread. */ - if (gpuQueue->enqueueWorkItem(mapItem) == false) { - GBE_DELETE(mapItem); - if (addr) - GBE_FREE(addr); return CL_MAP_FAILURE; } if (ret_addr) *ret_addr = retAddr; + item->status = CL_COMPLETE; return CL_SUCCESS; } - // Never come to here. - GBE_ASSERT(0); return CL_MAP_FAILURE; } diff --git a/include/cl_driver.h b/include/cl_driver.h index fce765a0..0db53e1e 100644 --- a/include/cl_driver.h +++ b/include/cl_driver.h @@ -67,8 +67,7 @@ typedef struct _cl_driver { cl_int (*create_buffer)(cl_mem mem, const cl_device_id device); cl_int (*release_mem)(cl_mem mem, const cl_device_id device); cl_int (*enqueue_map_buffer)(cl_command_queue queue, cl_mem buffer, void** ret_addr, cl_bool block, - cl_map_flags flags, size_t offset, size_t size, cl_uint num_events, const cl_event *event_list, - cl_event event_ret); + cl_map_flags flags, size_t offset, size_t size, cl_command_queue_work_item item); cl_int (*enqueue_unmap_mem)(cl_command_queue queue, cl_mem memobj, void *mapped_ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event event_ret); cl_int (*support_image_fmt)(const cl_device_id device, cl_mem_object_type image_type, cl_image_format* image_formats); diff --git a/include/cl_mem.h b/include/cl_mem.h index 3cc985dc..257ae747 100644 --- a/include/cl_mem.h +++ b/include/cl_mem.h @@ -36,7 +36,8 @@ typedef struct _cl_mem_dstr_cb { struct _cl_mem_dstr_cb * next; void (CL_CALLBACK *pfn_notify)(cl_mem memobj, void *user_data); void *user_data; -} cl_mem_dstr_cb; +} _cl_mem_dstr_cb; +typedef _cl_mem_dstr_cb* cl_mem_dstr_cb; typedef struct _cl_mem { uint64_t magic; /* To identify it as a memory object */ @@ -50,8 +51,8 @@ typedef struct _cl_mem { cl_mapped_ptr_info mapped_ptr; /* Store the mapped addresses and size by caller. */ int mapped_ptr_sz; /* The array size of mapped_ptr. */ int map_ref; /* The mapped count. */ - cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */ - cl_bool* enqueued_devices; /* Record whether the mem enqueued on that device. */ + cl_mem_dstr_cb dstr_cb; /* The destroy callback. */ + cl_device_id enqueued_device; /* Record which device the mem enqueued on. */ pthread_mutex_t lock; /* The lock to protect the mem. */ void* pdata; } _cl_mem; diff --git a/libclapi/cl_kernel.c b/libclapi/cl_kernel.c index e4b2a0bd..1fc0f785 100644 --- a/libclapi/cl_kernel.c +++ b/libclapi/cl_kernel.c @@ -803,13 +803,13 @@ static cl_int cl_enqueue_ND_range(cl_command_queue queue, cl_kernel kernel, cons if (event == NULL) goto error; } - cl_enqueue_set_work_item_event(it, event); + if (event) + cl_enqueue_set_work_item_event(it, event); if (it->status > CL_COMPLETE) { // Still something todo err = cl_enqueue_insert_work_item(queue, it); - if (err != CL_SUCCESS) - goto error; + assert(err == CL_SUCCESS); // queue must be avaible. } else { cl_enqueue_destroy_work_item(queue, it); } diff --git a/libclapi/cl_mem.c b/libclapi/cl_mem.c index ad46a6f8..21c2e787 100644 --- a/libclapi/cl_mem.c +++ b/libclapi/cl_mem.c @@ -18,6 +18,7 @@ #include <string.h> #include <assert.h> +#include <unistd.h> #include "cl_context.h" #include "cl_internals.h" #include "cl_alloc.h" @@ -137,17 +138,10 @@ LOCAL cl_mem cl_mem_new(cl_mem_object_type type, cl_context ctx, cl_mem_flags fl if (mem == NULL) return NULL; - mem->enqueued_devices = CL_CALLOC(ctx->device_num, sizeof(cl_bool)); - if (mem->enqueued_devices == NULL) { - CL_FREE(mem); - return NULL; - } - /* Create the private pointer array if device > 1 */ if (ctx->device_num > 1) { mem->pdata = CL_CALLOC(ctx->device_num, sizeof(void*)); if (mem->pdata == NULL) { - CL_FREE(mem->enqueued_devices); CL_FREE(mem); return NULL; } @@ -189,11 +183,16 @@ static void cl_mem_delete(cl_mem mem) CL_MUTEX_UNLOCK(&mem->ctx->lock); cl_release_context(mem->ctx); + if (mem->flags & CL_MEM_ALLOC_HOST_PTR) { + assert(mem->host_ptr); + CL_FREE(mem->host_ptr); + } + if (mem->mapped_ptr) CL_FREE(mem->mapped_ptr); if (mem->dstr_cb) { - cl_mem_dstr_cb *cb = NULL; + cl_mem_dstr_cb cb = NULL; while (mem->dstr_cb) { cb = mem->dstr_cb; mem->dstr_cb = cb->next; @@ -206,7 +205,6 @@ static void cl_mem_delete(cl_mem mem) if (mem->ctx->device_num > 1) { CL_FREE(mem->pdata); } - CL_FREE(mem->enqueued_devices); CL_FREE(mem); } @@ -221,7 +219,7 @@ LOCAL void cl_release_mem(cl_mem mem) /* Call the user callback. No need to lock, we are the last user. */ if (mem->dstr_cb) { - cl_mem_dstr_cb *cb = mem->dstr_cb; + cl_mem_dstr_cb cb = mem->dstr_cb; while (mem->dstr_cb) { cb = mem->dstr_cb; cb->pfn_notify(mem, cb->user_data); @@ -328,8 +326,20 @@ static cl_mem cl_mem_create_buffer(cl_context ctx, cl_mem_flags flags, size_t sz goto error; } - if (flags & (CL_MEM_USE_HOST_PTR|CL_MEM_COPY_HOST_PTR)) + if (flags & CL_MEM_ALLOC_HOST_PTR) { + /*FIXME: For peformance and HW limitation, we need the address align to page size. */ + int page_size = getpagesize(); + mem->host_ptr = CL_MEMALIGN(sz, page_size); + if (mem->host_ptr == NULL) { + err = CL_OUT_OF_HOST_MEMORY; + goto error; + } + + if (flags & CL_MEM_COPY_HOST_PTR) + memcpy(mem->host_ptr, data, sz); + } else if (flags & (CL_MEM_USE_HOST_PTR|CL_MEM_COPY_HOST_PTR)) { mem->host_ptr = data; + } for (i = 0; i < ctx->device_num; i++) { err = ctx->devices[i]->driver->create_buffer(mem, ctx->devices[i]); @@ -624,7 +634,6 @@ static void* cl_enqueue_map_buffer(cl_command_queue queue, cl_mem buffer, cl_boo cl_command_queue_work_item it = NULL; cl_int err = CL_SUCCESS; void *mem_ptr = NULL; - cl_int index; err = cl_mem_find_mapped(buffer, offset, map_flags, size); if (err != CL_SUCCESS) @@ -650,12 +659,24 @@ static void* cl_enqueue_map_buffer(cl_command_queue queue, cl_mem buffer, cl_boo goto error; } - err = queue->device->driver->enqueue_map_buffer(queue, buffer, &mem_ptr, blocking_map, map_flags, - offset, size, num_events, event_list, event); + CL_MUTEX_LOCK(&buffer->lock); + if (buffer->enqueued_device && buffer->enqueued_device != queue->device) { + CL_MUTEX_UNLOCK(&buffer->lock); + err = CL_INVALID_OPERATION; + goto error; + } else if (buffer->enqueued_device == NULL) { + cl_retain_device_id(queue->device); + buffer->enqueued_device = queue->device; + } + CL_MUTEX_UNLOCK(&buffer->lock); + + err = queue->device->driver->enqueue_map_buffer(queue, buffer, &mem_ptr, + blocking_map, map_flags, offset, size, it); if (err != CL_SUCCESS) goto error; - cl_enqueue_set_work_item_event(it, event); + if (event) + cl_enqueue_set_work_item_event(it, event); /* We need to store the map info for unmap and debug. */ err = cl_mem_record_mapped(buffer, mem_ptr, offset, map_flags, size, NULL, NULL); @@ -665,11 +686,13 @@ static void* cl_enqueue_map_buffer(cl_command_queue queue, cl_mem buffer, cl_boo goto error; } - index = cl_context_get_device_index(queue->ctx, queue->device); - - CL_MUTEX_LOCK(&buffer->lock); - buffer->enqueued_devices[index] = CL_TRUE; - CL_MUTEX_UNLOCK(&buffer->lock); + if (it->status > CL_COMPLETE) { // Still something todo + err = cl_enqueue_insert_work_item(queue, it); + assert(err == CL_SUCCESS); // queue must be avaible. + } else { + cl_enqueue_destroy_work_item(queue, it); + it = NULL; + } if (errcode_ret) *errcode_ret = err; @@ -702,6 +725,17 @@ static cl_int cl_enqueue_unmap_mem(cl_command_queue queue, cl_mem memobj, void * goto error; } + CL_MUTEX_LOCK(&memobj->lock); + if (memobj->enqueued_device && memobj->enqueued_device != queue->device) { + CL_MUTEX_UNLOCK(&memobj->lock); + err = CL_INVALID_OPERATION; + goto error; + } else if (memobj->enqueued_device == NULL) { + cl_retain_device_id(queue->device); + memobj->enqueued_device = queue->device; + } + CL_MUTEX_UNLOCK(&memobj->lock); + /* Check the pointer valid. */ INVALID_VALUE_IF(!mapped_ptr); @@ -715,11 +749,6 @@ static cl_int cl_enqueue_unmap_mem(cl_command_queue queue, cl_mem memobj, void * if (err != CL_SUCCESS) goto error; - CL_MUTEX_LOCK(&memobj->lock); - index = cl_context_get_device_index(queue->ctx, queue->device); - memobj->enqueued_devices[index] = CL_TRUE; - CL_MUTEX_UNLOCK(&memobj->lock); - if (event_ret) *event_ret = event; @@ -804,14 +833,14 @@ clSetMemObjectDestructorCallback(cl_mem memobj, CHECK_MEM(memobj); INVALID_VALUE_IF (pfn_notify == 0); - cl_mem_dstr_cb *cb = (cl_mem_dstr_cb*)malloc(sizeof(cl_mem_dstr_cb)); + cl_mem_dstr_cb cb = malloc(sizeof(_cl_mem_dstr_cb)); if (!cb) { err = CL_OUT_OF_HOST_MEMORY; goto error; } CL_MUTEX_LOCK(&memobj->lock); - memset(cb, 0, sizeof(cl_mem_dstr_cb)); + memset(cb, 0, sizeof(_cl_mem_dstr_cb)); cb->pfn_notify = pfn_notify; cb->user_data = user_data; cb->next = memobj->dstr_cb; diff --git a/src/cl_mem.c b/src/cl_mem.c index 5d28fa92..ce53037c 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -1227,7 +1227,7 @@ cl_mem_delete(cl_mem mem) free(mem->mapped_ptr); if (mem->dstr_cb) { - cl_mem_dstr_cb *cb = mem->dstr_cb; + cl_mem_dstr_cb cb = mem->dstr_cb; while (mem->dstr_cb) { cb = mem->dstr_cb; cb->pfn_notify(mem, cb->user_data); |