author  Junyan He <junyan.he@intel.com>  2016-04-27 18:04:15 +0800
committer  Junyan He <junyan.he@intel.com>  2016-04-27 18:04:15 +0800
commit  6eaaaaeed4cee6c5d9e5e345162864c79a1de751 (patch)
tree  194f73ecde06ae158d19c10052d570b5e3a61e9d
parent  8c7d57dad366c0ccedb2c1ace1bc61836b99b2bf (diff)
Rework buffer mapping around command queue work items

Pass a cl_command_queue_work_item into the driver's enqueue_map_buffer
instead of a raw event list, track the single device a mem object was
enqueued on instead of a per-device flag array, turn cl_mem_dstr_cb into
a pointer typedef, and allocate page-aligned host memory for
CL_MEM_ALLOC_HOST_PTR buffers.
-rw-r--r--backend/src/driver/cl_gen_driver.h3
-rw-r--r--backend/src/driver/cl_gen_driver.hpp5
-rw-r--r--backend/src/driver/cl_gen_mem.cpp83
-rw-r--r--include/cl_driver.h3
-rw-r--r--include/cl_mem.h7
-rw-r--r--libclapi/cl_kernel.c6
-rw-r--r--libclapi/cl_mem.c83
-rw-r--r--src/cl_mem.c2
8 files changed, 77 insertions, 115 deletions
diff --git a/backend/src/driver/cl_gen_driver.h b/backend/src/driver/cl_gen_driver.h
index 5e475b17..82c344e3 100644
--- a/backend/src/driver/cl_gen_driver.h
+++ b/backend/src/driver/cl_gen_driver.h
@@ -57,8 +57,7 @@ cl_int GenSupportImageFmt(const cl_device_id device, cl_mem_object_type image_ty
cl_int GenCreateBuffer(cl_mem mem, const cl_device_id device);
cl_int GenReleaseMem(cl_mem mem, const cl_device_id device);
cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, cl_bool block,
- cl_map_flags flags, size_t offset, size_t size, cl_uint num_events,
- const cl_event *event_list, cl_event event_ret);
+ cl_map_flags flags, size_t offset, size_t size, cl_command_queue_work_item item);
cl_int GenEnqueueUnmapMem(cl_command_queue queue, cl_mem mem, void *mapped_ptr,
cl_uint num_events, const cl_event *event_list, cl_event event_ret);
cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const uint32_t work_dim,
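The dependent events and the completion status now travel inside the
cl_command_queue_work_item instead of being passed as separate arguments.
A minimal caller sketch (the work item is assumed to have been built by the
API layer, and a mem->size field is assumed here):

    /* Hypothetical caller: a blocking map completes in place, so the
     * driver itself marks the work item CL_COMPLETE. */
    void *addr = NULL;
    cl_int err = GenEnqueueMapBuffer(queue, mem, &addr, CL_TRUE,
                                     CL_MAP_READ, 0, mem->size, item);
    assert(err != CL_SUCCESS || item->status == CL_COMPLETE);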
diff --git a/backend/src/driver/cl_gen_driver.hpp b/backend/src/driver/cl_gen_driver.hpp
index 6f880174..39607c16 100644
--- a/backend/src/driver/cl_gen_driver.hpp
+++ b/backend/src/driver/cl_gen_driver.hpp
@@ -254,6 +254,7 @@ struct GenGPUCommandQueue {
struct GenGPUMem {
drm_intel_bo *bo;
GenGPUContext* gpuCtx;
+ bool bindUserPtr;
void* alignedHostPtr;
void* mappedAddr;
bool writeMap;
@@ -262,8 +263,8 @@ struct GenGPUMem {
bool mappedGtt;
volatile int mapRef;
pthread_mutex_t mutex;
- GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), alignedHostPtr(NULL), mappedAddr(NULL),
- writeMap(false), realSize(0), tiling(GPGPU_NO_TILE), mappedGtt(false), mapRef(0) {
+ GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), bindUserPtr(false), alignedHostPtr(NULL),
+ mappedAddr(NULL), writeMap(false), realSize(0), tiling(GPGPU_NO_TILE), mappedGtt(false), mapRef(0) {
pthread_mutex_init(&this->mutex, NULL);
}
~GenGPUMem(void);
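The new bindUserPtr flag presumably records that the BO wraps a
user-supplied, page-aligned host pointer rather than driver-owned storage.
A hedged sketch of such an allocation through libdrm (alloc_userptr_bo is a
hypothetical helper; only the field names come from this patch):

    #include <intel_bufmgr.h>
    #include <drm/i915_drm.h>

    /* Sketch: back a buffer with a page-aligned user pointer. Both the
     * address and the size must be page aligned for i915 userptr. */
    static drm_intel_bo *alloc_userptr_bo(drm_intel_bufmgr *bufmgr,
                                          void *aligned_ptr, size_t sz)
    {
      return drm_intel_bo_alloc_userptr(bufmgr, "user buffer", aligned_ptr,
                                        I915_TILING_NONE, 0, sz, 0);
    }
    /* On success the owner would set bindUserPtr = true and keep
     * aligned_ptr in alignedHostPtr. */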
diff --git a/backend/src/driver/cl_gen_mem.cpp b/backend/src/driver/cl_gen_mem.cpp
index 49975000..f69a9f5d 100644
--- a/backend/src/driver/cl_gen_mem.cpp
+++ b/backend/src/driver/cl_gen_mem.cpp
@@ -442,13 +442,17 @@ bool GenGPUWorkItemMapBuf::submit(void)
extern "C"
cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, cl_bool block,
- cl_map_flags flags, size_t offset, size_t size, cl_uint num_events,
- const cl_event *event_list, cl_event event_ret)
+ cl_map_flags flags, size_t offset, size_t size, cl_command_queue_work_item item)
{
+ assert(item);
+
if (mem->type != CL_MEM_OBJECT_BUFFER) {
return CL_INVALID_VALUE;
}
+
GenGPUMem* genMem = reinterpret_cast<GenGPUMem*>(getGenMemPrivate(mem, queue->device));
+ GenGPUCommandQueue* gpuQueue = (GenGPUCommandQueue*)getGenCommandQueuePrivate(queue);
+ GBE_ASSERT(gpuQueue);
void* retAddr = NULL;
if (genMem == NULL)
@@ -458,90 +462,19 @@ cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr,
return CL_MEM_OBJECT_ALLOCATION_FAILURE;
}
- GenGPUCommandQueue* gpuQueue = (GenGPUCommandQueue*)getGenCommandQueuePrivate(queue);
- GBE_ASSERT(gpuQueue);
-
- if (block) {
- /* According to spec, when in block mode, we need to ensure all the
- commands in queue are flushed. */
- gpuQueue->waitForFlush();
-
- if (event_list) { // Need to wait for events.
- if (gpuQueue->waitForEvents(event_list, num_events) == false) {
- /* canceled or some errors. */
- return CL_MAP_FAILURE;
- }
- }
-
+ if (block || item->depend_events == NULL) { // No need to do it asynchronously, just map it.
retAddr = genDoMapBuffer(genMem, mem, flags, offset, size);
if (retAddr == NULL) {
- if (event_ret) {
- event_ret->set_status(event_ret, -1); // Set error for that event.
- }
- return CL_MAP_FAILURE;
- }
-
- if (ret_addr)
- *ret_addr = retAddr;
-
- if (event_ret) {
- event_ret->set_status(event_ret, CL_COMPLETE);
- }
- return CL_SUCCESS;
- } else if (event_list == NULL) {
- /* We do not have any events to wait, map it in sync mode. */
- retAddr = genDoMapBuffer(genMem, mem, flags, offset, size);
- if (retAddr == NULL) {
- if (event_ret) {
- event_ret->set_status(event_ret, -1); // Set error for that event.
- }
- return CL_MAP_FAILURE;
- }
-
- if (ret_addr)
- *ret_addr = retAddr;
-
- if (event_ret) {
- event_ret->set_status(event_ret, CL_COMPLETE);
- }
- return CL_SUCCESS;
- } else {
- GBE_ASSERT(num_events > 0);
- /* A awkward case, we have events to wait, so we can not do the map operation sync.
- But the spec require us to return the mapped address immediately. We can just
- allocate a host mem and return that address. After really mapped, we then
- copy back the data. */
- void* addr = NULL;
- if (!mem->flags & CL_MEM_USE_HOST_PTR) {
- addr = GBE_MALLOC(size);
- if (addr == NULL)
- return CL_OUT_OF_HOST_MEMORY;
- }
-
- GenGPUWorkItemMapBuf* mapItem = GBE_NEW(GenGPUWorkItemMapBuf, mem, genMem, addr, flags,
- offset, size, event_ret, event_list, num_events);
- if (mapItem == NULL) {
- if (addr)
- GBE_FREE(addr);
- return CL_OUT_OF_HOST_MEMORY;
- }
-
- /* Final,enqueue it in the queue worker thread. */
- if (gpuQueue->enqueueWorkItem(mapItem) == false) {
- GBE_DELETE(mapItem);
- if (addr)
- GBE_FREE(addr);
return CL_MAP_FAILURE;
}
if (ret_addr)
*ret_addr = retAddr;
+ item->status = CL_COMPLETE;
return CL_SUCCESS;
}
- // Never come to here.
- GBE_ASSERT(0);
return CL_MAP_FAILURE;
}
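With the asynchronous copy-back path moved out of the driver, the function
reduces to the synchronous case: map in place when the call blocks or
nothing is pending, and mark the work item complete. The resulting shape,
as a sketch rather than a verbatim excerpt:

    if (block || item->depend_events == NULL) {
      void *addr = genDoMapBuffer(genMem, mem, flags, offset, size);
      if (addr == NULL)
        return CL_MAP_FAILURE;
      if (ret_addr)
        *ret_addr = addr;
      item->status = CL_COMPLETE;    /* nothing left for the worker thread */
      return CL_SUCCESS;
    }
    return CL_MAP_FAILURE;           /* deferred maps are handled elsewhere */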
diff --git a/include/cl_driver.h b/include/cl_driver.h
index fce765a0..0db53e1e 100644
--- a/include/cl_driver.h
+++ b/include/cl_driver.h
@@ -67,8 +67,7 @@ typedef struct _cl_driver {
cl_int (*create_buffer)(cl_mem mem, const cl_device_id device);
cl_int (*release_mem)(cl_mem mem, const cl_device_id device);
cl_int (*enqueue_map_buffer)(cl_command_queue queue, cl_mem buffer, void** ret_addr, cl_bool block,
- cl_map_flags flags, size_t offset, size_t size, cl_uint num_events, const cl_event *event_list,
- cl_event event_ret);
+ cl_map_flags flags, size_t offset, size_t size, cl_command_queue_work_item item);
cl_int (*enqueue_unmap_mem)(cl_command_queue queue, cl_mem memobj, void *mapped_ptr,
cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event event_ret);
cl_int (*support_image_fmt)(const cl_device_id device, cl_mem_object_type image_type, cl_image_format* image_formats);
diff --git a/include/cl_mem.h b/include/cl_mem.h
index 3cc985dc..257ae747 100644
--- a/include/cl_mem.h
+++ b/include/cl_mem.h
@@ -36,7 +36,8 @@ typedef struct _cl_mem_dstr_cb {
struct _cl_mem_dstr_cb * next;
void (CL_CALLBACK *pfn_notify)(cl_mem memobj, void *user_data);
void *user_data;
-} cl_mem_dstr_cb;
+} _cl_mem_dstr_cb;
+typedef _cl_mem_dstr_cb* cl_mem_dstr_cb;
typedef struct _cl_mem {
uint64_t magic; /* To identify it as a memory object */
@@ -50,8 +51,8 @@ typedef struct _cl_mem {
cl_mapped_ptr_info mapped_ptr; /* Store the mapped addresses and size by caller. */
int mapped_ptr_sz; /* The array size of mapped_ptr. */
int map_ref; /* The mapped count. */
- cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
- cl_bool* enqueued_devices; /* Record whether the mem enqueued on that device. */
+ cl_mem_dstr_cb dstr_cb; /* The destroy callback. */
+ cl_device_id enqueued_device; /* Record which device the mem enqueued on. */
pthread_mutex_t lock; /* The lock to protect the mem. */
void* pdata;
} _cl_mem;
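With the typedef now naming a pointer, dstr_cb is a singly linked LIFO list
of destructor callbacks. A self-contained sketch of registration and
teardown under the new typedef (locking omitted; push_dstr_cb and
fire_dstr_cbs are illustrative names):

    #include <stdlib.h>

    /* Push a callback; newest first, matching
     * clSetMemObjectDestructorCallback below. */
    static int push_dstr_cb(cl_mem mem, void (CL_CALLBACK *fn)(cl_mem, void *),
                            void *user_data)
    {
      cl_mem_dstr_cb cb = calloc(1, sizeof(_cl_mem_dstr_cb));
      if (cb == NULL)
        return -1;
      cb->pfn_notify = fn;
      cb->user_data = user_data;
      cb->next = mem->dstr_cb;
      mem->dstr_cb = cb;
      return 0;
    }

    /* Fire and free the whole list when the mem object dies. */
    static void fire_dstr_cbs(cl_mem mem)
    {
      while (mem->dstr_cb) {
        cl_mem_dstr_cb cb = mem->dstr_cb;
        mem->dstr_cb = cb->next;
        cb->pfn_notify(mem, cb->user_data);
        free(cb);
      }
    }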
diff --git a/libclapi/cl_kernel.c b/libclapi/cl_kernel.c
index e4b2a0bd..1fc0f785 100644
--- a/libclapi/cl_kernel.c
+++ b/libclapi/cl_kernel.c
@@ -803,13 +803,13 @@ static cl_int cl_enqueue_ND_range(cl_command_queue queue, cl_kernel kernel, cons
if (event == NULL)
goto error;
}
- cl_enqueue_set_work_item_event(it, event);
+ if (event)
+ cl_enqueue_set_work_item_event(it, event);
if (it->status > CL_COMPLETE) { // Still something todo
err = cl_enqueue_insert_work_item(queue, it);
- if (err != CL_SUCCESS)
- goto error;
+ assert(err == CL_SUCCESS); // queue must be available.
} else {
cl_enqueue_destroy_work_item(queue, it);
}
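The same work-item lifecycle recurs in cl_mem.c below: attach the event
only when the caller asked for one, then either queue the item for the
worker thread or destroy it if it already completed. Distilled as a sketch:

    if (event)
      cl_enqueue_set_work_item_event(it, event);
    if (it->status > CL_COMPLETE) {        /* still pending work */
      err = cl_enqueue_insert_work_item(queue, it);
      assert(err == CL_SUCCESS);           /* queue must be available */
    } else {                               /* finished synchronously */
      cl_enqueue_destroy_work_item(queue, it);
    }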
diff --git a/libclapi/cl_mem.c b/libclapi/cl_mem.c
index ad46a6f8..21c2e787 100644
--- a/libclapi/cl_mem.c
+++ b/libclapi/cl_mem.c
@@ -18,6 +18,7 @@
#include <string.h>
#include <assert.h>
+#include <unistd.h>
#include "cl_context.h"
#include "cl_internals.h"
#include "cl_alloc.h"
@@ -137,17 +138,10 @@ LOCAL cl_mem cl_mem_new(cl_mem_object_type type, cl_context ctx, cl_mem_flags fl
if (mem == NULL)
return NULL;
- mem->enqueued_devices = CL_CALLOC(ctx->device_num, sizeof(cl_bool));
- if (mem->enqueued_devices == NULL) {
- CL_FREE(mem);
- return NULL;
- }
-
/* Create the private pointer array if device > 1 */
if (ctx->device_num > 1) {
mem->pdata = CL_CALLOC(ctx->device_num, sizeof(void*));
if (mem->pdata == NULL) {
- CL_FREE(mem->enqueued_devices);
CL_FREE(mem);
return NULL;
}
@@ -189,11 +183,16 @@ static void cl_mem_delete(cl_mem mem)
CL_MUTEX_UNLOCK(&mem->ctx->lock);
cl_release_context(mem->ctx);
+ if (mem->flags & CL_MEM_ALLOC_HOST_PTR) {
+ assert(mem->host_ptr);
+ CL_FREE(mem->host_ptr);
+ }
+
if (mem->mapped_ptr)
CL_FREE(mem->mapped_ptr);
if (mem->dstr_cb) {
- cl_mem_dstr_cb *cb = NULL;
+ cl_mem_dstr_cb cb = NULL;
while (mem->dstr_cb) {
cb = mem->dstr_cb;
mem->dstr_cb = cb->next;
@@ -206,7 +205,6 @@ static void cl_mem_delete(cl_mem mem)
if (mem->ctx->device_num > 1) {
CL_FREE(mem->pdata);
}
- CL_FREE(mem->enqueued_devices);
CL_FREE(mem);
}
@@ -221,7 +219,7 @@ LOCAL void cl_release_mem(cl_mem mem)
/* Call the user callback. No need to lock, we are the last user. */
if (mem->dstr_cb) {
- cl_mem_dstr_cb *cb = mem->dstr_cb;
+ cl_mem_dstr_cb cb = mem->dstr_cb;
while (mem->dstr_cb) {
cb = mem->dstr_cb;
cb->pfn_notify(mem, cb->user_data);
@@ -328,8 +326,20 @@ static cl_mem cl_mem_create_buffer(cl_context ctx, cl_mem_flags flags, size_t sz
goto error;
}
- if (flags & (CL_MEM_USE_HOST_PTR|CL_MEM_COPY_HOST_PTR))
+ if (flags & CL_MEM_ALLOC_HOST_PTR) {
+ /* FIXME: For performance and HW limitations, we need the address aligned to page size. */
+ int page_size = getpagesize();
+ mem->host_ptr = CL_MEMALIGN(sz, page_size);
+ if (mem->host_ptr == NULL) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ goto error;
+ }
+
+ if (flags & CL_MEM_COPY_HOST_PTR)
+ memcpy(mem->host_ptr, data, sz);
+ } else if (flags & (CL_MEM_USE_HOST_PTR|CL_MEM_COPY_HOST_PTR)) {
mem->host_ptr = data;
+ }
for (i = 0; i < ctx->device_num; i++) {
err = ctx->devices[i]->driver->create_buffer(mem, ctx->devices[i]);
@@ -624,7 +634,6 @@ static void* cl_enqueue_map_buffer(cl_command_queue queue, cl_mem buffer, cl_boo
cl_command_queue_work_item it = NULL;
cl_int err = CL_SUCCESS;
void *mem_ptr = NULL;
- cl_int index;
err = cl_mem_find_mapped(buffer, offset, map_flags, size);
if (err != CL_SUCCESS)
@@ -650,12 +659,24 @@ static void* cl_enqueue_map_buffer(cl_command_queue queue, cl_mem buffer, cl_boo
goto error;
}
- err = queue->device->driver->enqueue_map_buffer(queue, buffer, &mem_ptr, blocking_map, map_flags,
- offset, size, num_events, event_list, event);
+ CL_MUTEX_LOCK(&buffer->lock);
+ if (buffer->enqueued_device && buffer->enqueued_device != queue->device) {
+ CL_MUTEX_UNLOCK(&buffer->lock);
+ err = CL_INVALID_OPERATION;
+ goto error;
+ } else if (buffer->enqueued_device == NULL) {
+ cl_retain_device_id(queue->device);
+ buffer->enqueued_device = queue->device;
+ }
+ CL_MUTEX_UNLOCK(&buffer->lock);
+
+ err = queue->device->driver->enqueue_map_buffer(queue, buffer, &mem_ptr,
+ blocking_map, map_flags, offset, size, it);
if (err != CL_SUCCESS)
goto error;
- cl_enqueue_set_work_item_event(it, event);
+ if (event)
+ cl_enqueue_set_work_item_event(it, event);
/* We need to store the map info for unmap and debug. */
err = cl_mem_record_mapped(buffer, mem_ptr, offset, map_flags, size, NULL, NULL);
@@ -665,11 +686,13 @@ static void* cl_enqueue_map_buffer(cl_command_queue queue, cl_mem buffer, cl_boo
goto error;
}
- index = cl_context_get_device_index(queue->ctx, queue->device);
-
- CL_MUTEX_LOCK(&buffer->lock);
- buffer->enqueued_devices[index] = CL_TRUE;
- CL_MUTEX_UNLOCK(&buffer->lock);
+ if (it->status > CL_COMPLETE) { // Still something to do
+ err = cl_enqueue_insert_work_item(queue, it);
+ assert(err == CL_SUCCESS); // queue must be available.
+ } else {
+ cl_enqueue_destroy_work_item(queue, it);
+ it = NULL;
+ }
if (errcode_ret)
*errcode_ret = err;
@@ -702,6 +725,17 @@ static cl_int cl_enqueue_unmap_mem(cl_command_queue queue, cl_mem memobj, void *
goto error;
}
+ CL_MUTEX_LOCK(&memobj->lock);
+ if (memobj->enqueued_device && memobj->enqueued_device != queue->device) {
+ CL_MUTEX_UNLOCK(&memobj->lock);
+ err = CL_INVALID_OPERATION;
+ goto error;
+ } else if (memobj->enqueued_device == NULL) {
+ cl_retain_device_id(queue->device);
+ memobj->enqueued_device = queue->device;
+ }
+ CL_MUTEX_UNLOCK(&memobj->lock);
+
/* Check the pointer valid. */
INVALID_VALUE_IF(!mapped_ptr);
@@ -715,11 +749,6 @@ static cl_int cl_enqueue_unmap_mem(cl_command_queue queue, cl_mem memobj, void *
if (err != CL_SUCCESS)
goto error;
- CL_MUTEX_LOCK(&memobj->lock);
- index = cl_context_get_device_index(queue->ctx, queue->device);
- memobj->enqueued_devices[index] = CL_TRUE;
- CL_MUTEX_UNLOCK(&memobj->lock);
-
if (event_ret)
*event_ret = event;
@@ -804,14 +833,14 @@ clSetMemObjectDestructorCallback(cl_mem memobj,
CHECK_MEM(memobj);
INVALID_VALUE_IF (pfn_notify == 0);
- cl_mem_dstr_cb *cb = (cl_mem_dstr_cb*)malloc(sizeof(cl_mem_dstr_cb));
+ cl_mem_dstr_cb cb = malloc(sizeof(_cl_mem_dstr_cb));
if (!cb) {
err = CL_OUT_OF_HOST_MEMORY;
goto error;
}
CL_MUTEX_LOCK(&memobj->lock);
- memset(cb, 0, sizeof(cl_mem_dstr_cb));
+ memset(cb, 0, sizeof(_cl_mem_dstr_cb));
cb->pfn_notify = pfn_notify;
cb->user_data = user_data;
cb->next = memobj->dstr_cb;
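CL_MEM_ALLOC_HOST_PTR buffers now get their host memory up front, page
aligned, and cl_mem_delete frees it. CL_MEMALIGN is this project's wrapper;
a standalone sketch of the same behavior with POSIX calls, assuming
CL_MEMALIGN(sz, align) behaves like posix_memalign:

    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    /* Page-aligned host allocation for CL_MEM_ALLOC_HOST_PTR, optionally
     * seeded from caller data when CL_MEM_COPY_HOST_PTR is also set. */
    static void *alloc_host_ptr(size_t sz, const void *data, int copy)
    {
      void *ptr = NULL;
      if (posix_memalign(&ptr, (size_t)getpagesize(), sz) != 0)
        return NULL;                 /* caller reports CL_OUT_OF_HOST_MEMORY */
      if (copy && data)
        memcpy(ptr, data, sz);
      return ptr;
    }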
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 5d28fa92..ce53037c 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -1227,7 +1227,7 @@ cl_mem_delete(cl_mem mem)
free(mem->mapped_ptr);
if (mem->dstr_cb) {
- cl_mem_dstr_cb *cb = mem->dstr_cb;
+ cl_mem_dstr_cb cb = mem->dstr_cb;
while (mem->dstr_cb) {
cb = mem->dstr_cb;
cb->pfn_notify(mem, cb->user_data);