author    Junyan He <junyan.he@intel.com>    2016-04-27 16:00:48 +0800
committer Junyan He <junyan.he@intel.com>    2016-04-27 16:00:48 +0800
commit    8c7d57dad366c0ccedb2c1ace1bc61836b99b2bf (patch)
tree      a1162f4f784c8ce301290f1e8d02b7717b66d951
parent    761edcf7c7a65ca2d36820da36804204291479af (diff)
add map flags to the mapped pointer info and reject conflicting overlapping maps
-rw-r--r--  include/cl_mem.h       8
-rw-r--r--  libclapi/cl_kernel.c  12
-rw-r--r--  libclapi/cl_mem.c    207
3 files changed, 132 insertions(+), 95 deletions(-)
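Before the per-file diffs, it may help to spell out the rule this commit adds: a new map of the byte range [offset, offset + size) conflicts with an existing map only when the two ranges intersect and at least one of the two maps is not CL_MAP_READ. A minimal standalone sketch of that predicate (the helper name ranges_conflict is illustrative, not from the patch):

#include <assert.h>
#include <stddef.h>
#include <CL/cl.h>

/* Nonzero if a new map of [new_off, new_off + new_sz) with new_flags
   conflicts with an existing map. Read-only maps may overlap; any
   overlap involving a write map is rejected, mirroring the checks in
   cl_mem_find_mapped() and cl_mem_record_mapped() below. */
static int ranges_conflict(size_t old_off, size_t old_sz, cl_map_flags old_flags,
                           size_t new_off, size_t new_sz, cl_map_flags new_flags)
{
  if (new_off + new_sz <= old_off || new_off >= old_off + old_sz)
    return 0; /* disjoint ranges never conflict */
  return !(old_flags == CL_MAP_READ && new_flags == CL_MAP_READ);
}

int main(void)
{
  /* Two read maps over intersecting bytes are allowed... */
  assert(!ranges_conflict(0, 64, CL_MAP_READ, 32, 64, CL_MAP_READ));
  /* ...but a write map overlapping a read map is not. */
  assert(ranges_conflict(0, 64, CL_MAP_READ, 32, 64, CL_MAP_WRITE));
  /* Disjoint maps never conflict, whatever the flags. */
  assert(!ranges_conflict(0, 32, CL_MAP_WRITE, 32, 32, CL_MAP_WRITE));
  return 0;
}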
diff --git a/include/cl_mem.h b/include/cl_mem.h
index fa687f1c..3cc985dc 100644
--- a/include/cl_mem.h
+++ b/include/cl_mem.h
@@ -22,13 +22,15 @@
#include <assert.h>
#include "CL/cl.h"
-typedef struct _cl_mapped_ptr {
+typedef struct _cl_mapped_ptr_info {
void* ptr;
size_t offset;
size_t size;
+ cl_map_flags flags;
size_t origin[3]; /* mapped origin */
size_t region[3]; /* mapped region */
-} cl_mapped_ptr;
+} _cl_mapped_ptr_info;
+typedef _cl_mapped_ptr_info* cl_mapped_ptr_info;
typedef struct _cl_mem_dstr_cb {
struct _cl_mem_dstr_cb * next;
@@ -45,7 +47,7 @@ typedef struct _cl_mem {
cl_context ctx; /* Context it belongs to */
cl_mem_flags flags; /* Flags specified at the creation time */
void* host_ptr; /* Pointer of the host mem specified by CL_MEM_USE_HOST_PTR */
- cl_mapped_ptr* mapped_ptr; /* Store the mapped addresses and size by caller. */
+ cl_mapped_ptr_info mapped_ptr; /* Store the mapped addresses and size by caller. */
int mapped_ptr_sz; /* The array size of mapped_ptr. */
int map_ref; /* The mapped count. */
cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
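One detail worth noting in the header change above: cl_mapped_ptr_info is now a pointer typedef, while the struct type keeps the name _cl_mapped_ptr_info, which is why every allocation in this patch sizes the array with sizeof(_cl_mapped_ptr_info). A small sketch of the pitfall this naming avoids (demo types only, not from the patch):

#include <stdlib.h>
#include <string.h>

typedef struct _demo_info { void *ptr; size_t size; } _demo_info;
typedef _demo_info *demo_info;  /* pointer typedef, like cl_mapped_ptr_info */

int main(void)
{
  /* Correct: size the array by the struct type, as the patch does. */
  demo_info arr = malloc(16 * sizeof(_demo_info));
  if (!arr)
    return 1;
  memset(arr, 0, 16 * sizeof(_demo_info));
  /* sizeof(demo_info) would be the size of a pointer and would
     under-allocate the array, so it must not be used here. */
  free(arr);
  return 0;
}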
diff --git a/libclapi/cl_kernel.c b/libclapi/cl_kernel.c
index 351e06f8..e4b2a0bd 100644
--- a/libclapi/cl_kernel.c
+++ b/libclapi/cl_kernel.c
@@ -767,10 +767,10 @@ static cl_int cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const voi
return CL_SUCCESS;
}
-static cl_int cl_command_queue_ND_range(cl_command_queue queue, cl_kernel kernel, const uint32_t work_dim,
- const size_t *global_wk_off, const size_t *global_wk_sz,
- const size_t *local_wk_sz, cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list, cl_event *event_ret)
+static cl_int cl_enqueue_ND_range(cl_command_queue queue, cl_kernel kernel, const uint32_t work_dim,
+ const size_t *global_wk_off, const size_t *global_wk_sz,
+ const size_t *local_wk_sz, cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list, cl_event *event_ret)
{
cl_event event = NULL;
cl_command_queue_work_item it = NULL;
@@ -1038,8 +1038,8 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
}
/* Do device-specific checks and enqueue the kernel */
- err = cl_command_queue_ND_range(command_queue, kernel, work_dim, fixed_global_off, fixed_global_sz,
- fixed_local_sz, num_events_in_wait_list, event_wait_list, event);
+ err = cl_enqueue_ND_range(command_queue, kernel, work_dim, fixed_global_off, fixed_global_sz,
+ fixed_local_sz, num_events_in_wait_list, event_wait_list, event);
if(err != CL_SUCCESS)
goto error;
diff --git a/libclapi/cl_mem.c b/libclapi/cl_mem.c
index 441a7c3c..ad46a6f8 100644
--- a/libclapi/cl_mem.c
+++ b/libclapi/cl_mem.c
@@ -454,50 +454,146 @@ error:
goto exit;
}
-static cl_int cl_record_mapped(cl_mem mem, void *ptr, size_t offset,
- size_t size, const size_t *origin, const size_t *region)
+static cl_int cl_mem_find_mapped(cl_mem mem, size_t offset, cl_map_flags map_flags, size_t size)
+{
+ int i;
+
+ CL_MUTEX_LOCK(&mem->lock);
+ if (!mem->mapped_ptr_sz) {
+ CL_MUTEX_UNLOCK(&mem->lock);
+ return CL_SUCCESS;
+ }
+
+ for (i = 0; i < mem->mapped_ptr_sz; i++) {
+ if (offset + size <= mem->mapped_ptr[i].offset ||
+ offset >= mem->mapped_ptr[i].offset + mem->mapped_ptr[i].size)
+ continue; // No overlap, continue.
+
+ /* Overlap: check the flags; write maps are mutually exclusive. */
+ if (map_flags != CL_MAP_READ || mem->mapped_ptr[i].flags != CL_MAP_READ) {
+ CL_MUTEX_UNLOCK(&mem->lock);
+ return CL_INVALID_OPERATION;
+ }
+ }
+
+ CL_MUTEX_UNLOCK(&mem->lock);
+ return CL_SUCCESS;
+}
+
+static cl_int cl_mem_delete_mapped(cl_mem mem, void *mapped_ptr, cl_mapped_ptr_info info)
+{
+ int i, j;
+ cl_bool ptr_invalid = CL_FALSE;
+
+ CL_MUTEX_LOCK(&mem->lock);
+ assert(mem->mapped_ptr_sz >= mem->map_ref);
+
+ for (i = 0; i < mem->mapped_ptr_sz; i++) {
+ if (mem->mapped_ptr[i].ptr == mapped_ptr) {
+ /* Several slots may hold the same mapped_ptr, but any overlapping maps
+    must be read-only, so just find the first one and unmap it. */
+ *info = mem->mapped_ptr[i]; // copy it.
+ break;
+ }
+ }
+
+ if (i == mem->mapped_ptr_sz) {
+ ptr_invalid = CL_TRUE;
+ } else {
+ /* Do some shrinking of the slot array. */
+ mem->mapped_ptr[i].ptr = NULL;
+ mem->map_ref--;
+ if (mem->mapped_ptr_sz/2 > mem->map_ref) {
+ j = 0;
+ cl_mapped_ptr_info new_ptr = CL_MALLOC(sizeof(_cl_mapped_ptr_info) * (mem->mapped_ptr_sz/2));
+ if (!new_ptr) {
+ /* Allocation failed; just keep the old array. */
+ CL_MUTEX_UNLOCK(&mem->lock);
+ goto RETURN;
+ }
+ memset(new_ptr, 0, (mem->mapped_ptr_sz/2) * sizeof(_cl_mapped_ptr_info));
+
+ for (i = 0; i < mem->mapped_ptr_sz; i++) {
+ if (mem->mapped_ptr[i].ptr) {
+ new_ptr[j] = mem->mapped_ptr[i];
+ j++;
+ assert(j < mem->mapped_ptr_sz/2);
+ }
+ }
+ mem->mapped_ptr_sz = mem->mapped_ptr_sz/2;
+ CL_FREE(mem->mapped_ptr);
+ mem->mapped_ptr = new_ptr;
+ }
+ }
+ CL_MUTEX_UNLOCK(&mem->lock);
+
+RETURN:
+ if (ptr_invalid)
+ return CL_INVALID_VALUE;
+
+ return CL_SUCCESS;
+}
+
+static cl_int cl_mem_record_mapped(cl_mem mem, void *ptr, size_t offset, cl_map_flags map_flags,
+ size_t size, const size_t *origin, const size_t *region)
{
cl_int slot = -1;
+ int i;
CL_MUTEX_LOCK(&mem->lock);
- /* Record the mapped address. */
if (!mem->mapped_ptr_sz) {
mem->mapped_ptr_sz = 16;
- mem->mapped_ptr = (cl_mapped_ptr *)CL_MALLOC(sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz);
+ mem->mapped_ptr = CL_MALLOC(sizeof(_cl_mapped_ptr_info) * mem->mapped_ptr_sz);
if (!mem->mapped_ptr) {
CL_MUTEX_UNLOCK(&mem->lock);
return CL_OUT_OF_HOST_MEMORY;
}
- memset(mem->mapped_ptr, 0, mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+ memset(mem->mapped_ptr, 0, mem->mapped_ptr_sz * sizeof(_cl_mapped_ptr_info));
slot = 0;
} else {
- int i = 0;
- for (; i < mem->mapped_ptr_sz; i++) {
+ /* Someone else may have added a slot while we were mapping; it may conflict, so check it again. */
+ for (i = 0; i < mem->mapped_ptr_sz; i++) {
+ if (offset + size <= mem->mapped_ptr[i].offset ||
+ offset >= mem->mapped_ptr[i].offset + mem->mapped_ptr[i].size)
+ continue; // No overlap, continue.
+
+ /* Overlap: check the flags; write maps are mutually exclusive. */
+ if (map_flags != CL_MAP_READ || mem->mapped_ptr[i].flags != CL_MAP_READ) {
+ CL_MUTEX_UNLOCK(&mem->lock);
+ return CL_INVALID_OPERATION;
+ }
+ }
+
+ /* Insert a new one. */
+ for (i = 0; i < mem->mapped_ptr_sz; i++) {
if (mem->mapped_ptr[i].ptr == NULL) {
slot = i;
break;
}
}
- if (i == mem->mapped_ptr_sz) {
- cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)CL_MALLOC(sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2);
+
+ if (i == mem->mapped_ptr_sz) { /* Double the list size. */
+ cl_mapped_ptr_info new_ptr = CL_MALLOC(sizeof(_cl_mapped_ptr_info) * mem->mapped_ptr_sz * 2);
if (!new_ptr) {
CL_MUTEX_UNLOCK(&mem->lock);
return CL_OUT_OF_HOST_MEMORY;
}
- memset(new_ptr, 0, 2 * mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
- memcpy(new_ptr, mem->mapped_ptr,
- mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+ memset(new_ptr, 0, 2 * mem->mapped_ptr_sz * sizeof(_cl_mapped_ptr_info));
+ memcpy(new_ptr, mem->mapped_ptr, mem->mapped_ptr_sz * sizeof(_cl_mapped_ptr_info));
slot = mem->mapped_ptr_sz;
mem->mapped_ptr_sz *= 2;
CL_FREE(mem->mapped_ptr);
mem->mapped_ptr = new_ptr;
}
}
+
assert(slot != -1);
mem->mapped_ptr[slot].ptr = ptr;
mem->mapped_ptr[slot].offset = offset;
mem->mapped_ptr[slot].size = size;
+ mem->mapped_ptr[slot].flags = map_flags;
if(origin) {
assert(region);
mem->mapped_ptr[slot].origin[0] = origin[0];
@@ -530,6 +626,9 @@ static void* cl_enqueue_map_buffer(cl_command_queue queue, cl_mem buffer, cl_boo
void *mem_ptr = NULL;
cl_int index;
+ err = cl_mem_find_mapped(buffer, offset, map_flags, size);
+ if (err != CL_SUCCESS)
+ goto error;
if (blocking_map) {
/* According to spec, when in block mode, we need to ensure all the
@@ -545,51 +644,21 @@ static void* cl_enqueue_map_buffer(cl_command_queue queue, cl_mem buffer, cl_boo
goto error;
}
- err = queue->device->driver->enqueue_map_buffer(queue, buffer, &mem_ptr, blocking_map, map_flags,
- offset, size, num_events, event_list, event);
- if (err != CL_SUCCESS)
- goto error;
-
if (event_ret) {
event = cl_create_event(buffer->ctx, queue, CL_FALSE, num_events, event_list, &err);
if (event == NULL)
goto error;
}
- cl_enqueue_set_work_item_event(it, event);
-
-#if 0
- if (blocking_map) {
- /* According to spec, when in block mode, we need to ensure all the
- commands in queue are flushed. */
- err = cl_enqueue_wait_for_flush(queue);
- if (err != CL_SUCCESS)
- goto error;
-
- if (event_list) { // Need to wait for events.
- if (cl_enqueue_wait_for_events(event_list, num_events) == false) {
- /* canceled or some errors. */
- return CL_MAP_FAILURE;
- }
- }
- }
-
-
- it = cl_enqueue_create_work_item(queue, num_events, event_list, event);
- if (it == NULL) {
- err = CL_OUT_OF_HOST_MEMORY;
+ err = queue->device->driver->enqueue_map_buffer(queue, buffer, &mem_ptr, blocking_map, map_flags,
+ offset, size, num_events, event_list, event);
+ if (err != CL_SUCCESS)
goto error;
- }
-
-
-
-
-
-#endif
+ cl_enqueue_set_work_item_event(it, event);
/* We need to store the map info for unmap and debug. */
- err = cl_record_mapped(buffer, mem_ptr, offset, size, NULL, NULL);
+ err = cl_mem_record_mapped(buffer, mem_ptr, offset, map_flags, size, NULL, NULL);
if (err != CL_SUCCESS) {
// Unmap and return error.
queue->device->driver->enqueue_unmap_mem(queue, buffer, mem_ptr, 0, NULL, NULL);
@@ -625,8 +694,7 @@ static cl_int cl_enqueue_unmap_mem(cl_command_queue queue, cl_mem memobj, void *
cl_event event = NULL;
cl_int err = CL_SUCCESS;
cl_int index;
- cl_bool ptr_invalid = CL_FALSE;
- int i;
+ _cl_mapped_ptr_info ptr_info;
if (event_ret) {
event = cl_create_event(memobj->ctx, queue, CL_FALSE, num_events, event_list, &err);
@@ -636,51 +704,18 @@ static cl_int cl_enqueue_unmap_mem(cl_command_queue queue, cl_mem memobj, void *
/* Check that the pointer is valid. */
INVALID_VALUE_IF(!mapped_ptr);
- CL_MUTEX_LOCK(&memobj->lock);
- assert(memobj->mapped_ptr_sz >= memobj->map_ref);
- for (i = 0; i < memobj->mapped_ptr_sz; i++) {
- if (memobj->mapped_ptr[i].ptr == mapped_ptr) {
- break;
- }
- }
- if (i == memobj->mapped_ptr_sz)
- ptr_invalid = CL_TRUE;
- CL_MUTEX_UNLOCK(&memobj->lock);
+
/* Cannot find a mapped address? */
- INVALID_VALUE_IF(ptr_invalid == CL_TRUE);
+ err = cl_mem_delete_mapped(memobj, mapped_ptr, &ptr_info);
+ if (err != CL_SUCCESS)
+ goto error;
err = queue->device->driver->enqueue_unmap_mem(queue, memobj, mapped_ptr,
num_events, event_list, event);
if (err != CL_SUCCESS)
goto error;
- /* shrink the mapped slot. */
CL_MUTEX_LOCK(&memobj->lock);
- memobj->mapped_ptr[i].ptr = NULL;
- memobj->map_ref--;
- if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
- int j = 0;
- cl_mapped_ptr *new_ptr =
- (cl_mapped_ptr *)CL_MALLOC(sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2));
- if (!new_ptr) {
- /* Just do nothing. */
- CL_MUTEX_UNLOCK(&memobj->lock);
- goto error;
- }
- memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr));
-
- for (i = 0; i < memobj->mapped_ptr_sz; i++) {
- if (memobj->mapped_ptr[i].ptr) {
- new_ptr[j] = memobj->mapped_ptr[i];
- j++;
- assert(j < memobj->mapped_ptr_sz/2);
- }
- }
- memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2;
- CL_FREE(memobj->mapped_ptr);
- memobj->mapped_ptr = new_ptr;
- }
-
index = cl_context_get_device_index(queue->ctx, queue->device);
memobj->enqueued_devices[index] = CL_TRUE;
CL_MUTEX_UNLOCK(&memobj->lock);
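Seen from the API, the new bookkeeping means clEnqueueMapBuffer should now return CL_INVALID_OPERATION when a requested range overlaps an existing mapping unless both mappings are read-only. A host-side sketch of that behavior (assumes ctx and q are a valid cl_context and cl_command_queue created elsewhere; error handling trimmed):

#include <assert.h>
#include <CL/cl.h>

/* Sketch: overlapping read maps succeed, an overlapping write map is
   expected to fail with CL_INVALID_OPERATION under this patch. */
static void map_overlap_demo(cl_context ctx, cl_command_queue q)
{
  cl_int err;
  cl_mem buf = clCreateBuffer(ctx, CL_MEM_READ_WRITE, 1024, NULL, &err);
  assert(err == CL_SUCCESS);

  /* Two blocking read maps over intersecting ranges: both allowed. */
  void *r0 = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 0, 256,
                                0, NULL, NULL, &err);
  assert(err == CL_SUCCESS);
  void *r1 = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_READ, 128, 256,
                                0, NULL, NULL, &err);
  assert(err == CL_SUCCESS);

  /* A write map over the same bytes must be rejected. */
  void *w = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_WRITE, 64, 256,
                               0, NULL, NULL, &err);
  assert(w == NULL && err == CL_INVALID_OPERATION);

  clEnqueueUnmapMemObject(q, buf, r0, 0, NULL, NULL);
  clEnqueueUnmapMemObject(q, buf, r1, 0, NULL, NULL);
  clFinish(q);
  clReleaseMemObject(buf);
}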