diff options
author | Junyan He <junyan.he@intel.com> | 2016-04-27 16:00:48 +0800 |
---|---|---|
committer | Junyan He <junyan.he@intel.com> | 2016-04-27 16:00:48 +0800 |
commit | 8c7d57dad366c0ccedb2c1ace1bc61836b99b2bf (patch) | |
tree | a1162f4f784c8ce301290f1e8d02b7717b66d951 | |
parent | 761edcf7c7a65ca2d36820da36804204291479af (diff) |
cl_mem: record map flags in mapped-pointer info and check overlapping maps

Extend _cl_mapped_ptr_info with the cl_map_flags used at map time, and add
cl_mem_find_mapped/cl_mem_record_mapped/cl_mem_delete_mapped helpers so that
overlapping mappings are rejected unless both are CL_MAP_READ. Also rename
cl_command_queue_ND_range to cl_enqueue_ND_range for naming consistency.
-rw-r--r-- | include/cl_mem.h | 8 | ||||
-rw-r--r-- | libclapi/cl_kernel.c | 12 | ||||
-rw-r--r-- | libclapi/cl_mem.c | 207 |
3 files changed, 132 insertions, 95 deletions
diff --git a/include/cl_mem.h b/include/cl_mem.h index fa687f1c..3cc985dc 100644 --- a/include/cl_mem.h +++ b/include/cl_mem.h @@ -22,13 +22,15 @@ #include <assert.h> #include "CL/cl.h" -typedef struct _cl_mapped_ptr { +typedef struct _cl_mapped_ptr_info { void* ptr; size_t offset; size_t size; + cl_map_flags flags; size_t origin[3]; /* mapped origin */ size_t region[3]; /* mapped region */ -} cl_mapped_ptr; +} _cl_mapped_ptr_info; +typedef _cl_mapped_ptr_info* cl_mapped_ptr_info; typedef struct _cl_mem_dstr_cb { struct _cl_mem_dstr_cb * next; @@ -45,7 +47,7 @@ typedef struct _cl_mem { cl_context ctx; /* Context it belongs to */ cl_mem_flags flags; /* Flags specified at the creation time */ void* host_ptr; /* Pointer of the host mem specified by CL_MEM_USE_HOST_PTR */ - cl_mapped_ptr* mapped_ptr; /* Store the mapped addresses and size by caller. */ + cl_mapped_ptr_info mapped_ptr; /* Store the mapped addresses and size by caller. */ int mapped_ptr_sz; /* The array size of mapped_ptr. */ int map_ref; /* The mapped count. */ cl_mem_dstr_cb *dstr_cb; /* The destroy callback. 
*/ diff --git a/libclapi/cl_kernel.c b/libclapi/cl_kernel.c index 351e06f8..e4b2a0bd 100644 --- a/libclapi/cl_kernel.c +++ b/libclapi/cl_kernel.c @@ -767,10 +767,10 @@ static cl_int cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const voi return CL_SUCCESS; } -static cl_int cl_command_queue_ND_range(cl_command_queue queue, cl_kernel kernel, const uint32_t work_dim, - const size_t *global_wk_off, const size_t *global_wk_sz, - const size_t *local_wk_sz, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event_ret) +static cl_int cl_enqueue_ND_range(cl_command_queue queue, cl_kernel kernel, const uint32_t work_dim, + const size_t *global_wk_off, const size_t *global_wk_sz, + const size_t *local_wk_sz, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event_ret) { cl_event event = NULL; cl_command_queue_work_item it = NULL; @@ -1038,8 +1038,8 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue, } /* Do device specific checks are enqueue the kernel */ - err = cl_command_queue_ND_range(command_queue, kernel, work_dim, fixed_global_off, fixed_global_sz, - fixed_local_sz, num_events_in_wait_list, event_wait_list, event); + err = cl_enqueue_ND_range(command_queue, kernel, work_dim, fixed_global_off, fixed_global_sz, + fixed_local_sz, num_events_in_wait_list, event_wait_list, event); if(err != CL_SUCCESS) goto error; diff --git a/libclapi/cl_mem.c b/libclapi/cl_mem.c index 441a7c3c..ad46a6f8 100644 --- a/libclapi/cl_mem.c +++ b/libclapi/cl_mem.c @@ -454,50 +454,146 @@ error: goto exit; } -static cl_int cl_record_mapped(cl_mem mem, void *ptr, size_t offset, - size_t size, const size_t *origin, const size_t *region) +static cl_int cl_mem_find_mapped(cl_mem mem, size_t offset, cl_map_flags map_flags, size_t size) +{ + int i; + + CL_MUTEX_LOCK(&mem->lock); + if (!mem->mapped_ptr_sz) { + CL_MUTEX_UNLOCK(&mem->lock); + return CL_SUCCESS; + } + + for (i = 0; i < mem->mapped_ptr_sz; i++) { + if (offset + size <= 
mem->mapped_ptr[i].offset || + offset >= mem->mapped_ptr[i].offset + mem->mapped_ptr[i].size) + continue; // No overlap, continue. + + /* overlap, check the flags. Write is mutual exclusive. */ + if (map_flags != CL_MAP_READ || mem->mapped_ptr[i].flags != CL_MAP_READ) { + CL_MUTEX_UNLOCK(&mem->lock); + return CL_INVALID_OPERATION; + } + } + + CL_MUTEX_UNLOCK(&mem->lock); + return CL_SUCCESS; +} + +static cl_int cl_mem_delete_mapped(cl_mem mem, void *mapped_ptr, cl_mapped_ptr_info info) +{ + int i, j; + cl_bool ptr_invalid = CL_FALSE; + + CL_MUTEX_LOCK(&mem->lock); + assert(mem->mapped_ptr_sz >= mem->map_ref); + + for (i = 0; i < mem->mapped_ptr_sz; i++) { + if (mem->mapped_ptr[i].ptr == mapped_ptr) { + /* We may find several slots have same mapped_ptr, but we will be sure that + the overlapped map should be read only map, and so just find the first + one and unmap it. */ + *info = mem->mapped_ptr[i]; // copy it. + break; + } + } + + if (i == mem->mapped_ptr_sz) { + ptr_invalid = CL_TRUE; + } else { + /* So some shrink thing.*/ + mem->mapped_ptr[i].ptr = NULL; + mem->map_ref--; + if (mem->mapped_ptr_sz/2 > mem->map_ref) { + j = 0; + cl_mapped_ptr_info new_ptr = CL_MALLOC(sizeof(_cl_mapped_ptr_info) * (mem->mapped_ptr_sz/2)); + if (!new_ptr) { + /* Just do nothing. 
*/ + CL_MUTEX_UNLOCK(&mem->lock); + goto RETURN; + } + memset(new_ptr, 0, (mem->mapped_ptr_sz/2) * sizeof(_cl_mapped_ptr_info)); + + for (i = 0; i < mem->mapped_ptr_sz; i++) { + if (mem->mapped_ptr[i].ptr) { + new_ptr[j] = mem->mapped_ptr[i]; + j++; + assert(j < mem->mapped_ptr_sz/2); + } + } + mem->mapped_ptr_sz = mem->mapped_ptr_sz/2; + CL_FREE(mem->mapped_ptr); + mem->mapped_ptr = new_ptr; + } + } + CL_MUTEX_UNLOCK(&mem->lock); + +RETURN: + if (ptr_invalid) + return CL_INVALID_VALUE; + + return CL_SUCCESS; +} + +static cl_int cl_mem_record_mapped(cl_mem mem, void *ptr, size_t offset, cl_map_flags map_flags, + size_t size, const size_t *origin, const size_t *region) { cl_int slot = -1; + int i; CL_MUTEX_LOCK(&mem->lock); - /* Record the mapped address. */ if (!mem->mapped_ptr_sz) { mem->mapped_ptr_sz = 16; - mem->mapped_ptr = (cl_mapped_ptr *)CL_MALLOC(sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz); + mem->mapped_ptr = CL_MALLOC(sizeof(_cl_mapped_ptr_info) * mem->mapped_ptr_sz); if (!mem->mapped_ptr) { CL_MUTEX_UNLOCK(&mem->lock); return CL_OUT_OF_HOST_MEMORY; } - memset(mem->mapped_ptr, 0, mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); + memset(mem->mapped_ptr, 0, mem->mapped_ptr_sz * sizeof(_cl_mapped_ptr_info)); slot = 0; } else { - int i = 0; - for (; i < mem->mapped_ptr_sz; i++) { + /* Someone may already add a slot when we do the map, may conflict. check it again*/ + for (i = 0; i < mem->mapped_ptr_sz; i++) { + if (offset + size <= mem->mapped_ptr[i].offset || + offset >= mem->mapped_ptr[i].offset + mem->mapped_ptr[i].size) + continue; // No overlap, continue. + + /* overlap, check the flags. Write is mutual exclusive. */ + if (map_flags != CL_MAP_READ || mem->mapped_ptr[i].flags != CL_MAP_READ) { + CL_MUTEX_UNLOCK(&mem->lock); + return CL_INVALID_OPERATION; + } + } + + /* Insert a new one. 
*/ + for (i = 0; i < mem->mapped_ptr_sz; i++) { if (mem->mapped_ptr[i].ptr == NULL) { slot = i; break; } } - if (i == mem->mapped_ptr_sz) { - cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)CL_MALLOC(sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2); + + if (i == mem->mapped_ptr_sz) { /* Expand the list double. */ + cl_mapped_ptr_info new_ptr = CL_MALLOC(sizeof(_cl_mapped_ptr_info) * mem->mapped_ptr_sz * 2); if (!new_ptr) { CL_MUTEX_UNLOCK(&mem->lock); return CL_OUT_OF_HOST_MEMORY; } - memset(new_ptr, 0, 2 * mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); - memcpy(new_ptr, mem->mapped_ptr, - mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); + memset(new_ptr, 0, 2 * mem->mapped_ptr_sz * sizeof(_cl_mapped_ptr_info)); + memcpy(new_ptr, mem->mapped_ptr, mem->mapped_ptr_sz * sizeof(_cl_mapped_ptr_info)); slot = mem->mapped_ptr_sz; mem->mapped_ptr_sz *= 2; CL_FREE(mem->mapped_ptr); mem->mapped_ptr = new_ptr; } } + assert(slot != -1); mem->mapped_ptr[slot].ptr = ptr; mem->mapped_ptr[slot].offset = offset; mem->mapped_ptr[slot].size = size; + mem->mapped_ptr[slot].flags = map_flags; if(origin) { assert(region); mem->mapped_ptr[slot].origin[0] = origin[0]; @@ -530,6 +626,9 @@ static void* cl_enqueue_map_buffer(cl_command_queue queue, cl_mem buffer, cl_boo void *mem_ptr = NULL; cl_int index; + err = cl_mem_find_mapped(buffer, offset, map_flags, size); + if (err != CL_SUCCESS) + goto error; if (blocking_map) { /* According to spec, when in block mode, we need to ensure all the @@ -545,51 +644,21 @@ static void* cl_enqueue_map_buffer(cl_command_queue queue, cl_mem buffer, cl_boo goto error; } - err = queue->device->driver->enqueue_map_buffer(queue, buffer, &mem_ptr, blocking_map, map_flags, - offset, size, num_events, event_list, event); - if (err != CL_SUCCESS) - goto error; - if (event_ret) { event = cl_create_event(buffer->ctx, queue, CL_FALSE, num_events, event_list, &err); if (event == NULL) goto error; } - cl_enqueue_set_work_item_event(it, event); - -#if 0 - if (blocking_map) { - /* 
According to spec, when in block mode, we need to ensure all the - commands in queue are flushed. */ - err = cl_enqueue_wait_for_flush(queue); - if (err != CL_SUCCESS) - goto error; - - if (event_list) { // Need to wait for events. - if (cl_enqueue_wait_for_events(event_list, num_events) == false) { - /* canceled or some errors. */ - return CL_MAP_FAILURE; - } - } - } - - - it = cl_enqueue_create_work_item(queue, num_events, event_list, event); - if (it == NULL) { - err = CL_OUT_OF_HOST_MEMORY; + err = queue->device->driver->enqueue_map_buffer(queue, buffer, &mem_ptr, blocking_map, map_flags, + offset, size, num_events, event_list, event); + if (err != CL_SUCCESS) goto error; - } - - - - - -#endif + cl_enqueue_set_work_item_event(it, event); /* We need to store the map info for unmap and debug. */ - err = cl_record_mapped(buffer, mem_ptr, offset, size, NULL, NULL); + err = cl_mem_record_mapped(buffer, mem_ptr, offset, map_flags, size, NULL, NULL); if (err != CL_SUCCESS) { // Unmap and return error. queue->device->driver->enqueue_unmap_mem(queue, buffer, mem_ptr, 0, NULL, NULL); @@ -625,8 +694,7 @@ static cl_int cl_enqueue_unmap_mem(cl_command_queue queue, cl_mem memobj, void * cl_event event = NULL; cl_int err = CL_SUCCESS; cl_int index; - cl_bool ptr_invalid = CL_FALSE; - int i; + _cl_mapped_ptr_info ptr_info; if (event_ret) { event = cl_create_event(memobj->ctx, queue, CL_FALSE, num_events, event_list, &err); @@ -636,51 +704,18 @@ static cl_int cl_enqueue_unmap_mem(cl_command_queue queue, cl_mem memobj, void * /* Check the pointer valid. */ INVALID_VALUE_IF(!mapped_ptr); - CL_MUTEX_LOCK(&memobj->lock); - assert(memobj->mapped_ptr_sz >= memobj->map_ref); - for (i = 0; i < memobj->mapped_ptr_sz; i++) { - if (memobj->mapped_ptr[i].ptr == mapped_ptr) { - break; - } - } - if (i == memobj->mapped_ptr_sz) - ptr_invalid = CL_TRUE; - CL_MUTEX_UNLOCK(&memobj->lock); + /* can not find a mapped address? 
*/ - INVALID_VALUE_IF(ptr_invalid == CL_TRUE); + err = cl_mem_delete_mapped(memobj, mapped_ptr, &ptr_info); + if (err != CL_SUCCESS) + goto error; err = queue->device->driver->enqueue_unmap_mem(queue, memobj, mapped_ptr, num_events, event_list, event); if (err != CL_SUCCESS) goto error; - /* shrink the mapped slot. */ CL_MUTEX_LOCK(&memobj->lock); - memobj->mapped_ptr[i].ptr = NULL; - memobj->map_ref--; - if (memobj->mapped_ptr_sz/2 > memobj->map_ref) { - int j = 0; - cl_mapped_ptr *new_ptr = - (cl_mapped_ptr *)CL_MALLOC(sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2)); - if (!new_ptr) { - /* Just do nothing. */ - CL_MUTEX_UNLOCK(&memobj->lock); - goto error; - } - memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr)); - - for (i = 0; i < memobj->mapped_ptr_sz; i++) { - if (memobj->mapped_ptr[i].ptr) { - new_ptr[j] = memobj->mapped_ptr[i]; - j++; - assert(j < memobj->mapped_ptr_sz/2); - } - } - memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2; - CL_FREE(memobj->mapped_ptr); - memobj->mapped_ptr = new_ptr; - } - index = cl_context_get_device_index(queue->ctx, queue->device); memobj->enqueued_devices[index] = CL_TRUE; CL_MUTEX_UNLOCK(&memobj->lock); |