author    Junyan He <junyan.he@intel.com>  2016-04-27 18:04:15 +0800
committer Junyan He <junyan.he@intel.com>  2016-04-27 18:04:15 +0800
commit    6eaaaaeed4cee6c5d9e5e345162864c79a1de751 (patch)
tree      194f73ecde06ae158d19c10052d570b5e3a61e9d /backend
parent    8c7d57dad366c0ccedb2c1ace1bc61836b99b2bf (diff)
GenEnqueueMapBuffer: pass a cl_command_queue_work_item instead of an explicit event list
Diffstat (limited to 'backend')
-rw-r--r--  backend/src/driver/cl_gen_driver.h    |  3
-rw-r--r--  backend/src/driver/cl_gen_driver.hpp  |  5
-rw-r--r--  backend/src/driver/cl_gen_mem.cpp     | 83
3 files changed, 12 insertions(+), 79 deletions(-)
diff --git a/backend/src/driver/cl_gen_driver.h b/backend/src/driver/cl_gen_driver.h
index 5e475b17..82c344e3 100644
--- a/backend/src/driver/cl_gen_driver.h
+++ b/backend/src/driver/cl_gen_driver.h
@@ -57,8 +57,7 @@ cl_int GenSupportImageFmt(const cl_device_id device, cl_mem_object_type image_ty
cl_int GenCreateBuffer(cl_mem mem, const cl_device_id device);
cl_int GenReleaseMem(cl_mem mem, const cl_device_id device);
cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, cl_bool block,
- cl_map_flags flags, size_t offset, size_t size, cl_uint num_events,
- const cl_event *event_list, cl_event event_ret);
+ cl_map_flags flags, size_t offset, size_t size, cl_command_queue_work_item item);
cl_int GenEnqueueUnmapMem(cl_command_queue queue, cl_mem mem, void *mapped_ptr,
cl_uint num_events, const cl_event *event_list, cl_event event_ret);
cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const uint32_t work_dim,
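
The interface change above folds the explicit (num_events, event_list, event_ret) triple into a single cl_command_queue_work_item handle, so a map request now carries its own dependency and completion state. The diff does not show the type's definition; the sketch below is a hypothetical reconstruction, inferred only from the item->depend_events and item->status accesses in cl_gen_mem.cpp further down:

/* Hypothetical sketch -- NOT the real Beignet definition. The field names
 * depend_events and status come from their uses in cl_gen_mem.cpp below;
 * every other name here is an assumption made for illustration. */
#include <CL/cl.h>

typedef struct _cl_command_queue_work_item {
  cl_uint num_depend_events;     /* number of events to wait on */
  const cl_event *depend_events; /* NULL means nothing to wait for */
  cl_event event_ret;            /* event to signal when the work completes */
  cl_int status;                 /* CL_COMPLETE, or a negative error code */
} *cl_command_queue_work_item;

Bundling the event bookkeeping into one struct keeps the driver entry points short and lets the queue worker thread own the whole life cycle of a deferred operation.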
diff --git a/backend/src/driver/cl_gen_driver.hpp b/backend/src/driver/cl_gen_driver.hpp
index 6f880174..39607c16 100644
--- a/backend/src/driver/cl_gen_driver.hpp
+++ b/backend/src/driver/cl_gen_driver.hpp
@@ -254,6 +254,7 @@ struct GenGPUCommandQueue {
struct GenGPUMem {
drm_intel_bo *bo;
GenGPUContext* gpuCtx;
+ bool bindUserPtr;
void* alignedHostPtr;
void* mappedAddr;
bool writeMap;
@@ -262,8 +263,8 @@ struct GenGPUMem {
bool mappedGtt;
volatile int mapRef;
pthread_mutex_t mutex;
- GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), alignedHostPtr(NULL), mappedAddr(NULL),
- writeMap(false), realSize(0), tiling(GPGPU_NO_TILE), mappedGtt(false), mapRef(0) {
+ GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), bindUserPtr(false), alignedHostPtr(NULL),
+ mappedAddr(NULL), writeMap(false), realSize(0), tiling(GPGPU_NO_TILE), mappedGtt(false), mapRef(0) {
pthread_mutex_init(&this->mutex, NULL);
}
~GenGPUMem(void);
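
The hpp hunk inserts the new bindUserPtr member between gpuCtx and alignedHostPtr and reorders the constructor's initializer list to match. C++ initializes members in declaration order regardless of how the initializer list is written, so keeping the two in sync avoids -Wreorder warnings and subtle surprises; a toy example (not Beignet code) of the pattern:

// Toy C++ example: members are initialized in declaration order, so the
// initializer list should be written in that same order.
#include <cstdio>

struct Mem {
  bool bindUserPtr;       // new member, declared early as in the patch
  void *alignedHostPtr;
  Mem() : bindUserPtr(false), alignedHostPtr(nullptr) {}
};

int main() {
  Mem m;
  std::printf("bindUserPtr=%d\n", m.bindUserPtr ? 1 : 0);
  return 0;
}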
diff --git a/backend/src/driver/cl_gen_mem.cpp b/backend/src/driver/cl_gen_mem.cpp
index 49975000..f69a9f5d 100644
--- a/backend/src/driver/cl_gen_mem.cpp
+++ b/backend/src/driver/cl_gen_mem.cpp
@@ -442,13 +442,17 @@ bool GenGPUWorkItemMapBuf::submit(void)
extern "C"
cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, cl_bool block,
- cl_map_flags flags, size_t offset, size_t size, cl_uint num_events,
- const cl_event *event_list, cl_event event_ret)
+ cl_map_flags flags, size_t offset, size_t size, cl_command_queue_work_item item)
{
+ assert(item);
+
if (mem->type != CL_MEM_OBJECT_BUFFER) {
return CL_INVALID_VALUE;
}
+
GenGPUMem* genMem = reinterpret_cast<GenGPUMem*>(getGenMemPrivate(mem, queue->device));
+ GenGPUCommandQueue* gpuQueue = (GenGPUCommandQueue*)getGenCommandQueuePrivate(queue);
+ GBE_ASSERT(gpuQueue);
void* retAddr = NULL;
if (genMem == NULL)
@@ -458,90 +462,19 @@ cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr,
return CL_MEM_OBJECT_ALLOCATION_FAILURE;
}
- GenGPUCommandQueue* gpuQueue = (GenGPUCommandQueue*)getGenCommandQueuePrivate(queue);
- GBE_ASSERT(gpuQueue);
-
- if (block) {
- /* According to the spec, when in blocking mode, we need to ensure all
- the commands in the queue are flushed. */
- gpuQueue->waitForFlush();
-
- if (event_list) { // Need to wait for events.
- if (gpuQueue->waitForEvents(event_list, num_events) == false) {
- /* canceled or some errors. */
- return CL_MAP_FAILURE;
- }
- }
-
+ if (block || item->depend_events == NULL) { // No need to do this asynchronously; just map it.
retAddr = genDoMapBuffer(genMem, mem, flags, offset, size);
if (retAddr == NULL) {
- if (event_ret) {
- event_ret->set_status(event_ret, -1); // Set error for that event.
- }
- return CL_MAP_FAILURE;
- }
-
- if (ret_addr)
- *ret_addr = retAddr;
-
- if (event_ret) {
- event_ret->set_status(event_ret, CL_COMPLETE);
- }
- return CL_SUCCESS;
- } else if (event_list == NULL) {
- /* We do not have any events to wait for; map it in sync mode. */
- retAddr = genDoMapBuffer(genMem, mem, flags, offset, size);
- if (retAddr == NULL) {
- if (event_ret) {
- event_ret->set_status(event_ret, -1); // Set error for that event.
- }
- return CL_MAP_FAILURE;
- }
-
- if (ret_addr)
- *ret_addr = retAddr;
-
- if (event_ret) {
- event_ret->set_status(event_ret, CL_COMPLETE);
- }
- return CL_SUCCESS;
- } else {
- GBE_ASSERT(num_events > 0);
- /* An awkward case: we have events to wait for, so we cannot do the map
- operation synchronously. But the spec requires us to return the mapped
- address immediately. We can just allocate host memory and return that
- address. Once the buffer is really mapped, we then copy the data back. */
- void* addr = NULL;
- if (!mem->flags & CL_MEM_USE_HOST_PTR) {
- addr = GBE_MALLOC(size);
- if (addr == NULL)
- return CL_OUT_OF_HOST_MEMORY;
- }
-
- GenGPUWorkItemMapBuf* mapItem = GBE_NEW(GenGPUWorkItemMapBuf, mem, genMem, addr, flags,
- offset, size, event_ret, event_list, num_events);
- if (mapItem == NULL) {
- if (addr)
- GBE_FREE(addr);
- return CL_OUT_OF_HOST_MEMORY;
- }
-
- /* Finally, enqueue it to the queue worker thread. */
- if (gpuQueue->enqueueWorkItem(mapItem) == false) {
- GBE_DELETE(mapItem);
- if (addr)
- GBE_FREE(addr);
return CL_MAP_FAILURE;
}
if (ret_addr)
*ret_addr = retAddr;
+ item->status = CL_COMPLETE;
return CL_SUCCESS;
}
- // Never come to here.
- GBE_ASSERT(0);
return CL_MAP_FAILURE;
}
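
After this patch the duplicated blocking and no-event branches collapse into one: map synchronously whenever the call blocks or the work item carries no dependent events, then record completion in item->status. The C++ model below is self-contained with stubbed types and shows only the shape of that control flow; do_map, work_item_t, and enqueue_map are invented stand-ins, not the real driver API:

// Stubbed model of the new GenEnqueueMapBuffer control flow.
#include <cassert>
#include <cstddef>
#include <cstdio>

#define CL_SUCCESS      0
#define CL_COMPLETE     0x0
#define CL_MAP_FAILURE  (-12)

struct work_item_t {
  const void *depend_events;  // NULL: nothing to wait for
  int status;
};

// Stub standing in for genDoMapBuffer().
static void *do_map(size_t offset, size_t size) {
  static char backing[256];
  return (offset + size <= sizeof(backing)) ? backing + offset : NULL;
}

static int enqueue_map(int block, work_item_t *item, void **ret_addr,
                       size_t offset, size_t size) {
  assert(item);
  if (block || item->depend_events == NULL) {
    // Synchronous path: map now and mark the item complete.
    void *addr = do_map(offset, size);
    if (addr == NULL)
      return CL_MAP_FAILURE;
    if (ret_addr)
      *ret_addr = addr;
    item->status = CL_COMPLETE;
    return CL_SUCCESS;
  }
  // Asynchronous path (queue worker) is not shown in this hunk.
  return CL_MAP_FAILURE;
}

int main() {
  work_item_t item = { NULL, -1 };
  void *p = NULL;
  std::printf("sync map -> %d, addr=%p\n",
              enqueue_map(0, &item, &p, 0, 64), p);
  return 0;
}

The deleted branch handled the remaining case by allocating a staging host buffer, returning its address immediately as the spec requires, and deferring the real map plus copy-back to a GenGPUWorkItemMapBuf on the queue worker thread; within this hunk the new code simply falls through to CL_MAP_FAILURE for that path.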