author    Junyan He <junyan.he@intel.com>    2016-04-28 19:05:16 +0800
committer Junyan He <junyan.he@intel.com>    2016-04-28 19:05:16 +0800
commit    69ea32876e7a8c6689cab56331ecb94600c39508 (patch)
tree      e1f04fabf99d48bd8c52f26d7c1c03620a697f29
parent    40658a626bb08eb122c466869fa02031d120241c (diff)
Refactor GenGPUMem map/alloc: track the bound user pointer in bindedHostPtr, drop the cl_mem parameter from genMapBo(), and rename genAllocMemBo() to genAllocBo()
-rw-r--r--  backend/src/driver/cl_gen_driver.hpp   12
-rw-r--r--  backend/src/driver/cl_gen_kernel.cpp    2
-rw-r--r--  backend/src/driver/cl_gen_mem.cpp     107
3 files changed, 70 insertions(+), 51 deletions(-)
diff --git a/backend/src/driver/cl_gen_driver.hpp b/backend/src/driver/cl_gen_driver.hpp
index 39607c16..feaea18f 100644
--- a/backend/src/driver/cl_gen_driver.hpp
+++ b/backend/src/driver/cl_gen_driver.hpp
@@ -254,22 +254,22 @@ struct GenGPUCommandQueue {
struct GenGPUMem {
drm_intel_bo *bo;
GenGPUContext* gpuCtx;
- bool bindUserPtr;
- void* alignedHostPtr;
+ void* bindedHostPtr;
+  void* alignedHostPtr; // page-aligned base for the bound user ptr
void* mappedAddr;
bool writeMap;
size_t realSize; // May differ from the size in the buffer, due to alignment, etc.
cl_gpgpu_tiling tiling;
bool mappedGtt;
- volatile int mapRef;
+ int mapRef;
pthread_mutex_t mutex;
- GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), bindUserPtr(false), alignedHostPtr(NULL),
+ GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), bindedHostPtr(NULL), alignedHostPtr(NULL),
mappedAddr(NULL), writeMap(false), realSize(0), tiling(GPGPU_NO_TILE), mappedGtt(false), mapRef(0) {
pthread_mutex_init(&this->mutex, NULL);
}
~GenGPUMem(void);
- bool genAllocMemBo(cl_mem mem);
- void* genMapBo(cl_mem mem, bool write);
+ bool genAllocBo(cl_mem mem);
+ void* genMapBo(bool write);
void genUnMapBo(cl_mem mem);
};
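For context, the map/unmap pair declared above is reference counted: repeated maps return the same address, and only the final unmap releases the mapping. A minimal caller sketch, assuming a valid genMem/mem pair (the GenGPUMem methods are the ones from this patch; the calling sequence itself is hypothetical):

/* Nested maps are legal; only the last unmap tears the mapping down. */
void* p0 = genMem->genMapBo(true);  /* mapRef 0 -> 1: performs the real map */
void* p1 = genMem->genMapBo(true);  /* mapRef 1 -> 2: returns the same addr */
GBE_ASSERT(p0 == p1);
genMem->genUnMapBo(mem);            /* mapRef 2 -> 1: still mapped */
genMem->genUnMapBo(mem);            /* mapRef 1 -> 0: bo really unmapped */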
diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp
index 078a6331..9b41979b 100644
--- a/backend/src/driver/cl_gen_kernel.cpp
+++ b/backend/src/driver/cl_gen_kernel.cpp
@@ -607,7 +607,7 @@ static cl_int genAllocateArgBufs(cl_kernel kernel, Kernel* ker, cl_command_queue
GenGPUMem* genMem = reinterpret_cast<GenGPUMem*>(getGenMemPrivate(mem, queue->device));
GBE_ASSERT(genMem != NULL);
- if (genMem->genAllocMemBo(mem) == false) {
+ if (genMem->genAllocBo(mem) == false) {
return CL_MEM_OBJECT_ALLOCATION_FAILURE;
}
}
diff --git a/backend/src/driver/cl_gen_mem.cpp b/backend/src/driver/cl_gen_mem.cpp
index f69a9f5d..3d8cd219 100644
--- a/backend/src/driver/cl_gen_mem.cpp
+++ b/backend/src/driver/cl_gen_mem.cpp
@@ -86,7 +86,7 @@ void GenGPUMem::genUnMapBo(cl_mem mem)
this->writeMap = false;
}
-void* GenGPUMem::genMapBo(cl_mem mem, bool write)
+void* GenGPUMem::genMapBo(bool write)
{
/* From here, we want to lock. */
GenGPULockerHelper mutexAlloc(&this->mutex);
@@ -95,22 +95,25 @@ void* GenGPUMem::genMapBo(cl_mem mem, bool write)
return NULL; // Not the same map operation (read vs. write).
if (this->mappedAddr != NULL) {
+ GBE_ASSERT(this->mapRef > 0);
this->mapRef++;
return this->mappedAddr;
}
GBE_ASSERT(this->mapRef == 0);
- if (IS_IMAGE(mem) && this->tiling != GPGPU_NO_TILE) {
- /* If we are image from User PTR, the tiling must be GPGPU_NO_TILE. */
+ if (this->tiling != GPGPU_NO_TILE) {
+    /* If we are in tiling mode, this must be an image and it cannot come from a user PTR. */
+ GBE_ASSERT(this->bindedHostPtr == NULL);
+
drm_intel_gem_bo_map_gtt(this->bo);
GBE_ASSERT(this->bo->virt);
this->mappedGtt = 1;
this->mappedAddr = this->bo->virt;
} else {
- if (this->alignedHostPtr) {
- //not created from userptr, the offset should not be always zero.
+ if (this->bindedHostPtr) {
+      // Created from userptr: simply set the mapped addr to the host ptr.
drm_intel_bo_wait_rendering(this->bo);
- this->mappedAddr = mem->host_ptr;
+ this->mappedAddr = this->bindedHostPtr;
} else {
this->writeMap = write;
drm_intel_bo_map(this->bo, write);
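After this hunk, genMapBo() is left with three paths: a tiled bo is mapped through the GTT, a userptr bo simply returns the bound host pointer once rendering finishes, and everything else takes a plain CPU map. A condensed, hypothetical sketch of that decision, using the same libdrm_intel calls as above (bo->virt is libdrm's C++ spelling of the mapped address) but stripped of GenGPUMem state and locking:

static void* map_bo_sketch(drm_intel_bo* bo, bool tiled, void* boundHostPtr, bool write)
{
  if (tiled) {                   /* Tiled image: must map through the GTT. */
    drm_intel_gem_bo_map_gtt(bo);
    return bo->virt;
  }
  if (boundHostPtr) {            /* Userptr bo: the CPU already sees these pages. */
    drm_intel_bo_wait_rendering(bo);
    return boundHostPtr;
  }
  drm_intel_bo_map(bo, write);   /* Ordinary buffer: plain CPU map. */
  return bo->virt;
}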
@@ -123,56 +126,67 @@ void* GenGPUMem::genMapBo(cl_mem mem, bool write)
return this->mappedAddr;
}
-bool GenGPUMem::genAllocMemBo(cl_mem mem)
+bool GenGPUMem::genAllocBo(cl_mem mem)
{
/* From here, we want to lock. */
GenGPULockerHelper mutexAlloc(&this->mutex);
+  /* Never alloc a bo for a sub-buffer; sub-buffers must use the parent's GenGPUMem. */
+ GBE_ASSERT(!(cl_mem_to_buffer(mem) && cl_mem_to_buffer(mem)->parent != NULL));
+ GBE_ASSERT(mem->size > 0);
+ size_t alignment = 64;
if (this->bo != NULL) {
return true;
}
- if (!IS_IMAGE(mem)) {
- if (mem->flags & CL_MEM_USE_HOST_PTR) {
-#ifdef HAS_USERPTR
- if (queue->device->host_unified_memory) { // Need to alloc host accessible mem.
- int page_size = getpagesize();
- int cacheline_size = queue->device->global_mem_cache_line_size;
-
- if (ALIGN((unsigned long)(mem->host_ptr), cacheline_size) == (unsigned long)(mem->host_ptr)) {
- this->alignedHostPtr = (void*)(((unsigned long)(mem->host_ptr)) & (~(page_size - 1)));
+ if (this->tiling != GPGPU_NO_TILE)
+ alignment = 4096;
+
+ if (mem->flags & (CL_MEM_USE_HOST_PTR|CL_MEM_ALLOC_HOST_PTR)) {
+ GBE_ASSERT(mem->host_ptr);
+#ifdef HAS_USERPTR // Try to bind user ptr
+ if (this->tiling == GPGPU_NO_TILE && mem->enqueued_device->host_unified_memory) {
+      /* userptr does not support tiling */
+ int page_size = getpagesize();
+ int cacheline_size = mem->enqueued_device->global_mem_cache_line_size;
+        if (ALIGN((unsigned long)(mem->host_ptr), cacheline_size) == (unsigned long)(mem->host_ptr)) {
+          /* The host ptr must be cache line aligned. */
+          this->alignedHostPtr = (void*)(((unsigned long)(mem->host_ptr)) & (~(page_size - 1)));
this->realSize =
ALIGN(((unsigned long)mem->host_ptr - (unsigned long)alignedHostPtr + mem->size), page_size);
this->bo = genBufferAllocUserptr(gpuCtx->bufmgr, "CL userptr memory object",
this->alignedHostPtr, this->realSize, 0);
+          if (this->bo) {
+            this->bindedHostPtr = mem->host_ptr;
+          } else {
+            this->alignedHostPtr = this->bindedHostPtr = NULL;
+          }
}
-
- /* We can not create host accessible mem for GPU, we need to fallback to a fake
- CL_MEM_USE_HOST_PTR policy. We just keep user ptr, every time, when MAP,
- we copy the data to GPU and when UNMAP, copy back the data to host. */
- if (this->bo == NULL)
- this->alignedHostPtr = NULL;
-#endif
}
+#endif
+ }
- if (this->bo == NULL) {
- this->bo = drm_intel_bo_alloc(this->gpuCtx->bufmgr, "CL memory object", this->realSize, 64);
- if (UNLIKELY(this->bo == NULL)) {
- return false;
- }
+ if (this->bo == NULL) {
+    /* HSW: Byte scattered Read/Write has the limitation that
+       the buffer size must be a multiple of 4 bytes. */
+ this->realSize = ALIGN(mem->size, 4);
+ this->bo = drm_intel_bo_alloc(this->gpuCtx->bufmgr, "CL memory object", this->realSize, alignment);
+ this->bindedHostPtr = NULL;
+ this->alignedHostPtr = NULL;
+ if (UNLIKELY(this->bo == NULL)) {
+ return false;
}
}
+ GBE_ASSERT(this->bo != NULL);
+
+  /* The user ptr is bound directly, so there is no need to copy the data. */
+ if (this->bindedHostPtr)
+ return true;
+
/* Copy the data if required */
- if ((mem->flags & CL_MEM_COPY_HOST_PTR) ||
- (mem->flags & CL_MEM_USE_HOST_PTR && this->alignedHostPtr == NULL)) {
- if (IS_IMAGE(mem)) {
- drm_intel_bo_subdata(this->bo, 0, mem->size, mem->host_ptr);
- } else {
- cl_mem_buffer buffer = cl_mem_to_buffer(mem);
- drm_intel_bo_subdata(this->bo, buffer->sub_offset, mem->size, mem->host_ptr);
- }
+ if (mem->flags & (CL_MEM_COPY_HOST_PTR|CL_MEM_USE_HOST_PTR)) {
+ drm_intel_bo_subdata(this->bo, 0, mem->size, mem->host_ptr);
}
return true;
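The userptr path above hinges on page and cache-line arithmetic: the bound region must start on a page boundary at or below host_ptr and extend, page aligned, past host_ptr + size. A self-contained sketch of that computation, assuming ALIGN is the usual round-up-to-multiple macro (the helper name here is illustrative, not from the patch):

#include <unistd.h>
#include <assert.h>
#define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

static void userptr_region(void* host_ptr, size_t size,
                           void** aligned_ptr, size_t* real_size)
{
  unsigned long page_size = getpagesize();
  /* Round the start down to a page boundary... */
  *aligned_ptr = (void*)((unsigned long)host_ptr & ~(page_size - 1));
  /* ...and round the length up, so [host_ptr, host_ptr + size) is covered. */
  *real_size = ALIGN((unsigned long)host_ptr
                       - (unsigned long)*aligned_ptr + size, page_size);
  assert((unsigned long)*aligned_ptr + *real_size
           >= (unsigned long)host_ptr + size);
}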
@@ -343,10 +357,7 @@ cl_int GenCreateBuffer(cl_mem mem, const cl_device_id device)
if (!buffer->parent) {
GenGPUContext* gpuCtx = reinterpret_cast<GenGPUContext*>(getGenContextPrivate(mem->ctx, device));
GenGPUMem* genMem = GBE_NEW(GenGPUMem, gpuCtx);
-
- /* HSW: Byte scattered Read/Write has limitation that
- the buffer size must be a multiple of 4 bytes. */
- genMem->realSize = ALIGN(mem->size, 4);
+ genMem->realSize = mem->size;
setGenMemPrivate(mem, device, genMem);
} else {
/* A sub buffer, just ref the parent's GenGPUMem. */
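With the HSW workaround moved into genAllocBo(), realSize now starts out as the user-visible size and is only rounded up to a 4-byte multiple when the bo is actually allocated. Concretely, assuming the usual ALIGN semantics:

/* Illustrative values only. */
GBE_ASSERT(ALIGN(1001u, 4) == 1004u);  /* a 1001-byte buffer gets a 1004-byte bo */
GBE_ASSERT(ALIGN(1004u, 4) == 1004u);  /* already a multiple of 4: unchanged */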
@@ -379,10 +390,13 @@ cl_int GenReleaseMem(cl_mem mem, const cl_device_id device)
static void* genDoMapBuffer(GenGPUMem* genMem, cl_mem mem, cl_map_flags flags, size_t offset, size_t size)
{
cl_mem_buffer buffer = cl_mem_to_buffer(mem);
+
+  GBE_ASSERT(buffer); // Must be a buffer.
+
char* retAddr = NULL;
bool mapWrite = flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION);
- void* mappedAddr = genMem->genMapBo(mem, mapWrite);
+ void* mappedAddr = genMem->genMapBo(mapWrite);
if (mappedAddr == NULL) {
return NULL;
}
@@ -458,7 +472,7 @@ cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr,
if (genMem == NULL)
return CL_INVALID_VALUE;
- if (genMem->genAllocMemBo(mem) == false) {
+ if (genMem->genAllocBo(mem) == false) {
return CL_MEM_OBJECT_ALLOCATION_FAILURE;
}
@@ -475,6 +489,7 @@ cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr,
return CL_SUCCESS;
}
+
return CL_MAP_FAILURE;
}
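From the application's point of view, the paths patched here sit behind clEnqueueMapBuffer and clEnqueueUnmapMemObject. A minimal host-side sketch of the round trip this commit services (standard OpenCL 1.2 API; ctx, queue, host_ptr, data, and size are assumed to exist, and error handling is mostly omitted):

cl_int err;
cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, size, host_ptr, &err);
/* genAllocBo() runs lazily: the first map (or first kernel use) creates the bo. */
void* p = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_WRITE,
                             0, size, 0, NULL, NULL, &err);
memcpy(p, data, size);  /* with a bound userptr, p is host_ptr itself: no copy was made */
clEnqueueUnmapMemObject(queue, buf, p, 0, NULL, NULL);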