author    Junyan He <junyan.he@intel.com>    2016-04-28 19:05:16 +0800
committer Junyan He <junyan.he@intel.com>    2016-04-28 19:05:16 +0800
commit    69ea32876e7a8c6689cab56331ecb94600c39508 (patch)
tree      e1f04fabf99d48bd8c52f26d7c1c03620a697f29
parent    40658a626bb08eb122c466869fa02031d120241c (diff)
Refactor GenGPUMem map/alloc: track the bound user pointer in bindedHostPtr, drop the cl_mem parameter from genMapBo(), and rename genAllocMemBo() to genAllocBo()
-rw-r--r--  backend/src/driver/cl_gen_driver.hpp   12
-rw-r--r--  backend/src/driver/cl_gen_kernel.cpp    2
-rw-r--r--  backend/src/driver/cl_gen_mem.cpp     107
3 files changed, 70 insertions(+), 51 deletions(-)
diff --git a/backend/src/driver/cl_gen_driver.hpp b/backend/src/driver/cl_gen_driver.hpp
index 39607c16..feaea18f 100644
--- a/backend/src/driver/cl_gen_driver.hpp
+++ b/backend/src/driver/cl_gen_driver.hpp
@@ -254,22 +254,22 @@ struct GenGPUCommandQueue {
struct GenGPUMem {
drm_intel_bo *bo;
GenGPUContext* gpuCtx;
- bool bindUserPtr;
- void* alignedHostPtr;
+ void* bindedHostPtr;
+  void* alignedHostPtr; // page-aligned base for the bound user ptr
void* mappedAddr;
bool writeMap;
size_t realSize; // May differ from the size in the buffer, due to alignment, etc.
cl_gpgpu_tiling tiling;
bool mappedGtt;
- volatile int mapRef;
+ int mapRef;
pthread_mutex_t mutex;
- GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), bindUserPtr(false), alignedHostPtr(NULL),
+ GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), bindedHostPtr(NULL), alignedHostPtr(NULL),
mappedAddr(NULL), writeMap(false), realSize(0), tiling(GPGPU_NO_TILE), mappedGtt(false), mapRef(0) {
pthread_mutex_init(&this->mutex, NULL);
}
~GenGPUMem(void);
- bool genAllocMemBo(cl_mem mem);
- void* genMapBo(cl_mem mem, bool write);
+ bool genAllocBo(cl_mem mem);
+ void* genMapBo(bool write);
void genUnMapBo(cl_mem mem);
};
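For context, the map/unmap pair declared above is reference counted: repeated maps return the same address, and only the final unmap releases the mapping. A minimal caller sketch, assuming a valid genMem/mem pair (the GenGPUMem methods are the ones from this patch; the calling sequence itself is hypothetical):

/* Nested maps are legal; only the last unmap tears the mapping down. */
void* p0 = genMem->genMapBo(true);  /* mapRef 0 -> 1: performs the real map */
void* p1 = genMem->genMapBo(true);  /* mapRef 1 -> 2: returns the same addr */
GBE_ASSERT(p0 == p1);
genMem->genUnMapBo(mem);            /* mapRef 2 -> 1: still mapped */
genMem->genUnMapBo(mem);            /* mapRef 1 -> 0: bo really unmapped */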
diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp
index 078a6331..9b41979b 100644
--- a/backend/src/driver/cl_gen_kernel.cpp
+++ b/backend/src/driver/cl_gen_kernel.cpp
@@ -607,7 +607,7 @@ static cl_int genAllocateArgBufs(cl_kernel kernel, Kernel* ker, cl_command_queue
GenGPUMem* genMem = reinterpret_cast<GenGPUMem*>(getGenMemPrivate(mem, queue->device));
GBE_ASSERT(genMem != NULL);
- if (genMem->genAllocMemBo(mem) == false) {
+ if (genMem->genAllocBo(mem) == false) {
return CL_MEM_OBJECT_ALLOCATION_FAILURE;
}
}
diff --git a/backend/src/driver/cl_gen_mem.cpp b/backend/src/driver/cl_gen_mem.cpp
index f69a9f5d..3d8cd219 100644
--- a/backend/src/driver/cl_gen_mem.cpp
+++ b/backend/src/driver/cl_gen_mem.cpp
@@ -86,7 +86,7 @@ void GenGPUMem::genUnMapBo(cl_mem mem)
this->writeMap = false;
}
-void* GenGPUMem::genMapBo(cl_mem mem, bool write)
+void* GenGPUMem::genMapBo(bool write)
{
/* From here, we want to lock. */
GenGPULockerHelper mutexAlloc(&this->mutex);
@@ -95,22 +95,25 @@ void* GenGPUMem::genMapBo(cl_mem mem, bool write)
return NULL; // Not the same map operation (read vs. write).
if (this->mappedAddr != NULL) {
+ GBE_ASSERT(this->mapRef > 0);
this->mapRef++;
return this->mappedAddr;
}
GBE_ASSERT(this->mapRef == 0);
- if (IS_IMAGE(mem) && this->tiling != GPGPU_NO_TILE) {
- /* If we are image from User PTR, the tiling must be GPGPU_NO_TILE. */
+ if (this->tiling != GPGPU_NO_TILE) {
+    /* If we are in tiling mode, this must be an image and it cannot come from a user PTR. */
+ GBE_ASSERT(this->bindedHostPtr == NULL);
+
drm_intel_gem_bo_map_gtt(this->bo);
GBE_ASSERT(this->bo->virt);
this->mappedGtt = 1;
this->mappedAddr = this->bo->virt;
} else {
- if (this->alignedHostPtr) {
- //not created from userptr, the offset should not be always zero.
+ if (this->bindedHostPtr) {
+      // Created from userptr: simply set the mapped addr to the host ptr.
drm_intel_bo_wait_rendering(this->bo);
- this->mappedAddr = mem->host_ptr;
+ this->mappedAddr = this->bindedHostPtr;
} else {
this->writeMap = write;
drm_intel_bo_map(this->bo, write);
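After this hunk, genMapBo() is left with three paths: a tiled bo is mapped through the GTT, a userptr bo simply returns the bound host pointer once rendering finishes, and everything else takes a plain CPU map. A condensed, hypothetical sketch of that decision, using the same libdrm_intel calls as above (bo->virt is libdrm's C++ spelling of the mapped address) but stripped of GenGPUMem state and locking:

static void* map_bo_sketch(drm_intel_bo* bo, bool tiled, void* boundHostPtr, bool write)
{
  if (tiled) {                   /* Tiled image: must map through the GTT. */
    drm_intel_gem_bo_map_gtt(bo);
    return bo->virt;
  }
  if (boundHostPtr) {            /* Userptr bo: the CPU already sees these pages. */
    drm_intel_bo_wait_rendering(bo);
    return boundHostPtr;
  }
  drm_intel_bo_map(bo, write);   /* Ordinary buffer: plain CPU map. */
  return bo->virt;
}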
@@ -123,56 +126,67 @@ void* GenGPUMem::genMapBo(cl_mem mem, bool write)
return this->mappedAddr;
}
-bool GenGPUMem::genAllocMemBo(cl_mem mem)
+bool GenGPUMem::genAllocBo(cl_mem mem)
{
/* From here, we want to lock. */
GenGPULockerHelper mutexAlloc(&this->mutex);
+  /* Never alloc a bo for a sub-buffer; sub-buffers must use the parent's GenGPUMem. */
+ GBE_ASSERT(!(cl_mem_to_buffer(mem) && cl_mem_to_buffer(mem)->parent != NULL));
+ GBE_ASSERT(mem->size > 0);
+ size_t alignment = 64;
if (this->bo != NULL) {
return true;
}
- if (!IS_IMAGE(mem)) {
- if (mem->flags & CL_MEM_USE_HOST_PTR) {
-#ifdef HAS_USERPTR
- if (queue->device->host_unified_memory) { // Need to alloc host accessible mem.
- int page_size = getpagesize();
- int cacheline_size = queue->device->global_mem_cache_line_size;
-
- if (ALIGN((unsigned long)(mem->host_ptr), cacheline_size) == (unsigned long)(mem->host_ptr)) {
- this->alignedHostPtr = (void*)(((unsigned long)(mem->host_ptr)) & (~(page_size - 1)));
+ if (this->tiling != GPGPU_NO_TILE)
+ alignment = 4096;
+
+ if (mem->flags & (CL_MEM_USE_HOST_PTR|CL_MEM_ALLOC_HOST_PTR)) {
+ GBE_ASSERT(mem->host_ptr);
+#ifdef HAS_USERPTR // Try to bind user ptr
+ if (this->tiling == GPGPU_NO_TILE && mem->enqueued_device->host_unified_memory) {
+      /* userptr does not support tiling */
+ int page_size = getpagesize();
+ int cacheline_size = mem->enqueued_device->global_mem_cache_line_size;
+        if (ALIGN((unsigned long)(mem->host_ptr), cacheline_size) == (unsigned long)(mem->host_ptr)) {
+          /* The host ptr must be cache line aligned. */
+          this->alignedHostPtr = (void*)(((unsigned long)(mem->host_ptr)) & (~(page_size - 1)));
this->realSize =
ALIGN(((unsigned long)mem->host_ptr - (unsigned long)alignedHostPtr + mem->size), page_size);
this->bo = genBufferAllocUserptr(gpuCtx->bufmgr, "CL userptr memory object",
this->alignedHostPtr, this->realSize, 0);
+          if (this->bo) {
+            this->bindedHostPtr = mem->host_ptr;
+          } else {
+            this->alignedHostPtr = this->bindedHostPtr = NULL;
+          }
}
-
- /* We can not create host accessible mem for GPU, we need to fallback to a fake
- CL_MEM_USE_HOST_PTR policy. We just keep user ptr, every time, when MAP,
- we copy the data to GPU and when UNMAP, copy back the data to host. */
- if (this->bo == NULL)
- this->alignedHostPtr = NULL;
-#endif
}
+#endif
+ }
- if (this->bo == NULL) {
- this->bo = drm_intel_bo_alloc(this->gpuCtx->bufmgr, "CL memory object", this->realSize, 64);
- if (UNLIKELY(this->bo == NULL)) {
- return false;
- }
+ if (this->bo == NULL) {
+    /* HSW: Byte scattered Read/Write has the limitation that
+       the buffer size must be a multiple of 4 bytes. */
+ this->realSize = ALIGN(mem->size, 4);
+ this->bo = drm_intel_bo_alloc(this->gpuCtx->bufmgr, "CL memory object", this->realSize, alignment);
+ this->bindedHostPtr = NULL;
+ this->alignedHostPtr = NULL;
+ if (UNLIKELY(this->bo == NULL)) {
+ return false;
}
}
+ GBE_ASSERT(this->bo != NULL);
+
+  /* The user ptr is bound directly, so there is no need to copy the data. */
+ if (this->bindedHostPtr)
+ return true;
+
/* Copy the data if required */
- if ((mem->flags & CL_MEM_COPY_HOST_PTR) ||
- (mem->flags & CL_MEM_USE_HOST_PTR && this->alignedHostPtr == NULL)) {
- if (IS_IMAGE(mem)) {
- drm_intel_bo_subdata(this->bo, 0, mem->size, mem->host_ptr);
- } else {
- cl_mem_buffer buffer = cl_mem_to_buffer(mem);
- drm_intel_bo_subdata(this->bo, buffer->sub_offset, mem->size, mem->host_ptr);
- }
+ if (mem->flags & (CL_MEM_COPY_HOST_PTR|CL_MEM_USE_HOST_PTR)) {
+ drm_intel_bo_subdata(this->bo, 0, mem->size, mem->host_ptr);
}
return true;
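The userptr path above hinges on page and cache-line arithmetic: the bound region must start on a page boundary at or below host_ptr and extend, page aligned, past host_ptr + size. A self-contained sketch of that computation, assuming ALIGN is the usual round-up-to-multiple macro (the helper name here is illustrative, not from the patch):

#include <unistd.h>
#include <assert.h>
#define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

static void userptr_region(void* host_ptr, size_t size,
                           void** aligned_ptr, size_t* real_size)
{
  unsigned long page_size = getpagesize();
  /* Round the start down to a page boundary... */
  *aligned_ptr = (void*)((unsigned long)host_ptr & ~(page_size - 1));
  /* ...and round the length up, so [host_ptr, host_ptr + size) is covered. */
  *real_size = ALIGN((unsigned long)host_ptr
                       - (unsigned long)*aligned_ptr + size, page_size);
  assert((unsigned long)*aligned_ptr + *real_size
           >= (unsigned long)host_ptr + size);
}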
@@ -343,10 +357,7 @@ cl_int GenCreateBuffer(cl_mem mem, const cl_device_id device)
if (!buffer->parent) {
GenGPUContext* gpuCtx = reinterpret_cast<GenGPUContext*>(getGenContextPrivate(mem->ctx, device));
GenGPUMem* genMem = GBE_NEW(GenGPUMem, gpuCtx);
-
- /* HSW: Byte scattered Read/Write has limitation that
- the buffer size must be a multiple of 4 bytes. */
- genMem->realSize = ALIGN(mem->size, 4);
+ genMem->realSize = mem->size;
setGenMemPrivate(mem, device, genMem);
} else {
/* A sub buffer, just ref the parent's GenGPUMem. */
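With the HSW workaround moved into genAllocBo(), realSize now starts out as the user-visible size and is only rounded up to a 4-byte multiple when the bo is actually allocated. Concretely, assuming the usual ALIGN semantics:

/* Illustrative values only. */
GBE_ASSERT(ALIGN(1001u, 4) == 1004u);  /* a 1001-byte buffer gets a 1004-byte bo */
GBE_ASSERT(ALIGN(1004u, 4) == 1004u);  /* already a multiple of 4: unchanged */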
@@ -379,10 +390,13 @@ cl_int GenReleaseMem(cl_mem mem, const cl_device_id device)
static void* genDoMapBuffer(GenGPUMem* genMem, cl_mem mem, cl_map_flags flags, size_t offset, size_t size)
{
cl_mem_buffer buffer = cl_mem_to_buffer(mem);
+
+  GBE_ASSERT(buffer); // Must be a buffer.
+
char* retAddr = NULL;
bool mapWrite = flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION);
- void* mappedAddr = genMem->genMapBo(mem, mapWrite);
+ void* mappedAddr = genMem->genMapBo(mapWrite);
if (mappedAddr == NULL) {
return NULL;
}
@@ -458,7 +472,7 @@ cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr,
if (genMem == NULL)
return CL_INVALID_VALUE;
- if (genMem->genAllocMemBo(mem) == false) {
+ if (genMem->genAllocBo(mem) == false) {
return CL_MEM_OBJECT_ALLOCATION_FAILURE;
}
@@ -475,6 +489,7 @@ cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr,
return CL_SUCCESS;
}
+
return CL_MAP_FAILURE;
}
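From the application's point of view, the paths patched here sit behind clEnqueueMapBuffer and clEnqueueUnmapMemObject. A minimal host-side sketch of the round trip this commit services (standard OpenCL 1.2 API; ctx, queue, host_ptr, data, and size are assumed to exist, and error handling is mostly omitted):

cl_int err;
cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, size, host_ptr, &err);
/* genAllocBo() runs lazily: the first map (or first kernel use) creates the bo. */
void* p = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_WRITE,
                             0, size, 0, NULL, NULL, &err);
memcpy(p, data, size);  /* with a bound userptr, p is host_ptr itself: no copy was made */
clEnqueueUnmapMemObject(queue, buf, p, 0, NULL, NULL);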