diff options
author | Junyan He <junyan.he@intel.com> | 2016-04-28 19:05:16 +0800 |
---|---|---|
committer | Junyan He <junyan.he@intel.com> | 2016-04-28 19:05:16 +0800 |
commit | 69ea32876e7a8c6689cab56331ecb94600c39508 (patch) | |
tree | e1f04fabf99d48bd8c52f26d7c1c03620a697f29 | |
parent | 40658a626bb08eb122c466869fa02031d120241c (diff) |
modify map
-rw-r--r-- | backend/src/driver/cl_gen_driver.hpp | 12 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_kernel.cpp | 2 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_mem.cpp | 107 |
3 files changed, 70 insertions, 51 deletions
diff --git a/backend/src/driver/cl_gen_driver.hpp b/backend/src/driver/cl_gen_driver.hpp index 39607c16..feaea18f 100644 --- a/backend/src/driver/cl_gen_driver.hpp +++ b/backend/src/driver/cl_gen_driver.hpp @@ -254,22 +254,22 @@ struct GenGPUCommandQueue { struct GenGPUMem { drm_intel_bo *bo; GenGPUContext* gpuCtx; - bool bindUserPtr; - void* alignedHostPtr; + void* bindedHostPtr; + void* alignedHostPtr; // for bind user ptr void* mappedAddr; bool writeMap; size_t realSize; // Maybe diff from the size in buffer, because align, etc. cl_gpgpu_tiling tiling; bool mappedGtt; - volatile int mapRef; + int mapRef; pthread_mutex_t mutex; - GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), bindUserPtr(false), alignedHostPtr(NULL), + GenGPUMem(GenGPUContext* ctx) : bo(NULL), gpuCtx(ctx), bindedHostPtr(NULL), alignedHostPtr(NULL), mappedAddr(NULL), writeMap(false), realSize(0), tiling(GPGPU_NO_TILE), mappedGtt(false), mapRef(0) { pthread_mutex_init(&this->mutex, NULL); } ~GenGPUMem(void); - bool genAllocMemBo(cl_mem mem); - void* genMapBo(cl_mem mem, bool write); + bool genAllocBo(cl_mem mem); + void* genMapBo(bool write); void genUnMapBo(cl_mem mem); }; diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp index 078a6331..9b41979b 100644 --- a/backend/src/driver/cl_gen_kernel.cpp +++ b/backend/src/driver/cl_gen_kernel.cpp @@ -607,7 +607,7 @@ static cl_int genAllocateArgBufs(cl_kernel kernel, Kernel* ker, cl_command_queue GenGPUMem* genMem = reinterpret_cast<GenGPUMem*>(getGenMemPrivate(mem, queue->device)); GBE_ASSERT(genMem != NULL); - if (genMem->genAllocMemBo(mem) == false) { + if (genMem->genAllocBo(mem) == false) { return CL_MEM_OBJECT_ALLOCATION_FAILURE; } } diff --git a/backend/src/driver/cl_gen_mem.cpp b/backend/src/driver/cl_gen_mem.cpp index f69a9f5d..3d8cd219 100644 --- a/backend/src/driver/cl_gen_mem.cpp +++ b/backend/src/driver/cl_gen_mem.cpp @@ -86,7 +86,7 @@ void GenGPUMem::genUnMapBo(cl_mem mem) this->writeMap = false; } -void* GenGPUMem::genMapBo(cl_mem mem, bool write) +void* GenGPUMem::genMapBo(bool write) { /* From here, we want to lock. */ GenGPULockerHelper mutexAlloc(&this->mutex); @@ -95,22 +95,25 @@ void* GenGPUMem::genMapBo(cl_mem mem, bool write) return NULL; // Not the same operation. if (this->mappedAddr != NULL) { + GBE_ASSERT(this->mapRef > 0); this->mapRef++; return this->mappedAddr; } GBE_ASSERT(this->mapRef == 0); - if (IS_IMAGE(mem) && this->tiling != GPGPU_NO_TILE) { - /* If we are image from User PTR, the tiling must be GPGPU_NO_TILE. */ + if (this->tiling != GPGPU_NO_TILE) { + /* If we are in tiling mode, we must be image and no User PTR. */ + GBE_ASSERT(this->bindedHostPtr == NULL); + drm_intel_gem_bo_map_gtt(this->bo); GBE_ASSERT(this->bo->virt); this->mappedGtt = 1; this->mappedAddr = this->bo->virt; } else { - if (this->alignedHostPtr) { - //not created from userptr, the offset should not be always zero. + if (this->bindedHostPtr) { + // created from userptr, esay, set the mapped addr to host ptr. drm_intel_bo_wait_rendering(this->bo); - this->mappedAddr = mem->host_ptr; + this->mappedAddr = this->bindedHostPtr; } else { this->writeMap = write; drm_intel_bo_map(this->bo, write); @@ -123,56 +126,67 @@ void* GenGPUMem::genMapBo(cl_mem mem, bool write) return this->mappedAddr; } -bool GenGPUMem::genAllocMemBo(cl_mem mem) +bool GenGPUMem::genAllocBo(cl_mem mem) { /* From here, we want to lock. */ GenGPULockerHelper mutexAlloc(&this->mutex); + /* Never alloc bo for subbuffer, subbuffer need to use parent's GenGPUMem. */ + GBE_ASSERT(!(cl_mem_to_buffer(mem) && cl_mem_to_buffer(mem)->parent != NULL)); + GBE_ASSERT(mem->size > 0); + size_t alignment = 64; if (this->bo != NULL) { return true; } - if (!IS_IMAGE(mem)) { - if (mem->flags & CL_MEM_USE_HOST_PTR) { -#ifdef HAS_USERPTR - if (queue->device->host_unified_memory) { // Need to alloc host accessible mem. - int page_size = getpagesize(); - int cacheline_size = queue->device->global_mem_cache_line_size; - - if (ALIGN((unsigned long)(mem->host_ptr), cacheline_size) == (unsigned long)(mem->host_ptr)) { - this->alignedHostPtr = (void*)(((unsigned long)(mem->host_ptr)) & (~(page_size - 1))); + if (this->tiling != GPGPU_NO_TILE) + alignment = 4096; + + if (mem->flags & (CL_MEM_USE_HOST_PTR|CL_MEM_ALLOC_HOST_PTR)) { + GBE_ASSERT(mem->host_ptr); +#ifdef HAS_USERPTR // Try to bind user ptr + if (this->tiling == GPGPU_NO_TILE && mem->enqueued_device->host_unified_memory) { + /* userptr not support tiling */ + int page_size = getpagesize(); + int cacheline_size = mem->enqueued_device->global_mem_cache_line_size; + if (ALIGN((unsigned long)(mem->host_ptr), cacheline_size) == (unsigned long)(mem->host_ptr)) { + /* Need to be cache line aligned. */ + this->alignedHostPtr = (void*)(((unsigned long)(mem->host_ptr)) & (~(page_size - 1))); + this->bo = genBufferAllocUserptr(gpuCtx->bufmgr, "CL userptr memory object", + this->alignedHostPtr, mem->size, 0); + if (this->bo) { this->realSize = ALIGN(((unsigned long)mem->host_ptr - (unsigned long)alignedHostPtr + mem->size), page_size); - this->bo = genBufferAllocUserptr(gpuCtx->bufmgr, "CL userptr memory object", - this->alignedHostPtr, this->realSize, 0); + this->bindedHostPtr = mem->host_ptr; + } else { + this->alignedHostPtr = this->bindedHostPtr = NULL; } } - - /* We can not create host accessible mem for GPU, we need to fallback to a fake - CL_MEM_USE_HOST_PTR policy. We just keep user ptr, every time, when MAP, - we copy the data to GPU and when UNMAP, copy back the data to host. */ - if (this->bo == NULL) - this->alignedHostPtr = NULL; -#endif } +#endif + } - if (this->bo == NULL) { - this->bo = drm_intel_bo_alloc(this->gpuCtx->bufmgr, "CL memory object", this->realSize, 64); - if (UNLIKELY(this->bo == NULL)) { - return false; - } + if (this->bo == NULL) { + /* HSW: Byte scattered Read/Write has limitation that + the buffer size must be a multiple of 4 bytes. */ + this->realSize = ALIGN(mem->size, 4); + this->bo = drm_intel_bo_alloc(this->gpuCtx->bufmgr, "CL memory object", this->realSize, alignment); + this->bindedHostPtr = NULL; + this->alignedHostPtr = NULL; + if (UNLIKELY(this->bo == NULL)) { + return false; } } + GBE_ASSERT(this->bo != NULL); + + /* bind the user ptr, no need to copy the data. */ + if (this->bindedHostPtr) + return true; + /* Copy the data if required */ - if ((mem->flags & CL_MEM_COPY_HOST_PTR) || - (mem->flags & CL_MEM_USE_HOST_PTR && this->alignedHostPtr == NULL)) { - if (IS_IMAGE(mem)) { - drm_intel_bo_subdata(this->bo, 0, mem->size, mem->host_ptr); - } else { - cl_mem_buffer buffer = cl_mem_to_buffer(mem); - drm_intel_bo_subdata(this->bo, buffer->sub_offset, mem->size, mem->host_ptr); - } + if (mem->flags & (CL_MEM_COPY_HOST_PTR|CL_MEM_USE_HOST_PTR)) { + drm_intel_bo_subdata(this->bo, 0, mem->size, mem->host_ptr); } return true; @@ -343,10 +357,7 @@ cl_int GenCreateBuffer(cl_mem mem, const cl_device_id device) if (!buffer->parent) { GenGPUContext* gpuCtx = reinterpret_cast<GenGPUContext*>(getGenContextPrivate(mem->ctx, device)); GenGPUMem* genMem = GBE_NEW(GenGPUMem, gpuCtx); - - /* HSW: Byte scattered Read/Write has limitation that - the buffer size must be a multiple of 4 bytes. */ - genMem->realSize = ALIGN(mem->size, 4); + genMem->realSize = mem->size; setGenMemPrivate(mem, device, genMem); } else { /* A sub buffer, just ref the parent's GenGPUMem. */ @@ -379,10 +390,13 @@ cl_int GenReleaseMem(cl_mem mem, const cl_device_id device) static void* genDoMapBuffer(GenGPUMem* genMem, cl_mem mem, cl_map_flags flags, size_t offset, size_t size) { cl_mem_buffer buffer = cl_mem_to_buffer(mem); + + GBE_ASSERT(buffer); // Must be buffer; + char* retAddr = NULL; bool mapWrite = flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION); - void* mappedAddr = genMem->genMapBo(mem, mapWrite); + void* mappedAddr = genMem->genMapBo(mapWrite); if (mappedAddr == NULL) { return NULL; } @@ -458,7 +472,7 @@ cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, if (genMem == NULL) return CL_INVALID_VALUE; - if (genMem->genAllocMemBo(mem) == false) { + if (genMem->genAllocBo(mem) == false) { return CL_MEM_OBJECT_ALLOCATION_FAILURE; } @@ -475,6 +489,11 @@ cl_int GenEnqueueMapBuffer(cl_command_queue queue, cl_mem mem, void** ret_addr, return CL_SUCCESS; } + + + + + return CL_MAP_FAILURE; } |