diff options
author | Guo Yejun <yejun.guo@intel.com> | 2014-12-02 09:31:01 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-12-02 17:16:59 +0800 |
commit | befcef65ce5a87a91967974a53d5e77e2b16d136 (patch) | |
tree | f578af6d14082dd097c7edf41a1247b4cc8caea2 /src | |
parent | 6d6ea43aacbe65d9f9e2167d1824c35f128eec79 (diff) |
enable CL_MEM_ALLOC_HOST_PTR with user_ptr to avoid copy between GPU/CPU
When user ptr is enabled, allocate page-aligned system memory for
CL_MEM_ALLOC_HOST_PTR inside the driver and wrap it as GPU memory
to avoid the copy between GPU and CPU.
Also refine the related user_ptr code.
tests verified: beignet/utest, conformance/basic, buffers, mem_host_flags
Signed-off-by: Guo Yejun <yejun.guo@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/cl_device_id.c | 8 | ||||
-rw-r--r-- | src/cl_mem.c | 37 | ||||
-rw-r--r-- | src/cl_mem.h | 4 |
3 files changed, 33 insertions, 16 deletions
diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 5ef0bdea..711f8ae7 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -27,6 +27,7 @@ #include "cl_thread.h" #include "CL/cl.h" #include "cl_gbe_loader.h" +#include "cl_alloc.h" #include <assert.h> #include <stdio.h> @@ -407,15 +408,14 @@ brw_gt3_break: cl_buffer_mgr bufmgr = cl_driver_get_bufmgr(dummy); const size_t sz = 4096; - void* host_ptr = NULL; - int err = posix_memalign(&host_ptr, 4096, sz); - if (err == 0) { + void* host_ptr = cl_aligned_malloc(sz, 4096);; + if (host_ptr != NULL) { cl_buffer bo = cl_buffer_alloc_userptr(bufmgr, "CL memory object", host_ptr, sz, 0); if (bo == NULL) ret->host_unified_memory = CL_FALSE; else cl_buffer_unreference(bo); - free(host_ptr); + cl_free(host_ptr); } else ret->host_unified_memory = CL_FALSE; diff --git a/src/cl_mem.c b/src/cl_mem.c index 916e909a..3055bea5 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -266,16 +266,26 @@ cl_mem_allocate(enum cl_mem_type type, #ifdef HAS_USERPTR if (ctx->device->host_unified_memory) { + int page_size = getpagesize(); /* currently only cl buf is supported, will add cl image support later */ - if ((flags & CL_MEM_USE_HOST_PTR) && host_ptr != NULL) { - /* userptr not support tiling */ - if (!is_tiled) { - int page_size = getpagesize(); - if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) { - mem->is_userptr = 1; - mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0); + if (type == CL_MEM_BUFFER_TYPE) { + if (flags & CL_MEM_USE_HOST_PTR) { + assert(host_ptr != NULL); + /* userptr not support tiling */ + if (!is_tiled) { + if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) { + mem->is_userptr = 1; + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0); + } } } + else if (flags & CL_MEM_ALLOC_HOST_PTR) { + const size_t alignedSZ = ALIGN(sz, page_size); + void* internal_host_ptr = cl_aligned_malloc(alignedSZ, page_size); + mem->host_ptr = 
internal_host_ptr; + mem->is_userptr = 1; + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", internal_host_ptr, alignedSZ, 0); + } } } @@ -400,13 +410,17 @@ cl_mem_new_buffer(cl_context ctx, goto error; /* Copy the data if required */ - if (flags & CL_MEM_COPY_HOST_PTR) - cl_buffer_subdata(mem->bo, 0, sz, data); + if (flags & CL_MEM_COPY_HOST_PTR) { + if (mem->is_userptr) + memcpy(mem->host_ptr, data, sz); + else + cl_buffer_subdata(mem->bo, 0, sz, data); + } if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) cl_buffer_subdata(mem->bo, 0, sz, data); - if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR) + if (flags & CL_MEM_USE_HOST_PTR) mem->host_ptr = data; exit: @@ -1069,6 +1083,9 @@ cl_mem_delete(cl_mem mem) cl_buffer_unreference(mem->bo); } + if (mem->is_userptr && (mem->flags & CL_MEM_ALLOC_HOST_PTR)) + cl_free(mem->host_ptr); + cl_free(mem); } diff --git a/src/cl_mem.h b/src/cl_mem.h index ac1175db..1641dcc4 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -86,13 +86,13 @@ typedef struct _cl_mem { size_t size; /* original request size, not alignment size, used in constant buffer */ cl_context ctx; /* Context it belongs to */ cl_mem_flags flags; /* Flags specified at the creation time */ - void * host_ptr; /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR */ + void * host_ptr; /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR, CL_MEM_USE_HOST_PTR */ cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */ int mapped_ptr_sz; /* The array size of mapped_ptr. */ int map_ref; /* The mapped count. */ uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */ cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */ - uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/ + uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/ } _cl_mem; struct _cl_mem_image { |