summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Guo Yejun <yejun.guo@intel.com> 2014-12-02 09:31:01 +0800
committer Zhigang Gong <zhigang.gong@intel.com> 2014-12-02 17:16:59 +0800
commit befcef65ce5a87a91967974a53d5e77e2b16d136 (patch)
tree f578af6d14082dd097c7edf41a1247b4cc8caea2 /src
parent 6d6ea43aacbe65d9f9e2167d1824c35f128eec79 (diff)
enable CL_MEM_ALLOC_HOST_PTR with user_ptr to avoid copy between GPU/CPU
When user ptr is enabled, allocate page-aligned system memory for CL_MEM_ALLOC_HOST_PTR inside the driver and wrap it as GPU memory to avoid the copy between GPU and CPU. Also refine the related user_ptr code. Tests verified: beignet/utest, conformance/basic, buffers, mem_host_flags. Signed-off-by: Guo Yejun <yejun.guo@intel.com> Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- src/cl_device_id.c 8
-rw-r--r-- src/cl_mem.c 37
-rw-r--r-- src/cl_mem.h 4
3 files changed, 33 insertions, 16 deletions
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 5ef0bdea..711f8ae7 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -27,6 +27,7 @@
#include "cl_thread.h"
#include "CL/cl.h"
#include "cl_gbe_loader.h"
+#include "cl_alloc.h"
#include <assert.h>
#include <stdio.h>
@@ -407,15 +408,14 @@ brw_gt3_break:
cl_buffer_mgr bufmgr = cl_driver_get_bufmgr(dummy);
const size_t sz = 4096;
- void* host_ptr = NULL;
- int err = posix_memalign(&host_ptr, 4096, sz);
- if (err == 0) {
+ void* host_ptr = cl_aligned_malloc(sz, 4096);;
+ if (host_ptr != NULL) {
cl_buffer bo = cl_buffer_alloc_userptr(bufmgr, "CL memory object", host_ptr, sz, 0);
if (bo == NULL)
ret->host_unified_memory = CL_FALSE;
else
cl_buffer_unreference(bo);
- free(host_ptr);
+ cl_free(host_ptr);
}
else
ret->host_unified_memory = CL_FALSE;
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 916e909a..3055bea5 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -266,16 +266,26 @@ cl_mem_allocate(enum cl_mem_type type,
#ifdef HAS_USERPTR
if (ctx->device->host_unified_memory) {
+ int page_size = getpagesize();
/* currently only cl buf is supported, will add cl image support later */
- if ((flags & CL_MEM_USE_HOST_PTR) && host_ptr != NULL) {
- /* userptr not support tiling */
- if (!is_tiled) {
- int page_size = getpagesize();
- if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
- mem->is_userptr = 1;
- mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0);
+ if (type == CL_MEM_BUFFER_TYPE) {
+ if (flags & CL_MEM_USE_HOST_PTR) {
+ assert(host_ptr != NULL);
+ /* userptr not support tiling */
+ if (!is_tiled) {
+ if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
+ mem->is_userptr = 1;
+ mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0);
+ }
}
}
+ else if (flags & CL_MEM_ALLOC_HOST_PTR) {
+ const size_t alignedSZ = ALIGN(sz, page_size);
+ void* internal_host_ptr = cl_aligned_malloc(alignedSZ, page_size);
+ mem->host_ptr = internal_host_ptr;
+ mem->is_userptr = 1;
+ mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", internal_host_ptr, alignedSZ, 0);
+ }
}
}
@@ -400,13 +410,17 @@ cl_mem_new_buffer(cl_context ctx,
goto error;
/* Copy the data if required */
- if (flags & CL_MEM_COPY_HOST_PTR)
- cl_buffer_subdata(mem->bo, 0, sz, data);
+ if (flags & CL_MEM_COPY_HOST_PTR) {
+ if (mem->is_userptr)
+ memcpy(mem->host_ptr, data, sz);
+ else
+ cl_buffer_subdata(mem->bo, 0, sz, data);
+ }
if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr)
cl_buffer_subdata(mem->bo, 0, sz, data);
- if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR)
+ if (flags & CL_MEM_USE_HOST_PTR)
mem->host_ptr = data;
exit:
@@ -1069,6 +1083,9 @@ cl_mem_delete(cl_mem mem)
cl_buffer_unreference(mem->bo);
}
+ if (mem->is_userptr && (mem->flags & CL_MEM_ALLOC_HOST_PTR))
+ cl_free(mem->host_ptr);
+
cl_free(mem);
}
diff --git a/src/cl_mem.h b/src/cl_mem.h
index ac1175db..1641dcc4 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -86,13 +86,13 @@ typedef struct _cl_mem {
size_t size; /* original request size, not alignment size, used in constant buffer */
cl_context ctx; /* Context it belongs to */
cl_mem_flags flags; /* Flags specified at the creation time */
- void * host_ptr; /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR */
+ void * host_ptr; /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR, CL_MEM_USE_HOST_PTR */
cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
int mapped_ptr_sz; /* The array size of mapped_ptr. */
int map_ref; /* The mapped count. */
uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */
cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
- uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/
+ uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/
} _cl_mem;
struct _cl_mem_image {