diff options
-rw-r--r-- | CMakeLists.txt | 11 | ||||
-rw-r--r-- | src/CMakeLists.txt | 5 | ||||
-rw-r--r-- | src/cl_api.c | 10 | ||||
-rw-r--r-- | src/cl_driver.h | 3 | ||||
-rw-r--r-- | src/cl_driver_defs.c | 1 | ||||
-rw-r--r-- | src/cl_enqueue.c | 19 | ||||
-rw-r--r-- | src/cl_mem.c | 37 | ||||
-rw-r--r-- | src/cl_mem.h | 2 | ||||
-rw-r--r-- | src/cl_mem_gl.c | 2 | ||||
-rw-r--r-- | src/intel/intel_driver.c | 15 |
10 files changed, 87 insertions, 18 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 40cb74cc..15386f9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,7 @@ ENDIF(X11_FOUND) # DRM pkg_check_modules(DRM REQUIRED libdrm) IF(DRM_FOUND) - MESSAGE(STATUS "Looking for DRM - found at ${DRM_PREFIX}") + MESSAGE(STATUS "Looking for DRM - found at ${DRM_PREFIX} ${DRM_VERSION}") INCLUDE_DIRECTORIES(${DRM_INCLUDE_DIRS}) ELSE(DRM_FOUND) MESSAGE(STATUS "Looking for DRM - not found") @@ -118,7 +118,14 @@ ENDIF(DRM_FOUND) pkg_check_modules(DRM_INTEL libdrm_intel>=2.4.52) IF(DRM_INTEL_FOUND) INCLUDE_DIRECTORIES(${DRM_INTEL_INCLUDE_DIRS}) - MESSAGE(STATUS "Looking for DRM Intel - found at ${DRM_INTEL_PREFIX}") + MESSAGE(STATUS "Looking for DRM Intel - found at ${DRM_INTEL_PREFIX} ${DRM_INTEL_VERSION}") + #userptr support starts from 2.4.57, but 2.4.58 is the actual stable release + IF(DRM_INTEL_VERSION VERSION_GREATER 2.4.57) + MESSAGE(STATUS "Enable userptr support") + SET(DRM_INTEL_USERPTR "enable") + ELSE(DRM_INTEL_VERSION VERSION_GREATER 2.4.57) + MESSAGE(STATUS "Disable userptr support") + ENDIF(DRM_INTEL_VERSION VERSION_GREATER 2.4.57) ELSE(DRM_INTEL_FOUND) MESSAGE(FATAL_ERROR "Looking for DRM Intel (>= 2.4.52) - not found") ENDIF(DRM_INTEL_FOUND) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fc5de89e..7182bada 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -109,6 +109,11 @@ SET(CMAKE_CXX_FLAGS "-DHAS_OCLIcd ${CMAKE_CXX_FLAGS}") SET(CMAKE_C_FLAGS "-DHAS_OCLIcd ${CMAKE_C_FLAGS}") endif (OCLIcd_FOUND) +if (DRM_INTEL_USERPTR) +SET(CMAKE_CXX_FLAGS "-DHAS_USERPTR ${CMAKE_CXX_FLAGS}") +SET(CMAKE_C_FLAGS "-DHAS_USERPTR ${CMAKE_C_FLAGS}") +endif (DRM_INTEL_USERPTR) + set(GIT_SHA1 "git_sha1.h") add_custom_target(${GIT_SHA1} ALL COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh diff --git a/src/cl_api.c b/src/cl_api.c index 05d30933..1f246386 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2665,9 +2665,13 @@ clEnqueueMapBuffer(cl_command_queue command_queue, ptr = data->ptr; if(event) cl_event_set_status(*event, CL_COMPLETE); } else { - if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) { - err = CL_MAP_FAILURE; - goto error; + if (buffer->is_userptr) + ptr = buffer->host_ptr; + else { + if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) { + err = CL_MAP_FAILURE; + goto error; + } } } err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size, NULL, NULL); diff --git a/src/cl_driver.h b/src/cl_driver.h index 638b791c..8697ff2c 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -285,6 +285,9 @@ extern cl_gpgpu_walker_cb *cl_gpgpu_walker; typedef cl_buffer (cl_buffer_alloc_cb)(cl_buffer_mgr, const char*, size_t, size_t); extern cl_buffer_alloc_cb *cl_buffer_alloc; +typedef cl_buffer (cl_buffer_alloc_userptr_cb)(cl_buffer_mgr, const char*, void *, size_t, unsigned long); +extern cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr; + /* Set a buffer's tiling mode */ typedef cl_buffer (cl_buffer_set_tiling_cb)(cl_buffer, int tiling, size_t stride); extern cl_buffer_set_tiling_cb *cl_buffer_set_tiling; diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index c31b6fc4..1335c20f 100644 --- a/src/cl_driver_defs.c +++ b/src/cl_driver_defs.c @@ -29,6 +29,7 @@ LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL; /* Buffer */ LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL; +LOCAL cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr = NULL; LOCAL cl_buffer_set_tiling_cb *cl_buffer_set_tiling = NULL; LOCAL cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture = NULL; LOCAL cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture = NULL; diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index db0bce74..5bdb7cd7 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -234,11 +234,15 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data) mem->type == CL_MEM_SUBBUFFER_TYPE); struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; - if(data->unsync_map == 1) - //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here - ptr = cl_mem_map_gtt(mem); - else - ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0); + if (mem->is_userptr) + ptr = mem->host_ptr; + else { + if(data->unsync_map == 1) + //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here + ptr = cl_mem_map_gtt(mem); + else + ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0); + } if (ptr == NULL) { err = CL_MAP_FAILURE; @@ -246,7 +250,7 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data) } data->ptr = ptr; - if(mem->flags & CL_MEM_USE_HOST_PTR) { + if((mem->flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) { assert(mem->host_ptr); ptr = (char*)ptr + data->offset + buffer->sub_offset; memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, data->size); @@ -331,7 +335,8 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data) assert(mapped_ptr >= memobj->host_ptr && mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size); /* Sync the data. */ - memcpy(v_ptr, mapped_ptr, mapped_size); + if (!memobj->is_userptr) + memcpy(v_ptr, mapped_ptr, mapped_size); } else { CHECK_IMAGE(memobj, image); diff --git a/src/cl_mem.c b/src/cl_mem.c index 16bd6135..d3199668 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -33,6 +33,7 @@ #include <assert.h> #include <stdio.h> #include <string.h> +#include <unistd.h> #define FIELD_SIZE(CASE,TYPE) \ case JOIN(CL_,CASE): \ @@ -223,6 +224,7 @@ cl_mem_allocate(enum cl_mem_type type, cl_mem_flags flags, size_t sz, cl_int is_tiled, + void *host_ptr, cl_int *errcode) { cl_buffer_mgr bufmgr = NULL; @@ -251,6 +253,7 @@ cl_mem_allocate(enum cl_mem_type type, mem->ref_n = 1; mem->magic = CL_MAGIC_MEM_HEADER; mem->flags = flags; + mem->is_userptr = 0; if (sz != 0) { /* Pinning will require stricter alignment rules */ @@ -260,7 +263,28 @@ cl_mem_allocate(enum cl_mem_type type, /* Allocate space in memory */ bufmgr = cl_context_get_bufmgr(ctx); assert(bufmgr); + +#ifdef HAS_USERPTR + if (ctx->device->host_unified_memory) { + /* currently only cl buf is supported, will add cl image support later */ + if ((flags & CL_MEM_USE_HOST_PTR) && host_ptr != NULL) { + /* userptr not support tiling */ + if (!is_tiled) { + int page_size = getpagesize(); + if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) { + mem->is_userptr = 1; + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0); + } + } + } + } + + if (!mem->is_userptr) + mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment); +#else mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment); +#endif + if (UNLIKELY(mem->bo == NULL)) { err = CL_MEM_OBJECT_ALLOCATION_FAILURE; goto error; @@ -387,12 +411,15 @@ cl_mem_new_buffer(cl_context ctx, sz = ALIGN(sz, 4); /* Create the buffer in video memory */ - mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, &err); + mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, data, &err); if (mem == NULL || err != CL_SUCCESS) goto error; /* Copy the data if required */ - if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR) + if (flags & CL_MEM_COPY_HOST_PTR) + cl_buffer_subdata(mem->bo, 0, sz, data); + + if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) cl_buffer_subdata(mem->bo, 0, sz, data); if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR) @@ -762,7 +789,7 @@ _cl_mem_new_image(cl_context ctx, sz = aligned_pitch * aligned_h * depth; } - mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, &err); + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err); if (mem == NULL || err != CL_SUCCESS) goto error; @@ -1834,7 +1861,7 @@ LOCAL cl_mem cl_mem_new_libva_buffer(cl_context ctx, cl_int err = CL_SUCCESS; cl_mem mem = NULL; - mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, &err); + mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, NULL, &err); if (mem == NULL || err != CL_SUCCESS) goto error; @@ -1875,7 +1902,7 @@ LOCAL cl_mem cl_mem_new_libva_image(cl_context ctx, goto error; } - mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, &err); + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, NULL, &err); if (mem == NULL || err != CL_SUCCESS) { err = CL_OUT_OF_HOST_MEMORY; goto error; diff --git a/src/cl_mem.h b/src/cl_mem.h index 95c5f056..2e9dd5ad 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -92,6 +92,7 @@ typedef struct _cl_mem { int map_ref; /* The mapped count. */ uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */ cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */ + uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/ } _cl_mem; struct _cl_mem_image { @@ -262,6 +263,7 @@ cl_mem_allocate(enum cl_mem_type type, cl_mem_flags flags, size_t sz, cl_int is_tiled, + void *host_ptr, cl_int *errcode); void diff --git a/src/cl_mem_gl.c b/src/cl_mem_gl.c index 28d2ac65..36409089 100644 --- a/src/cl_mem_gl.c +++ b/src/cl_mem_gl.c @@ -63,7 +63,7 @@ cl_mem_new_gl_texture(cl_context ctx, goto error; } - mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, &err); + mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, NULL, &err); if (mem == NULL || err != CL_SUCCESS) goto error; diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index bb97220c..fc037cc8 100644 --- a/src/intel/intel_driver.c +++ b/src/intel/intel_driver.c @@ -690,6 +690,20 @@ cl_buffer intel_share_image_from_libva(cl_context ctx, return (cl_buffer)intel_bo; } +static cl_buffer intel_buffer_alloc_userptr(cl_buffer_mgr bufmgr, const char* name, void *data,size_t size, unsigned long flags) +{ +#ifdef HAS_USERPTR + drm_intel_bo *bo; + bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags); + /* Fallback to unsynchronized userptr allocation if kernel has no MMU notifier enabled. */ + if (bo == NULL) + bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags | I915_USERPTR_UNSYNCHRONIZED); + return (cl_buffer)bo; +#else + return NULL; +#endif +} + static int32_t get_intel_tiling(cl_int tiling, uint32_t *intel_tiling) { switch (tiling) { @@ -734,6 +748,7 @@ intel_setup_callbacks(void) cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr; cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id; cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc; + cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) intel_buffer_alloc_userptr; cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling; #if defined(HAS_EGL) cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture; |