Diffstat
-rw-r--r--  CMakeLists.txt            |   5
-rw-r--r--  src/CMakeLists.txt        |   5
-rw-r--r--  src/cl_api.c              |  87
-rw-r--r--  src/cl_context.c          |  29
-rw-r--r--  src/cl_context.h          |   4
-rw-r--r--  src/cl_device_id.c        |   1
-rw-r--r--  src/cl_device_id.h        |   3
-rw-r--r--  src/cl_driver.h           |   6
-rw-r--r--  src/cl_driver_defs.c      |   2
-rw-r--r--  src/cl_enqueue.c          |   8
-rw-r--r--  src/cl_gt_device.h        |   1
-rw-r--r--  src/cl_kernel.c           |  36
-rw-r--r--  src/cl_kernel.h           |   8
-rw-r--r--  src/cl_mem.c              | 111
-rw-r--r--  src/cl_mem.h              |  13
-rw-r--r--  src/intel/intel_driver.c  |   4
16 files changed, 312 insertions(+), 11 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d839f3f1..f36ac717 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -153,18 +153,19 @@ IF(DRM_INTEL_FOUND)
   ELSE(HAVE_DRM_INTEL_SUBSLICE_TOTAL)
     MESSAGE(STATUS "Disable subslice total query support")
   ENDIF(HAVE_DRM_INTEL_SUBSLICE_TOTAL)
-  CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_get_pooled_eu" "" HAVE_DRM_INTEL_POOLED_EU)
+  CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_get_pooled_eu" ${DRM_INTEL_LIBDIR} HAVE_DRM_INTEL_POOLED_EU)
   IF(HAVE_DRM_INTEL_POOLED_EU)
     MESSAGE(STATUS "Enable pooled eu query support")
   ELSE(HAVE_DRM_INTEL_POOLED_EU)
     MESSAGE(STATUS "Disable pooled eu query support")
   ENDIF(HAVE_DRM_INTEL_POOLED_EU)
-  CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_get_min_eu_in_pool" "" HAVE_DRM_INTEL_MIN_EU_IN_POOL)
+  CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_get_min_eu_in_pool" ${DRM_INTEL_LIBDIR} HAVE_DRM_INTEL_MIN_EU_IN_POOL)
   IF(HAVE_DRM_INTEL_MIN_EU_IN_POOL)
     MESSAGE(STATUS "Enable min eu in pool query support")
   ELSE(HAVE_DRM_INTEL_MIN_EU_IN_POOL)
     MESSAGE(STATUS "Disable min eu in pool query support")
   ENDIF(HAVE_DRM_INTEL_MIN_EU_IN_POOL)
+  CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_bo_set_softpin_offset" ${DRM_INTEL_LIBDIR} HAVE_DRM_INTEL_BO_SET_SOFTPIN)
 ELSE(DRM_INTEL_FOUND)
   MESSAGE(FATAL_ERROR "Looking for DRM Intel (>= 2.4.52) - not found")
 ENDIF(DRM_INTEL_FOUND)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 72392b56..26ccceaf 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -151,6 +151,11 @@ if (HAVE_DRM_INTEL_MIN_EU_IN_POOL)
   SET(CMAKE_C_FLAGS "-DHAS_MIN_EU_IN_POOL ${CMAKE_C_FLAGS}")
 endif (HAVE_DRM_INTEL_MIN_EU_IN_POOL)
 
+if (HAVE_DRM_INTEL_BO_SET_SOFTPIN)
+  SET(CMAKE_CXX_FLAGS "-DHAS_BO_SET_SOFTPIN ${CMAKE_CXX_FLAGS}")
+  SET(CMAKE_C_FLAGS "-DHAS_BO_SET_SOFTPIN ${CMAKE_C_FLAGS}")
+endif (HAVE_DRM_INTEL_BO_SET_SOFTPIN)
+
 set(GIT_SHA1 "git_sha1.h")
 add_custom_target(${GIT_SHA1} ALL
   COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh
diff --git a/src/cl_api.c b/src/cl_api.c
index 1d4c5a1f..e24831c4 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -487,6 +487,80 @@ error:
   return mem;
 }
 
+void *
+clSVMAlloc (cl_context context,
+            cl_svm_mem_flags flags,
+            size_t size,
+            unsigned int alignment)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_CONTEXT (context);
+  return cl_mem_svm_allocate(context, flags, size, alignment);
+error:
+  return NULL;
+}
+
+void
+clSVMFree (cl_context context, void* svm_pointer)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_CONTEXT (context);
+  return cl_mem_svm_delete(context, svm_pointer);
+error:
+  return;
+}
+
+cl_int
+clEnqueueSVMMap (cl_command_queue command_queue,
+                 cl_bool blocking_map,
+                 cl_map_flags map_flags,
+                 void *svm_ptr,
+                 size_t size,
+                 cl_uint num_events_in_wait_list,
+                 const cl_event *event_wait_list,
+                 cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  cl_mem buffer;
+
+  CHECK_QUEUE(command_queue);
+  buffer = cl_context_get_svm_from_ptr(command_queue->ctx, svm_ptr);
+  if(buffer == NULL) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  clEnqueueMapBuffer(command_queue, buffer, blocking_map, map_flags, 0, size,
+                     num_events_in_wait_list, event_wait_list, event, &err);
+error:
+  return err;
+}
+
+cl_int
+clEnqueueSVMUnmap (cl_command_queue command_queue,
+                   void *svm_ptr,
+                   cl_uint num_events_in_wait_list,
+                   const cl_event *event_wait_list,
+                   cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  cl_mem buffer;
+
+  CHECK_QUEUE(command_queue);
+  buffer = cl_context_get_svm_from_ptr(command_queue->ctx, svm_ptr);
+  if(buffer == NULL) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  err = clEnqueueUnmapMemObject(command_queue, buffer, svm_ptr,
+                                num_events_in_wait_list, event_wait_list, event);
+
+error:
+  return err;
+}
+
 cl_mem
 clCreateImage2D(cl_context context,
                 cl_mem_flags flags,
@@ -1169,6 +1243,19 @@ error:
   return err;
 }
 
+cl_int
+clSetKernelArgSVMPointer (cl_kernel kernel,
+                          cl_uint arg_index,
+                          const void *arg_value)
+{
+  cl_int err = CL_SUCCESS;
+  CHECK_KERNEL(kernel);
+
+  err = cl_kernel_set_arg_svm_pointer(kernel, arg_index, arg_value);
+error:
+  return err;
+}
+
 cl_int
 clGetKernelArgInfo(cl_kernel kernel, cl_uint arg_index, cl_kernel_arg_info param_name,
                    size_t param_value_size, void *param_value, size_t *param_value_size_ret)
 {
diff --git a/src/cl_context.c b/src/cl_context.c
index 229ab960..95ca5b0e 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -81,6 +81,19 @@ cl_context_add_mem(cl_context ctx, cl_mem mem) {
 }
 
 LOCAL void
+cl_context_add_svm(cl_context ctx, cl_mem mem) {
+  assert(mem->ctx == NULL);
+  cl_context_add_ref(ctx);
+
+  CL_OBJECT_LOCK(ctx);
+  list_add_tail(&mem->base.node, &ctx->svm_objects);
+  ctx->svm_object_num++;
+  CL_OBJECT_UNLOCK(ctx);
+
+  mem->ctx = ctx;
+}
+
+LOCAL void
 cl_context_remove_mem(cl_context ctx, cl_mem mem) {
   assert(mem->ctx == ctx);
   CL_OBJECT_LOCK(ctx);
@@ -452,3 +465,19 @@ unlock:
   CL_OBJECT_RELEASE_OWNERSHIP(ctx);
   return cl_kernel_dup(ker);
 }
+
+cl_mem
+cl_context_get_svm_from_ptr(cl_context ctx, void * p)
+{
+  struct list_head *pos;
+  cl_mem buf;
+
+  list_for_each (pos, (&ctx->mem_objects)) {
+    buf = (cl_mem)list_entry(pos, _cl_base_object, node);
+    if(buf->host_ptr == NULL) continue;
+    if(buf->is_svm == 0) continue;
+    if (buf->host_ptr == p)
+      return buf;
+  }
+  return NULL;
+}
diff --git a/src/cl_context.h b/src/cl_context.h
index b2903a70..f49212ba 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -107,6 +107,8 @@ struct _cl_context {
   cl_uint queue_cookie;            /* Cookie will change every time we change queue list. */
   list_head mem_objects;           /* All memory object currently allocated */
   cl_uint mem_object_num;          /* All memory number currently allocated */
+  list_head svm_objects;           /* All svm object currently allocated */
+  cl_uint svm_object_num;          /* All svm number currently allocated */
   list_head samplers;              /* All sampler object currently allocated */
   cl_uint sampler_num;             /* All sampler number currently allocated */
   list_head events;                /* All event object currently allocated */
@@ -186,5 +188,7 @@ extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx);
 extern cl_kernel cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index,
                   const char * str_kernel, size_t size, const char * str_option);
 
+/* Get the SVM from pointer, return NULL if pointer is not from SVM */
+extern cl_mem cl_context_get_svm_from_ptr(cl_context ctx, void *p);
 
 #endif /* __CL_CONTEXT_H__ */
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 7d56d90e..57a74fbd 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -1052,6 +1052,7 @@ cl_get_device_info(cl_device_id device,
     DECL_FIELD(PARTITION_TYPE, partition_type)
     DECL_FIELD(IMAGE_PITCH_ALIGNMENT, image_pitch_alignment)
     DECL_FIELD(IMAGE_BASE_ADDRESS_ALIGNMENT, image_base_address_alignment)
+    DECL_FIELD(SVM_CAPABILITIES, svm_capabilities)
 
     case CL_DEVICE_REFERENCE_COUNT:
     {
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 8cd55bb5..6c62d0b6 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -54,7 +54,8 @@ struct _cl_device_id {
   cl_uint native_vector_width_half;
   cl_uint max_clock_frequency;
   cl_uint address_bits;
-  cl_ulong max_mem_alloc_size;
+  size_t max_mem_alloc_size;
+  cl_device_svm_capabilities svm_capabilities;
   cl_bool image_support;
   cl_uint max_read_image_args;
   cl_uint max_write_image_args;
diff --git a/src/cl_driver.h b/src/cl_driver.h
index c431906a..a13ffd92 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -340,6 +340,12 @@ extern cl_buffer_alloc_cb *cl_buffer_alloc;
 typedef cl_buffer (cl_buffer_alloc_userptr_cb)(cl_buffer_mgr, const char*, void *, size_t, unsigned long);
 extern cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr;
 
+typedef cl_buffer (cl_buffer_set_softpin_offset_cb)(cl_buffer, uint64_t);
+extern cl_buffer_set_softpin_offset_cb *cl_buffer_set_softpin_offset;
+
+typedef cl_buffer (cl_buffer_set_bo_use_full_range_cb)(cl_buffer, uint32_t);
+extern cl_buffer_set_bo_use_full_range_cb *cl_buffer_set_bo_use_full_range;
+
 /* Set a buffer's tiling mode */
 typedef cl_buffer (cl_buffer_set_tiling_cb)(cl_buffer, int tiling, size_t stride);
 extern cl_buffer_set_tiling_cb *cl_buffer_set_tiling;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index 96b2f77e..f5f5fe2c 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -33,6 +33,8 @@ LOCAL cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL;
 /* Buffer */
 LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL;
 LOCAL cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr = NULL;
+LOCAL cl_buffer_set_softpin_offset_cb *cl_buffer_set_softpin_offset = NULL;
+LOCAL cl_buffer_set_bo_use_full_range_cb *cl_buffer_set_bo_use_full_range = NULL;
 LOCAL cl_buffer_set_tiling_cb *cl_buffer_set_tiling = NULL;
 LOCAL cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture = NULL;
 LOCAL cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture = NULL;
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 29cf5930..00b2dee9 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -282,8 +282,9 @@ cl_enqueue_map_buffer(enqueue_data *data, cl_int status)
   cl_int err = CL_SUCCESS;
   cl_mem mem = data->mem_obj;
   assert(mem->type == CL_MEM_BUFFER_TYPE ||
-         mem->type == CL_MEM_SUBBUFFER_TYPE);
-  struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
+         mem->type == CL_MEM_SUBBUFFER_TYPE ||
+         mem->type == CL_MEM_SVM_TYPE);
+  struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer *)mem;
 
   if (status == CL_SUBMITTED) {
     if (buffer->base.is_userptr) {
@@ -408,7 +409,8 @@ cl_enqueue_unmap_mem_object(enqueue_data *data, cl_int status)
 
   if (memobj->flags & CL_MEM_USE_HOST_PTR) {
     if (memobj->type == CL_MEM_BUFFER_TYPE ||
-        memobj->type == CL_MEM_SUBBUFFER_TYPE) {
+        memobj->type == CL_MEM_SUBBUFFER_TYPE ||
+        memobj->type == CL_MEM_SVM_TYPE) {
       assert(mapped_ptr >= memobj->host_ptr &&
              mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
       /* Sync the data. */
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index d27c1adb..d11d1814 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -40,6 +40,7 @@
 .native_vector_width_double = 2,
 .native_vector_width_half = 8,
 .address_bits = 32,
+.svm_capabilities = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER,
 .image_support = CL_TRUE,
 .max_read_image_args = BTI_MAX_READ_IMAGE_ARGS,
 .max_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS,
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index b3f1e353..760fca85 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -253,11 +253,47 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
     cl_mem_delete(k->args[index].mem);
   k->args[index].mem = mem;
   k->args[index].is_set = 1;
+  k->args[index].is_svm = mem->is_svm;
+  if(mem->is_svm)
+    k->args[index].ptr = mem->host_ptr;
   k->args[index].local_sz = 0;
   k->args[index].bti = interp_kernel_get_arg_bti(k->opaque, index);
   return CL_SUCCESS;
 }
 
+LOCAL cl_int
+cl_kernel_set_arg_svm_pointer(cl_kernel k, cl_uint index, const void *value)
+{
+  enum gbe_arg_type arg_type; /* kind of argument */
+  size_t arg_sz;              /* size of the argument */
+  cl_context ctx = k->program->ctx;
+  cl_mem mem= cl_context_get_svm_from_ptr(ctx, value);
+
+  if (UNLIKELY(index >= k->arg_n))
+    return CL_INVALID_ARG_INDEX;
+  arg_type = interp_kernel_get_arg_type(k->opaque, index);
+  arg_sz = interp_kernel_get_arg_size(k->opaque, index);
+
+  if(arg_type != GBE_ARG_GLOBAL_PTR && arg_type != GBE_ARG_CONSTANT_PTR )
+    return CL_INVALID_ARG_VALUE;
+
+  if(mem == NULL)
+    return CL_INVALID_ARG_VALUE;
+
+  cl_mem_add_ref(mem);
+  if (k->args[index].mem)
+    cl_mem_delete(k->args[index].mem);
+
+  k->args[index].ptr = value;
+  k->args[index].mem = mem;
+  k->args[index].is_set = 1;
+  k->args[index].is_svm = 1;
+  k->args[index].local_sz = 0;
+  k->args[index].bti = interp_kernel_get_arg_bti(k->opaque, index);
+  return 0;
+}
+
 LOCAL int
 cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_name,
                        size_t param_value_size, void *param_value, size_t *param_value_size_ret)
diff --git a/src/cl_kernel.h b/src/cl_kernel.h
index 0aa4a4f2..9ec49139 100644
--- a/src/cl_kernel.h
+++ b/src/cl_kernel.h
@@ -41,8 +41,10 @@ typedef struct cl_argument {
   cl_sampler sampler;       /* For sampler. */
   cl_accelerator_intel accel;
   unsigned char bti;
-  uint32_t local_sz:31;     /* For __local size specification */
+  void *ptr;                /* SVM ptr value. */
+  uint32_t local_sz:30;     /* For __local size specification */
   uint32_t is_set:1;        /* All args must be set before NDRange */
+  uint32_t is_svm:1;        /* Indicate this argument is SVMPointer */
 } cl_argument;
 
 /* One OCL function */
@@ -108,6 +110,10 @@ extern int cl_kernel_set_arg(cl_kernel,
                              uint32_t arg_index,
                              size_t arg_size,
                              const void *arg_value);
 
+extern int cl_kernel_set_arg_svm_pointer(cl_kernel,
+                                         uint32_t arg_index,
+                                         const void *arg_value);
+
 /* Get the argument information */
 extern int cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 333ffc95..712871b5 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -85,6 +85,7 @@ cl_get_mem_object_info(cl_mem mem,
     FIELD_SIZE(MEM_CONTEXT, cl_context);
     FIELD_SIZE(MEM_ASSOCIATED_MEMOBJECT, cl_mem);
     FIELD_SIZE(MEM_OFFSET, size_t);
+    FIELD_SIZE(MEM_USES_SVM_POINTER, cl_bool);
   default:
     return CL_INVALID_VALUE;
   }
@@ -133,6 +134,8 @@ cl_get_mem_object_info(cl_mem mem,
       *((size_t *)param_value) = buf->sub_offset;
     }
     break;
+  case CL_MEM_USES_SVM_POINTER:
+    *((cl_uint *)param_value) = mem->is_svm;
   }
 
   return CL_SUCCESS;
@@ -269,6 +272,7 @@ cl_mem_allocate(enum cl_mem_type type,
   mem->flags = flags;
   mem->is_userptr = 0;
   mem->offset = 0;
+  mem->is_svm = 0;
   mem->cmrt_mem = NULL;
   if (mem->type == CL_MEM_IMAGE_TYPE) {
     cl_mem_image(mem)->is_image_from_buffer = 0;
@@ -293,6 +297,9 @@ cl_mem_allocate(enum cl_mem_type type,
   if (type == CL_MEM_BUFFER_TYPE) {
     if (flags & CL_MEM_USE_HOST_PTR) {
       assert(host_ptr != NULL);
+      cl_mem svm_mem = NULL;
+      if((svm_mem = cl_context_get_svm_from_ptr(ctx, host_ptr)) != NULL)
+        mem->is_svm = 1;
       /* userptr not support tiling */
       if (!is_tiled) {
         if ((ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned long)host_ptr) &&
@@ -301,7 +308,13 @@ cl_mem_allocate(enum cl_mem_type type,
           mem->offset = host_ptr - aligned_host_ptr;
           mem->is_userptr = 1;
           size_t aligned_sz = ALIGN((mem->offset + sz), page_size);
-          mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0);
+
+          if(svm_mem != NULL) {
+            mem->bo = svm_mem->bo;
+            cl_mem_add_ref(svm_mem);
+          } else
+            mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0);
+
           bufCreated = 1;
         }
       }
@@ -614,6 +627,80 @@ void cl_mem_replace_buffer(cl_mem buffer, cl_buffer new_bo)
   }
 }
 
+void* cl_mem_svm_allocate(cl_context ctx, cl_svm_mem_flags flags,
+                          size_t size, unsigned int alignment)
+{
+  cl_int err = CL_SUCCESS;
+  size_t max_mem_size;
+
+  if(UNLIKELY(alignment & (alignment - 1)))
+    return NULL;
+
+  if ((err = cl_get_device_info(ctx->device,
+                                CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+                                sizeof(max_mem_size),
+                                &max_mem_size,
+                                NULL)) != CL_SUCCESS) {
+    return NULL;
+  }
+
+  if(UNLIKELY(size == 0 || size > max_mem_size)) {
+    return NULL;
+  }
+
+  if (flags & (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)) {
+    return NULL;
+  }
+  if (flags && ((flags & (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_FINE_GRAIN_BUFFER))
+                || ((flags & CL_MEM_WRITE_ONLY) && (flags & CL_MEM_READ_ONLY))
+                || ((flags & CL_MEM_WRITE_ONLY) && (flags & CL_MEM_READ_WRITE))
+                || ((flags & CL_MEM_READ_ONLY) && (flags & CL_MEM_READ_WRITE)))) {
+    return NULL;
+  }
+
+#ifdef HAS_BO_SET_SOFTPIN
+  cl_buffer_mgr bufmgr = NULL;
+  void * ptr = NULL;
+  cl_mem mem;
+  _cl_mem_svm* svm;
+  if(UNLIKELY((svm = CALLOC(_cl_mem_svm)) == NULL))
+    return NULL;
+  mem = &svm->base;
+
+  mem->type = CL_MEM_SVM_TYPE;
+  CL_OBJECT_INIT_BASE(mem, CL_OBJECT_MEM_MAGIC);
+  mem->flags = flags | CL_MEM_USE_HOST_PTR;
+  mem->is_userptr = 0;
+  mem->is_svm = 0;
+  mem->offset = 0;
+
+  bufmgr = cl_context_get_bufmgr(ctx);
+  assert(bufmgr);
+
+  int page_size = getpagesize();
+  const size_t alignedSZ = ALIGN(size, page_size);
+  if(alignment == 0)
+    alignment = page_size;
+  else
+    alignment = ALIGN(alignment, page_size);
+  ptr = cl_aligned_malloc(alignedSZ, alignment);
+  if(ptr == NULL) return NULL;
+
+  mem->host_ptr = ptr;
+  mem->is_svm = 1;
+  mem->is_userptr = 1;
+  mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL SVM memory object", ptr, alignedSZ, 0);
+  mem->size = size;
+  cl_buffer_set_softpin_offset(mem->bo, (size_t)ptr);
+  cl_buffer_set_bo_use_full_range(mem->bo, 1);
+
+  /* Append the svm in the context buffer list */
+  cl_context_add_mem(ctx, mem);
+#endif
+
+  return ptr;
+}
+
 void
 cl_mem_copy_image_region(const size_t *origin, const size_t *region,
                          void *dst, size_t dst_row_pitch, size_t dst_slice_pitch,
@@ -1166,6 +1253,18 @@ cl_mem_new_image(cl_context context,
 }
 
 LOCAL void
+cl_mem_svm_delete(cl_context ctx, void *svm_pointer)
+{
+  cl_mem mem;
+  if(UNLIKELY(svm_pointer == NULL))
+    return;
+  mem = cl_context_get_svm_from_ptr(ctx, svm_pointer);
+  if(mem == NULL)
+    return;
+  cl_mem_delete(mem);
+}
+
+LOCAL void
 cl_mem_delete(cl_mem mem)
 {
   cl_int i;
@@ -1198,6 +1297,11 @@ cl_mem_delete(cl_mem mem)
     }
   }
 
+  if(mem->is_svm && mem->type != CL_MEM_SVM_TYPE) {
+    cl_mem svm_mem = cl_context_get_svm_from_ptr(mem->ctx, mem->host_ptr);
+    if(svm_mem)
+      cl_mem_delete(svm_mem);
+  }
   /* Remove it from the list */
   cl_context_remove_mem(mem->ctx, mem);
 
@@ -1244,9 +1348,10 @@ cl_mem_delete(cl_mem mem)
     cl_buffer_unreference(mem->bo);
   }
 
-  if (mem->is_userptr &&
+  if ((mem->is_userptr &&
       (mem->flags & CL_MEM_ALLOC_HOST_PTR) &&
-      (mem->type != CL_MEM_SUBBUFFER_TYPE))
+      (mem->type != CL_MEM_SUBBUFFER_TYPE)) ||
+      (mem->is_svm && mem->type == CL_MEM_SVM_TYPE))
     cl_free(mem->host_ptr);
 
   CL_OBJECT_DESTROY_BASE(mem);
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 82f30f6a..4a71a8f7 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -73,6 +73,7 @@ typedef struct _cl_mem_dstr_cb {
 enum cl_mem_type {
   CL_MEM_BUFFER_TYPE,
   CL_MEM_SUBBUFFER_TYPE,
+  CL_MEM_SVM_TYPE,
   CL_MEM_IMAGE_TYPE,
   CL_MEM_GL_IMAGE_TYPE,
   CL_MEM_BUFFER1D_IMAGE_TYPE
@@ -93,7 +94,8 @@ typedef struct _cl_mem {
   int map_ref;              /* The mapped count. */
   uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
   cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
-  uint8_t is_userptr;       /* CL_MEM_USE_HOST_PTR is enabled*/
+  uint8_t is_userptr;       /* CL_MEM_USE_HOST_PTR is enabled */
+  cl_bool is_svm;           /* This object is svm */
   size_t offset;            /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/
   uint8_t cmrt_mem_type;    /* CmBuffer, CmSurface2D, ... */
@@ -113,6 +115,11 @@ typedef struct _cl_mem {
                              CL_OBJECT_GET_REF(mem) >= 1 && \
                              mem->type < CL_MEM_IMAGE_TYPE))
 
+typedef struct _cl_mem_svm {
+  _cl_mem base;
+  cl_svm_mem_flags flags;   /* Flags specified at the creation time */
+} _cl_mem_svm;
+
 struct _cl_mem_image {
   _cl_mem base;
   cl_image_format fmt;      /* only for images */
@@ -212,6 +219,10 @@ extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, void*, cl_int*);
 
 /* Create a new sub memory object */
 extern cl_mem cl_mem_new_sub_buffer(cl_mem, cl_mem_flags, cl_buffer_create_type, const void *, cl_int *);
 
+void* cl_mem_svm_allocate(cl_context, cl_svm_mem_flags, size_t, unsigned int);
+void cl_mem_svm_delete(cl_context, void *svm_pointer);
+
+
 /* Idem but this is an image */
 extern cl_mem cl_mem_new_image(cl_context context,
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index cf8f8292..d1796a5b 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -995,6 +995,10 @@ intel_setup_callbacks(void)
   cl_driver_update_device_info = (cl_driver_update_device_info_cb *) intel_update_device_info;
   cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
   cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) intel_buffer_alloc_userptr;
+#ifdef HAS_BO_SET_SOFTPIN
+  cl_buffer_set_softpin_offset = (cl_buffer_set_softpin_offset_cb *) drm_intel_bo_set_softpin_offset;
+  cl_buffer_set_bo_use_full_range = (cl_buffer_set_bo_use_full_range_cb *) drm_intel_bo_use_48b_address_range;
+#endif
  cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling;
 #if defined(HAS_GL_EGL)
   cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture;
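
The cl_device_id.c and cl_gt_device.h hunks advertise coarse-grain buffer SVM through the standard CL_DEVICE_SVM_CAPABILITIES query. The sketch below (illustrative only, not part of the patch; the helper name is ours and error handling is trimmed) shows how an application would check for that capability before relying on the new paths:

#define CL_TARGET_OPENCL_VERSION 200
#include <CL/cl.h>

/* Return non-zero if `device` reports coarse-grain buffer SVM. */
static int device_has_coarse_grain_svm(cl_device_id device)
{
  cl_device_svm_capabilities caps = 0;
  /* On pre-2.0 implementations this query fails; treat that as "no SVM". */
  if (clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES,
                      sizeof(caps), &caps, NULL) != CL_SUCCESS)
    return 0;
  return (caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) != 0;
}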
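
The entry points wired up in cl_api.c are the standard OpenCL 2.0 coarse-grain SVM calls, so an application drives them in the usual allocate / map / fill / unmap / launch / free order. The following is a minimal host-side sketch, not part of the patch: it assumes `ctx`, `queue`, and `kernel` already exist and that the kernel takes a __global int pointer as argument 0.

#define CL_TARGET_OPENCL_VERSION 200
#include <CL/cl.h>

static cl_int run_coarse_grain_svm(cl_context ctx, cl_command_queue queue,
                                   cl_kernel kernel, size_t n)
{
  cl_int err;
  size_t bytes = n * sizeof(cl_int);

  /* clSVMAlloc() returns a pointer shared by the host and the kernels. */
  cl_int *buf = (cl_int *)clSVMAlloc(ctx, CL_MEM_READ_WRITE, bytes, 0);
  if (buf == NULL)
    return CL_OUT_OF_HOST_MEMORY;

  /* Coarse-grain SVM: map before the host touches the memory ... */
  err = clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, buf, bytes, 0, NULL, NULL);
  if (err != CL_SUCCESS) goto out;
  for (size_t i = 0; i < n; i++)
    buf[i] = (cl_int)i;
  /* ... and unmap before the device uses it. */
  err = clEnqueueSVMUnmap(queue, buf, 0, NULL, NULL);
  if (err != CL_SUCCESS) goto out;

  /* The SVM pointer is passed directly, not wrapped in a cl_mem. */
  err = clSetKernelArgSVMPointer(kernel, 0, buf);
  if (err != CL_SUCCESS) goto out;

  err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &n, NULL, 0, NULL, NULL);
  if (err == CL_SUCCESS)
    err = clFinish(queue);
  /* To read results on the host, the pointer would be mapped again here. */

out:
  clSVMFree(ctx, buf);
  return err;
}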