diff options
author | Guo Yejun <yejun.guo@intel.com> | 2015-09-25 04:27:51 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2015-10-14 11:26:51 +0800 |
commit | 74f29f500dd6b188e45cabc70cee8d1565733c47 (patch) | |
tree | 8f17a7dcb1cb2552ba484ae1d2c77ceaa29fec8f /src | |
parent | 91e2df5540c1fb7f093fbc051e42186a4b8f3113 (diff) |
enable USE_HOST_PTR for cl image with userptr to avoid extra copying
The host pointer must be 64-byte aligned, and userptr is used only when w and h
equal their aligned values; otherwise, fall back to the old method with extra copying.
Signed-off-by: Guo Yejun <yejun.guo@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/cl_command_queue.c | 7 | ||||
-rw-r--r-- | src/cl_enqueue.c | 10 | ||||
-rw-r--r-- | src/cl_mem.c | 45 |
3 files changed, 44 insertions, 18 deletions
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index c8934583..9dc3fe64 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -133,19 +133,16 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) struct _cl_mem_image *image; assert(interp_kernel_get_arg_type(k->opaque, id) == GBE_ARG_IMAGE); - //currently, user ptr is not supported for cl image, so offset should be always zero - assert(k->args[id].mem->offset == 0); - image = cl_mem_image(k->args[id].mem); set_image_info(k->curbe, &k->images[i], image); - cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset, + cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset + k->args[id].mem->offset, image->intel_fmt, image->image_type, image->bpp, image->w, image->h, image->depth, image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling); // TODO, this workaround is for GEN7/GEN75 only, we may need to do it in the driver layer // on demand. if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) - cl_gpgpu_bind_image(gpgpu, k->images[i].idx + BTI_WORKAROUND_IMAGE_OFFSET, image->base.bo, image->offset, + cl_gpgpu_bind_image(gpgpu, k->images[i].idx + BTI_WORKAROUND_IMAGE_OFFSET, image->base.bo, image->offset + k->args[id].mem->offset, image->intel_fmt, image->image_type, image->bpp, image->w, image->h, image->depth, image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling); diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index 9e34bb89..cec368cb 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -316,8 +316,9 @@ cl_int cl_enqueue_map_image(enqueue_data *data) if(mem->flags & CL_MEM_USE_HOST_PTR) { assert(mem->host_ptr); - //src and dst need add offset in function cl_mem_copy_image_region - cl_mem_copy_image_region(data->origin, data->region, + if (!mem->is_userptr) + //src and dst need add offset in function cl_mem_copy_image_region + cl_mem_copy_image_region(data->origin, data->region, mem->host_ptr, 
image->host_row_pitch, image->host_slice_pitch, data->ptr, row_pitch, image->slice_pitch, image, CL_TRUE, CL_TRUE); } @@ -374,8 +375,9 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data) row_pitch = image->slice_pitch; else row_pitch = image->row_pitch; - //v_ptr have added offset, host_ptr have not added offset. - cl_mem_copy_image_region(origin, region, v_ptr, row_pitch, image->slice_pitch, + if (!memobj->is_userptr) + //v_ptr have added offset, host_ptr have not added offset. + cl_mem_copy_image_region(origin, region, v_ptr, row_pitch, image->slice_pitch, memobj->host_ptr, image->host_row_pitch, image->host_slice_pitch, image, CL_FALSE, CL_TRUE); } diff --git a/src/cl_mem.c b/src/cl_mem.c index fcef2fa5..561e0a4a 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -288,7 +288,6 @@ cl_mem_allocate(enum cl_mem_type type, int cacheline_size = 0; cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL); - /* currently only cl buf is supported, will add cl image support later */ if (type == CL_MEM_BUFFER_TYPE) { if (flags & CL_MEM_USE_HOST_PTR) { assert(host_ptr != NULL); @@ -312,6 +311,18 @@ cl_mem_allocate(enum cl_mem_type type, mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", internal_host_ptr, alignedSZ, 0); bufCreated = 1; } + } else if (type == CL_MEM_IMAGE_TYPE) { + if (host_ptr != NULL) { + assert(flags & CL_MEM_USE_HOST_PTR); + assert(!is_tiled); + assert(ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned long)host_ptr); + void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & (~(page_size - 1))); + mem->offset = host_ptr - aligned_host_ptr; + mem->is_userptr = 1; + size_t aligned_sz = ALIGN((mem->offset + sz), page_size); + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0); + bufCreated = 1; + } } } @@ -823,6 +834,16 @@ _cl_mem_new_image(cl_context ctx, #undef DO_IMAGE_ERROR + uint8_t enableUserptr = 0; + if 
(ctx->device->host_unified_memory && data != NULL && (flags & CL_MEM_USE_HOST_PTR)) { + int cacheline_size = 0; + cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL); + if (ALIGN((unsigned long)data, cacheline_size) == (unsigned long)data) { //might more conditions here + tiling = CL_NO_TILE; + enableUserptr = 1; + } + } + /* Tiling requires to align both pitch and height */ if (tiling == CL_NO_TILE) { aligned_pitch = w * bpp; @@ -861,8 +882,12 @@ _cl_mem_new_image(cl_context ctx, if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER) { if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, buffer, &err); - else - mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, data, NULL, &err); + else { + if (enableUserptr) + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, data, NULL, &err); + else + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, NULL, &err); + } } else { mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, NULL, &err); if (mem != NULL && err == CL_SUCCESS) { @@ -892,13 +917,15 @@ _cl_mem_new_image(cl_context ctx, 0, 0, 0); /* Copy the data if required */ - if (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { + if (flags & CL_MEM_COPY_HOST_PTR) cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data); - if (flags & CL_MEM_USE_HOST_PTR) { - mem->host_ptr = data; - cl_mem_image(mem)->host_row_pitch = pitch; - cl_mem_image(mem)->host_slice_pitch = slice_pitch; - } + + if (flags & CL_MEM_USE_HOST_PTR) { + mem->host_ptr = data; + cl_mem_image(mem)->host_row_pitch = pitch; + cl_mem_image(mem)->host_slice_pitch = slice_pitch; + if (!enableUserptr) + cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data); } exit: |