/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia
 */

#include "cl_mem.h"
#include "cl_image.h"
#include "cl_context.h"
#include "cl_utils.h"
#include "cl_alloc.h"
#include "cl_device_id.h"
#include "cl_driver.h"
#include "cl_khr_icd.h"
#include "CL/cl.h"
#include "CL/cl_intel.h"

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define FIELD_SIZE(CASE,TYPE)               \
  case JOIN(CL_,CASE):                      \
    if(param_value_size_ret)                \
      *param_value_size_ret = sizeof(TYPE); \
    if(!param_value)                        \
      return CL_SUCCESS;                    \
    if(param_value_size < sizeof(TYPE))     \
      return CL_INVALID_VALUE;              \
    break;

LOCAL cl_int
cl_get_mem_object_info(cl_mem mem,
                       cl_mem_info param_name,
                       size_t param_value_size,
                       void *param_value,
                       size_t *param_value_size_ret)
{
  switch(param_name)
  {
    FIELD_SIZE(MEM_TYPE, cl_mem_object_type);
    FIELD_SIZE(MEM_FLAGS, cl_mem_flags);
    FIELD_SIZE(MEM_SIZE, size_t);
    FIELD_SIZE(MEM_HOST_PTR, void *);
    FIELD_SIZE(MEM_MAP_COUNT, cl_uint);
    FIELD_SIZE(MEM_REFERENCE_COUNT, cl_uint);
    FIELD_SIZE(MEM_CONTEXT, cl_context);
  default:
    return CL_INVALID_VALUE;
  }

  switch(param_name)
  {
  case CL_MEM_TYPE:
    *((cl_mem_object_type *)param_value) = mem->type;
    break;
  case CL_MEM_FLAGS:
    *((cl_mem_flags *)param_value) = mem->flags;
    break;
  case CL_MEM_SIZE:
    *((size_t *)param_value) = cl_buffer_get_size(mem->bo);
    break;
  case CL_MEM_HOST_PTR:
    NOT_IMPLEMENTED;
    break;
  case CL_MEM_MAP_COUNT:
    NOT_IMPLEMENTED;
    break;
  case CL_MEM_REFERENCE_COUNT:
    NOT_IMPLEMENTED;
    break;
  case CL_MEM_CONTEXT:
    NOT_IMPLEMENTED;
    break;
  }

  return CL_SUCCESS;
}

#undef FIELD_SIZE

static cl_mem
cl_mem_allocate(cl_context ctx,
                cl_mem_flags flags,
                size_t sz,
                cl_int is_tiled,
                cl_int *errcode)
{
  cl_buffer_mgr bufmgr = NULL;
  cl_mem mem = NULL;
  cl_int err = CL_SUCCESS;
  size_t alignment = 64;
  cl_ulong max_mem_size;

  assert(ctx);
  FATAL_IF (flags & CL_MEM_ALLOC_HOST_PTR, "CL_MEM_ALLOC_HOST_PTR unsupported"); /* XXX */
  FATAL_IF (flags & CL_MEM_USE_HOST_PTR, "CL_MEM_USE_HOST_PTR unsupported");     /* XXX */

  if ((err = cl_get_device_info(ctx->device,
                                CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                                sizeof(max_mem_size),
                                &max_mem_size,
                                NULL)) != CL_SUCCESS) {
    goto error;
  }

  if (UNLIKELY(sz == 0 || sz > max_mem_size)) {
    err = CL_INVALID_BUFFER_SIZE;
    goto error;
  }

  /* Allocate and initialize the structure itself */
  TRY_ALLOC (mem, CALLOC(struct _cl_mem));
  SET_ICD(mem->dispatch)
  mem->ref_n = 1;
  mem->magic = CL_MAGIC_MEM_HEADER;
  mem->flags = flags;

  /* Pinning will require stricter alignment rules */
  if ((flags & CL_MEM_PINNABLE) || is_tiled)
    alignment = 4096;

  /* Allocate space in memory */
  bufmgr = cl_context_get_bufmgr(ctx);
  assert(bufmgr);
  mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
  if (UNLIKELY(mem->bo == NULL)) {
    err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    goto error;
  }
  mem->size = sz;

  /* Append the buffer in the context buffer list */
  pthread_mutex_lock(&ctx->buffer_lock);
  mem->next = ctx->buffers;
  if (ctx->buffers != NULL)
    ctx->buffers->prev = mem;
  ctx->buffers = mem;
  pthread_mutex_unlock(&ctx->buffer_lock);
  mem->ctx = ctx;
  cl_context_add_ref(ctx);

exit:
  if (errcode)
    *errcode = err;
  return mem;
error:
  cl_mem_delete(mem);
  mem = NULL;
  goto exit;
}
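
/*
 * Ownership note (added for clarity): every buffer object is linked into a
 * doubly-linked list owned by its context (ctx->buffers, protected by
 * ctx->buffer_lock). cl_mem_allocate() pushes new objects at the head of
 * that list and takes a reference on the context; cl_mem_delete() unlinks
 * the object and drops that reference once the last cl_mem reference is
 * released.
 */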

LOCAL cl_mem
cl_mem_new(cl_context ctx,
           cl_mem_flags flags,
           size_t sz,
           void *data,
           cl_int *errcode_ret)
{
  cl_int err = CL_SUCCESS;
  cl_mem mem = NULL;

  /* Check flags consistency */
  if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
    err = CL_INVALID_HOST_PTR;
    goto error;
  }

  /* Create the buffer in video memory */
  mem = cl_mem_allocate(ctx, flags, sz, CL_FALSE, &err);
  if (mem == NULL || err != CL_SUCCESS)
    goto error;

  /* Copy the data if required */
  if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
    cl_buffer_subdata(mem->bo, 0, sz, data);

exit:
  if (errcode_ret)
    *errcode_ret = err;
  return mem;
error:
  cl_mem_delete(mem);
  mem = NULL;
  goto exit;
}

static void
cl_mem_copy_image(cl_mem image,
                  size_t row_pitch,
                  size_t slice_pitch,
                  void* host_ptr)
{
  char* dst_ptr = cl_mem_map_auto(image);

  if (row_pitch == image->row_pitch &&
      (image->depth == 1 || slice_pitch == image->slice_pitch)) {
    memcpy(dst_ptr, host_ptr, image->depth == 1 ? row_pitch*image->h : slice_pitch*image->depth);
  }
  else {
    size_t y, z;
    for (z = 0; z < image->depth; z++) {
      const char* src = host_ptr;
      char* dst = dst_ptr;
      for (y = 0; y < image->h; y++) {
        memcpy(dst, src, image->bpp*image->w);
        src += row_pitch;
        dst += image->row_pitch;
      }
      host_ptr = (char*)host_ptr + slice_pitch;
      dst_ptr = (char*)dst_ptr + image->slice_pitch;
    }
  }

  cl_mem_unmap_auto(image);
}

static const uint32_t tile_sz = 4096; /* 4KB per tile */
static const uint32_t tilex_w = 512;  /* tileX width in bytes */
static const uint32_t tilex_h = 8;    /* tileX height in number of rows */
static const uint32_t tiley_w = 128;  /* tileY width in bytes */
static const uint32_t tiley_h = 32;   /* tileY height in number of rows */

static cl_mem
_cl_mem_new_image(cl_context ctx,
                  cl_mem_flags flags,
                  const cl_image_format *fmt,
                  const cl_mem_object_type image_type,
                  size_t w,
                  size_t h,
                  size_t depth,
                  size_t pitch,
                  size_t slice_pitch,
                  void *data,
                  cl_int *errcode_ret)
{
  cl_int err = CL_SUCCESS;
  cl_mem mem = NULL;
  uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
  size_t sz = 0, aligned_pitch = 0, aligned_h;
  cl_image_tiling_t tiling = CL_NO_TILE;

  /* Check flags consistency */
  if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR) && data == NULL)) {
    err = CL_INVALID_HOST_PTR;
    goto error;
  }

  /* Get the size of each pixel */
  if (UNLIKELY((err = cl_image_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS))
    goto error;

  /* Only a sub-set of the formats are supported */
  intel_fmt = cl_image_get_intel_format(fmt);
  if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) {
    err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    goto error;
  }

  /* See if the user parameters match */
#define DO_IMAGE_ERROR            \
  do {                            \
    err = CL_INVALID_IMAGE_SIZE;  \
    goto error;                   \
  } while (0);

  if (UNLIKELY(w == 0)) DO_IMAGE_ERROR;
  if (UNLIKELY(h == 0)) DO_IMAGE_ERROR;

  if (image_type == CL_MEM_OBJECT_IMAGE2D) {
    size_t min_pitch = bpp * w;
    if (data && pitch == 0)
      pitch = min_pitch;
    if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
    if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
    if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
    if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;

    /* Pick up tiling mode (we do only linear on SNB) */
    if (cl_driver_get_ver(ctx->drv) != 6)
      tiling = CL_TILE_Y;
    depth = 1;
  }

  if (image_type == CL_MEM_OBJECT_IMAGE3D) {
    size_t min_pitch = bpp * w;
    if (data && pitch == 0)
      pitch = min_pitch;
    size_t min_slice_pitch = min_pitch * h;
    if (data && slice_pitch == 0)
      slice_pitch = min_slice_pitch;
    if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR;
    if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR;
    if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR;
    if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
    if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR;
    if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
    if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;

    /* Pick up tiling mode (we do only linear on SNB) */
    if (cl_driver_get_ver(ctx->drv) != 6)
      tiling = CL_TILE_Y;
  }
#undef DO_IMAGE_ERROR

  /* Tiling requires both pitch and height to be aligned */
  if (tiling == CL_NO_TILE) {
    aligned_pitch = w * bpp;
    aligned_h = h;
  }
  else if (tiling == CL_TILE_X) {
    aligned_pitch = ALIGN(w * bpp, tilex_w);
    aligned_h = ALIGN(h, tilex_h);
  }
  else if (tiling == CL_TILE_Y) {
    aligned_pitch = ALIGN(w * bpp, tiley_w);
    aligned_h = ALIGN(h, tiley_h);
  }

  sz = aligned_pitch * aligned_h * depth;
  mem = cl_mem_allocate(ctx, flags, sz, tiling != CL_NO_TILE, &err);
  if (mem == NULL || err != CL_SUCCESS)
    goto error;

  mem->w = w;
  mem->h = h;
  mem->depth = depth;
  mem->fmt = *fmt;
  mem->intel_fmt = intel_fmt;
  mem->bpp = bpp;
  mem->is_image = 1;
  mem->row_pitch = aligned_pitch;
  mem->slice_pitch = image_type == CL_MEM_OBJECT_IMAGE1D || image_type == CL_MEM_OBJECT_IMAGE2D ?
                     0 : aligned_pitch*aligned_h;
  mem->tiling = tiling;
  mem->type = image_type;
  cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);

  /* Copy the data if required */
  if (flags & CL_MEM_COPY_HOST_PTR)
    cl_mem_copy_image(mem, pitch, slice_pitch, data);

exit:
  if (errcode_ret)
    *errcode_ret = err;
  return mem;
error:
  cl_mem_delete(mem);
  mem = NULL;
  goto exit;
}
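
/*
 * Illustrative example (not from the original source): for a 100x100
 * CL_RGBA / CL_UNORM_INT8 image (bpp = 4) allocated with TILE_Y, the code
 * above computes
 *   aligned_pitch = ALIGN(100 * 4, tiley_w = 128) = 512 bytes
 *   aligned_h     = ALIGN(100, tiley_h = 32)      = 128 rows
 * so the buffer object is 512 * 128 = 65536 bytes per slice, even though
 * the tightly packed image data is only 400 * 100 bytes.
 */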

LOCAL cl_mem
cl_mem_new_image(cl_context context,
                 cl_mem_flags flags,
                 const cl_image_format *image_format,
                 const cl_image_desc *image_desc,
                 void *host_ptr,
                 cl_int *errcode_ret)
{
  switch (image_desc->image_type) {
  case CL_MEM_OBJECT_IMAGE1D:
  case CL_MEM_OBJECT_IMAGE2D:
  case CL_MEM_OBJECT_IMAGE3D:
    return _cl_mem_new_image(context, flags, image_format,
                             image_desc->image_type,
                             image_desc->image_width,
                             image_desc->image_height,
                             image_desc->image_depth,
                             image_desc->image_row_pitch,
                             image_desc->image_slice_pitch,
                             host_ptr, errcode_ret);
  case CL_MEM_OBJECT_IMAGE2D_ARRAY:
  case CL_MEM_OBJECT_IMAGE1D_ARRAY:
  case CL_MEM_OBJECT_IMAGE1D_BUFFER:
    NOT_IMPLEMENTED;
    break;
  case CL_MEM_OBJECT_BUFFER:
  default:
    assert(0);
  }
  return NULL;
}

LOCAL void
cl_mem_delete(cl_mem mem)
{
  if (UNLIKELY(mem == NULL))
    return;
  if (atomic_dec(&mem->ref_n) > 1)
    return;
  if (LIKELY(mem->bo != NULL))
    cl_buffer_unreference(mem->bo);
#ifdef HAS_EGL
  if (UNLIKELY(mem->egl_image != NULL)) {
    cl_mem_gl_delete(mem);
  }
#endif

  /* Remove it from the list */
  assert(mem->ctx);
  pthread_mutex_lock(&mem->ctx->buffer_lock);
  if (mem->prev)
    mem->prev->next = mem->next;
  if (mem->next)
    mem->next->prev = mem->prev;
  if (mem->ctx->buffers == mem)
    mem->ctx->buffers = mem->next;
  pthread_mutex_unlock(&mem->ctx->buffer_lock);
  cl_context_delete(mem->ctx);

  cl_free(mem);
}

LOCAL void
cl_mem_add_ref(cl_mem mem)
{
  assert(mem);
  atomic_inc(&mem->ref_n);
}

LOCAL void*
cl_mem_map(cl_mem mem)
{
  cl_buffer_map(mem->bo, 1);
  assert(cl_buffer_get_virtual(mem->bo));
  return cl_buffer_get_virtual(mem->bo);
}

LOCAL cl_int
cl_mem_unmap(cl_mem mem)
{
  cl_buffer_unmap(mem->bo);
  return CL_SUCCESS;
}
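
/*
 * Mapping note (added for clarity): cl_buffer_map() gives a direct CPU
 * mapping, which is only meaningful for linearly laid out objects. Tiled
 * images are instead mapped through the GTT aperture (cl_mem_map_gtt()
 * below) so the CPU gets a linear view of the tiled surface;
 * cl_mem_map_auto() picks between the two based on the tiling mode.
 */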

LOCAL void*
cl_mem_map_gtt(cl_mem mem)
{
  cl_buffer_map_gtt(mem->bo);
  assert(cl_buffer_get_virtual(mem->bo));
  return cl_buffer_get_virtual(mem->bo);
}

LOCAL cl_int
cl_mem_unmap_gtt(cl_mem mem)
{
  cl_buffer_unmap_gtt(mem->bo);
  return CL_SUCCESS;
}

LOCAL void*
cl_mem_map_auto(cl_mem mem)
{
  if (mem->is_image && mem->tiling != CL_NO_TILE)
    return cl_mem_map_gtt(mem);
  else
    return cl_mem_map(mem);
}

LOCAL cl_int
cl_mem_unmap_auto(cl_mem mem)
{
  if (mem->is_image && mem->tiling != CL_NO_TILE)
    cl_buffer_unmap_gtt(mem->bo);
  else
    cl_buffer_unmap(mem->bo);
  return CL_SUCCESS;
}

LOCAL cl_int
cl_mem_pin(cl_mem mem)
{
  assert(mem);
  if (UNLIKELY((mem->flags & CL_MEM_PINNABLE) == 0))
    return CL_INVALID_MEM_OBJECT;
  cl_buffer_pin(mem->bo, 4096);
  return CL_SUCCESS;
}

LOCAL cl_int
cl_mem_unpin(cl_mem mem)
{
  assert(mem);
  if (UNLIKELY((mem->flags & CL_MEM_PINNABLE) == 0))
    return CL_INVALID_MEM_OBJECT;
  cl_buffer_unpin(mem->bo);
  return CL_SUCCESS;
}
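
/*
 * Minimal usage sketch (illustrative only, compiled out): shows how the
 * internal API defined in this file fits together for a plain buffer.
 * The caller, error handling and data below are hypothetical.
 */
#if 0
static void
example_buffer_roundtrip(cl_context ctx)
{
  cl_int err = CL_SUCCESS;
  float data[16] = {0.f};

  /* Create a buffer and copy the host data into it */
  cl_mem buf = cl_mem_new(ctx, CL_MEM_COPY_HOST_PTR, sizeof(data), data, &err);
  if (buf == NULL || err != CL_SUCCESS)
    return;

  /* Map it (linear buffer, so a plain CPU map), touch it, unmap it */
  float *ptr = cl_mem_map_auto(buf);
  ptr[0] = 1.f;
  cl_mem_unmap_auto(buf);

  /* Drop the last reference: unlinks it from the context and frees it */
  cl_mem_delete(buf);
}
#endif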