diff options
author | Zhigang Gong <zhigang.gong@linux.intel.com> | 2014-12-14 00:34:20 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-12-18 15:42:24 +0800 |
commit | 888e9b9122ebb07f92107c2cfa16344b7d390762 (patch) | |
tree | 97ac6778b5c9136edf30be87a8f9baeac6ae355d /src | |
parent | 1cf4f004ac28d5f03118b16dca5f0e777a9e3d63 (diff) |
GBE/CL: use 2D image to implement large image1D_buffer.
Per the OpenCL spec, the minimum CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is 65536
pixels, which is too large for a 1D surface on Gen platforms.
We therefore have to use a 2D surface to implement it. This is feasible
because the OpenCL spec only allows an image1d_buffer_t to be accessed via
the default sampler, so it will never use float coordinates and never use
linear (non-nearest) filtering.
Signed-off-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/cl_gt_device.h | 2 | ||||
-rw-r--r-- | src/cl_mem.c | 52 | ||||
-rw-r--r-- | src/cl_mem.h | 6 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 2 |
4 files changed, 50 insertions, 12 deletions
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index ed19f109..4faa15a1 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -50,7 +50,7 @@ .image3d_max_width = 8192, .image3d_max_height = 8192, .image3d_max_depth = 2048, -.image_mem_size = 8192, +.image_mem_size = 65536, .max_samplers = 16, .mem_base_addr_align = sizeof(cl_long) * 16 * 8, .min_data_type_align_size = sizeof(cl_long) * 16, diff --git a/src/cl_mem.c b/src/cl_mem.c index 3055bea5..3225fd27 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -190,10 +190,18 @@ cl_get_image_info(cl_mem mem, *(size_t *)param_value = image->slice_pitch; break; case CL_IMAGE_WIDTH: - *(size_t *)param_value = image->w; + + if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE) { + struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image*) image; + *(size_t *)param_value = buffer1d_image->size; + } else + *(size_t *)param_value = image->w; break; case CL_IMAGE_HEIGHT: - *(size_t *)param_value = IS_1D(image) ? 0 : image->h; + if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE) + *(size_t *)param_value = 0; + else + *(size_t *)param_value = IS_1D(image) ? 0 : image->h; break; case CL_IMAGE_DEPTH: *(size_t *)param_value = IS_3D(image) ? 
image->depth : 0; @@ -243,6 +251,10 @@ cl_mem_allocate(enum cl_mem_type type, struct _cl_mem_gl_image *gl_image = NULL; TRY_ALLOC (gl_image, CALLOC(struct _cl_mem_gl_image)); mem = &gl_image->base.base; + } else if (type == CL_MEM_BUFFER1D_IMAGE_TYPE) { + struct _cl_mem_buffer1d_image *buffer1d_image = NULL; + TRY_ALLOC(buffer1d_image, CALLOC(struct _cl_mem_buffer1d_image)); + mem = &buffer1d_image->base.base; } else { struct _cl_mem_buffer *buffer = NULL; TRY_ALLOC (buffer, CALLOC(struct _cl_mem_buffer)); @@ -678,6 +690,7 @@ _cl_mem_new_image(cl_context ctx, cl_mem_object_type image_type = orig_image_type; uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT; size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0; + size_t origin_width = w; // for image1d buffer work around. cl_image_tiling_t tiling = CL_NO_TILE; /* Check flags consistency */ @@ -710,8 +723,7 @@ _cl_mem_new_image(cl_context ctx, image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER))) DO_IMAGE_ERROR; - if (image_type == CL_MEM_OBJECT_IMAGE1D || - image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { + if (image_type == CL_MEM_OBJECT_IMAGE1D) { size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; @@ -724,19 +736,30 @@ _cl_mem_new_image(cl_context ctx, if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR; tiling = CL_NO_TILE; - } else if (image_type == CL_MEM_OBJECT_IMAGE2D) { + } else if (image_type == CL_MEM_OBJECT_IMAGE2D || + image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { + + if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { + if (UNLIKELY(w > ctx->device->image_mem_size)) DO_IMAGE_ERROR; + /* This is an image1d buffer which exceeds normal image size restrication + We have to use a 2D image to simulate this 1D image. */ + h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width; + w = w > ctx->device->image2d_max_width ? 
ctx->device->image2d_max_width : w; + tiling = CL_NO_TILE; + } else if (cl_driver_get_ver(ctx->drv) != 6) { + /* Pick up tiling mode (we do only linear on SNB) */ + tiling = cl_get_default_tiling(ctx->drv); + } + size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; + if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR; if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; - /* Pick up tiling mode (we do only linear on SNB) */ - if (cl_driver_get_ver(ctx->drv) != 6) - tiling = cl_get_default_tiling(ctx->drv); - depth = 1; } else if (image_type == CL_MEM_OBJECT_IMAGE3D || image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || @@ -791,7 +814,16 @@ _cl_mem_new_image(cl_context ctx, sz = aligned_pitch * aligned_h * depth; } - mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err); + if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER) + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err); + else { + mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err); + if (mem != NULL && err == CL_SUCCESS) { + struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image *)mem; + buffer1d_image->size = origin_width;; + } + } + if (mem == NULL || err != CL_SUCCESS) goto error; diff --git a/src/cl_mem.h b/src/cl_mem.h index 1641dcc4..fd502203 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -72,6 +72,7 @@ enum cl_mem_type { CL_MEM_SUBBUFFER_TYPE, CL_MEM_IMAGE_TYPE, CL_MEM_GL_IMAGE_TYPE, + CL_MEM_BUFFER1D_IMAGE_TYPE }; #define IS_IMAGE(mem) (mem->type >= CL_MEM_IMAGE_TYPE) #define IS_GL_IMAGE(mem) (mem->type == CL_MEM_GL_IMAGE_TYPE) @@ -117,6 +118,11 @@ struct _cl_mem_gl_image { uint32_t texture; }; +struct _cl_mem_buffer1d_image { + struct _cl_mem_image base; + uint32_t size; +}; + inline static void 
cl_mem_image_init(struct _cl_mem_image *image, size_t w, size_t h, cl_mem_object_type image_type, diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 0df78768..c80a11ba 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -1003,11 +1003,11 @@ static int intel_get_surface_type(cl_mem_object_type type) { switch (type) { - case CL_MEM_OBJECT_IMAGE1D_BUFFER: case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: return I965_SURFACE_1D; + case CL_MEM_OBJECT_IMAGE1D_BUFFER: case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: return I965_SURFACE_2D; |