summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@linux.intel.com>2014-12-14 00:34:20 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-12-18 15:42:24 +0800
commit888e9b9122ebb07f92107c2cfa16344b7d390762 (patch)
tree97ac6778b5c9136edf30be87a8f9baeac6ae355d /src
parent1cf4f004ac28d5f03118b16dca5f0e777a9e3d63 (diff)
GBE/CL: use 2D image to implement large image1D_buffer.
Per the OpenCL spec, the minimum CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is 65536, which is too large for a 1D surface on Gen platforms, so we have to use a 2D surface to implement it. Because the OpenCL spec only allows the image1d_t to be accessed via the default sampler, this is doable: such accesses never use float coordinates and never use linear (non-nearest) filters. Signed-off-by: Zhigang Gong <zhigang.gong@linux.intel.com> Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/cl_gt_device.h2
-rw-r--r--src/cl_mem.c52
-rw-r--r--src/cl_mem.h6
-rw-r--r--src/intel/intel_gpgpu.c2
4 files changed, 50 insertions, 12 deletions
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index ed19f109..4faa15a1 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -50,7 +50,7 @@
.image3d_max_width = 8192,
.image3d_max_height = 8192,
.image3d_max_depth = 2048,
-.image_mem_size = 8192,
+.image_mem_size = 65536,
.max_samplers = 16,
.mem_base_addr_align = sizeof(cl_long) * 16 * 8,
.min_data_type_align_size = sizeof(cl_long) * 16,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 3055bea5..3225fd27 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -190,10 +190,18 @@ cl_get_image_info(cl_mem mem,
*(size_t *)param_value = image->slice_pitch;
break;
case CL_IMAGE_WIDTH:
- *(size_t *)param_value = image->w;
+
+ if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE) {
+ struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image*) image;
+ *(size_t *)param_value = buffer1d_image->size;
+ } else
+ *(size_t *)param_value = image->w;
break;
case CL_IMAGE_HEIGHT:
- *(size_t *)param_value = IS_1D(image) ? 0 : image->h;
+ if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE)
+ *(size_t *)param_value = 0;
+ else
+ *(size_t *)param_value = IS_1D(image) ? 0 : image->h;
break;
case CL_IMAGE_DEPTH:
*(size_t *)param_value = IS_3D(image) ? image->depth : 0;
@@ -243,6 +251,10 @@ cl_mem_allocate(enum cl_mem_type type,
struct _cl_mem_gl_image *gl_image = NULL;
TRY_ALLOC (gl_image, CALLOC(struct _cl_mem_gl_image));
mem = &gl_image->base.base;
+ } else if (type == CL_MEM_BUFFER1D_IMAGE_TYPE) {
+ struct _cl_mem_buffer1d_image *buffer1d_image = NULL;
+ TRY_ALLOC(buffer1d_image, CALLOC(struct _cl_mem_buffer1d_image));
+ mem = &buffer1d_image->base.base;
} else {
struct _cl_mem_buffer *buffer = NULL;
TRY_ALLOC (buffer, CALLOC(struct _cl_mem_buffer));
@@ -678,6 +690,7 @@ _cl_mem_new_image(cl_context ctx,
cl_mem_object_type image_type = orig_image_type;
uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0;
+ size_t origin_width = w; // for image1d buffer work around.
cl_image_tiling_t tiling = CL_NO_TILE;
/* Check flags consistency */
@@ -710,8 +723,7 @@ _cl_mem_new_image(cl_context ctx,
image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)))
DO_IMAGE_ERROR;
- if (image_type == CL_MEM_OBJECT_IMAGE1D ||
- image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+ if (image_type == CL_MEM_OBJECT_IMAGE1D) {
size_t min_pitch = bpp * w;
if (data && pitch == 0)
pitch = min_pitch;
@@ -724,19 +736,30 @@ _cl_mem_new_image(cl_context ctx,
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
tiling = CL_NO_TILE;
- } else if (image_type == CL_MEM_OBJECT_IMAGE2D) {
+ } else if (image_type == CL_MEM_OBJECT_IMAGE2D ||
+ image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+
+ if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+ if (UNLIKELY(w > ctx->device->image_mem_size)) DO_IMAGE_ERROR;
+ /* An image1d buffer may exceed the normal 1D image size restriction,
+ so we use a 2D image to simulate this 1D image. */
+ h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width;
+ w = w > ctx->device->image2d_max_width ? ctx->device->image2d_max_width : w;
+ tiling = CL_NO_TILE;
+ } else if (cl_driver_get_ver(ctx->drv) != 6) {
+ /* Pick up tiling mode (we do only linear on SNB) */
+ tiling = cl_get_default_tiling(ctx->drv);
+ }
+
size_t min_pitch = bpp * w;
if (data && pitch == 0)
pitch = min_pitch;
+
if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
- /* Pick up tiling mode (we do only linear on SNB) */
- if (cl_driver_get_ver(ctx->drv) != 6)
- tiling = cl_get_default_tiling(ctx->drv);
-
depth = 1;
} else if (image_type == CL_MEM_OBJECT_IMAGE3D ||
image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
@@ -791,7 +814,16 @@ _cl_mem_new_image(cl_context ctx,
sz = aligned_pitch * aligned_h * depth;
}
- mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+ if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
+ mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+ else {
+ mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+ if (mem != NULL && err == CL_SUCCESS) {
+ struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image *)mem;
+ buffer1d_image->size = origin_width;;
+ }
+ }
+
if (mem == NULL || err != CL_SUCCESS)
goto error;
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 1641dcc4..fd502203 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -72,6 +72,7 @@ enum cl_mem_type {
CL_MEM_SUBBUFFER_TYPE,
CL_MEM_IMAGE_TYPE,
CL_MEM_GL_IMAGE_TYPE,
+ CL_MEM_BUFFER1D_IMAGE_TYPE
};
#define IS_IMAGE(mem) (mem->type >= CL_MEM_IMAGE_TYPE)
#define IS_GL_IMAGE(mem) (mem->type == CL_MEM_GL_IMAGE_TYPE)
@@ -117,6 +118,11 @@ struct _cl_mem_gl_image {
uint32_t texture;
};
+struct _cl_mem_buffer1d_image {
+ struct _cl_mem_image base;
+ uint32_t size;
+};
+
inline static void
cl_mem_image_init(struct _cl_mem_image *image, size_t w, size_t h,
cl_mem_object_type image_type,
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 0df78768..c80a11ba 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1003,11 +1003,11 @@ static int
intel_get_surface_type(cl_mem_object_type type)
{
switch (type) {
- case CL_MEM_OBJECT_IMAGE1D_BUFFER:
case CL_MEM_OBJECT_IMAGE1D:
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
return I965_SURFACE_1D;
+ case CL_MEM_OBJECT_IMAGE1D_BUFFER:
case CL_MEM_OBJECT_IMAGE2D:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
return I965_SURFACE_2D;