summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Zhigang Gong <zhigang.gong@intel.com> 2014-06-18 10:10:07 +0800
committer: Zhigang Gong <zhigang.gong@intel.com> 2014-06-20 15:53:28 +0800
commit: 0aa3d33e0da27a3772964aecd72556f0770f63fb (patch)
tree: cbc2d79f007a986e5f1bff55e8b630e11e8cdb06 /src
parent: 5745447ce9b56504aeb95e2bf03b93c4d3a9d5dc (diff)
GBE/runtime: fixup broken 1d array image support.
As the sample LD message doesn't support an array index, we have to create a 2D array surface backed by the same buffer object. Thus each 1D array image will have two surfaces bound to it: one at index and the second at 128 + index. Then, on the kernel side, we access the corresponding 2D array surface when the LD message is required; otherwise we access the original 1D array surface.
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Reviewed-by: He Junyan <junyan.he@inbox.com>
Diffstat (limited to 'src')
-rw-r--r-- src/cl_api.c 5
-rw-r--r-- src/cl_command_queue.c 5
-rw-r--r-- src/cl_device_id.c 1
-rw-r--r-- src/cl_device_id.h 1
-rw-r--r-- src/cl_gt_device.h 1
-rw-r--r-- src/cl_mem.c 29
-rw-r--r-- src/intel/intel_gpgpu.c 12
7 files changed, 36 insertions, 18 deletions
diff --git a/src/cl_api.c b/src/cl_api.c
index 3b118a88..d5ee645e 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -674,7 +674,10 @@ clGetSupportedImageFormats(cl_context ctx,
err = CL_INVALID_VALUE;
goto error;
}
- if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D &&
+ if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE1D &&
+ image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY &&
+ image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY &&
+ image_type != CL_MEM_OBJECT_IMAGE2D &&
image_type != CL_MEM_OBJECT_IMAGE3D)) {
err = CL_INVALID_VALUE;
goto error;
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 1bc97ac6..41281f21 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -137,6 +137,11 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
image->intel_fmt, image->image_type,
image->w, image->h, image->depth,
image->row_pitch, image->tiling);
+ if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ cl_gpgpu_bind_image(gpgpu, k->images[i].idx + 128, image->base.bo, image->offset,
+ image->intel_fmt, image->image_type,
+ image->w, image->h, image->depth,
+ image->row_pitch, image->tiling);
}
return CL_SUCCESS;
}
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index af8e90c6..578b5485 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -363,6 +363,7 @@ cl_get_device_info(cl_device_id device,
DECL_FIELD(IMAGE_SUPPORT, image_support)
DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args)
DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args)
+ DECL_FIELD(IMAGE_MAX_ARRAY_SIZE, image_max_array_size)
DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width)
DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height)
DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width)
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index a5449a7f..769bfd20 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -51,6 +51,7 @@ struct _cl_device_id {
cl_uint max_read_image_args;
cl_uint max_write_image_args;
size_t image2d_max_width;
+ size_t image_max_array_size;
size_t image2d_max_height;
size_t image3d_max_width;
size_t image3d_max_height;
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index b8bda5e4..6d03123c 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -41,6 +41,7 @@
.image_support = CL_TRUE,
.max_read_image_args = 128,
.max_write_image_args = 8,
+.image_max_array_size = 2048,
.image2d_max_width = 8192,
.image2d_max_height = 8192,
.image3d_max_width = 8192,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 491993e4..a7a0f599 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -540,7 +540,7 @@ static cl_mem
_cl_mem_new_image(cl_context ctx,
cl_mem_flags flags,
const cl_image_format *fmt,
- const cl_mem_object_type image_type,
+ const cl_mem_object_type orig_image_type,
size_t w,
size_t h,
size_t depth,
@@ -551,6 +551,7 @@ _cl_mem_new_image(cl_context ctx,
{
cl_int err = CL_SUCCESS;
cl_mem mem = NULL;
+ cl_mem_object_type image_type = orig_image_type;
uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0;
cl_image_tiling_t tiling = CL_NO_TILE;
@@ -584,8 +585,7 @@ _cl_mem_new_image(cl_context ctx,
image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY)))
DO_IMAGE_ERROR;
- if (image_type == CL_MEM_OBJECT_IMAGE1D ||
- image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+ if (image_type == CL_MEM_OBJECT_IMAGE1D) {
size_t min_pitch = bpp * w;
if (data && pitch == 0)
pitch = min_pitch;
@@ -596,7 +596,7 @@ _cl_mem_new_image(cl_context ctx,
else if (data && slice_pitch == 0)
slice_pitch = pitch;
if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
- if (UNLIKELY(depth > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
+ if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR;
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
@@ -617,7 +617,14 @@ _cl_mem_new_image(cl_context ctx,
depth = 1;
} else if (image_type == CL_MEM_OBJECT_IMAGE3D ||
+ image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+ if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+ h = 1;
+ tiling = CL_NO_TILE;
+ } else if (cl_driver_get_ver(ctx->drv) != 6)
+ tiling = cl_get_default_tiling();
+
size_t min_pitch = bpp * w;
if (data && pitch == 0)
pitch = min_pitch;
@@ -626,15 +633,14 @@ _cl_mem_new_image(cl_context ctx,
slice_pitch = min_slice_pitch;
if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR;
if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR;
- if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR;
+ if (image_type == CL_MEM_OBJECT_IMAGE3D &&
+ (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR
+ else if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
- /* Pick up tiling mode (we do only linear on SNB) */
- if (cl_driver_get_ver(ctx->drv) != 6)
- tiling = cl_get_default_tiling();
} else
assert(0);
@@ -643,12 +649,7 @@ _cl_mem_new_image(cl_context ctx,
/* Tiling requires to align both pitch and height */
if (tiling == CL_NO_TILE) {
aligned_pitch = w * bpp;
- if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
- image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
- image_type == CL_MEM_OBJECT_IMAGE3D)
- aligned_h = ALIGN(h, valign);
- else
- aligned_h = h;
+ aligned_h = ALIGN(h, valign);
} else if (tiling == CL_TILE_X) {
aligned_pitch = ALIGN(w * bpp, tilex_w);
aligned_h = ALIGN(h, tilex_h);
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index d868a2f4..3b895394 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -91,7 +91,7 @@ struct intel_gpgpu
unsigned long img_bitmap; /* image usage bitmap. */
unsigned int img_index_base; /* base index for image surface.*/
- drm_intel_bo *binded_img[max_img_n]; /* all images binded for the call */
+ drm_intel_bo *binded_img[max_img_n + 128]; /* all images binded for the call */
unsigned long sampler_bitmap; /* sampler usage bitmap. */
@@ -764,7 +764,10 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
memset(ss, 0, sizeof(*ss));
ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
- ss->ss0.surface_type = intel_get_surface_type(type);
+ if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ ss->ss0.surface_type = I965_SURFACE_2D;
+ else
+ ss->ss0.surface_type = intel_get_surface_type(type);
if (intel_is_surface_array(type)) {
ss->ss0.surface_array = 1;
ss->ss0.surface_array_spacing = 1;
@@ -811,7 +814,10 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
memset(ss, 0, sizeof(*ss));
ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
- ss->ss0.surface_type = intel_get_surface_type(type);
+ if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ ss->ss0.surface_type = I965_SURFACE_2D;
+ else
+ ss->ss0.surface_type = intel_get_surface_type(type);
if (intel_is_surface_array(type)) {
ss->ss0.surface_array = 1;
ss->ss0.surface_array_spacing = 1;