diff options
author | Zhigang Gong <zhigang.gong@intel.com> | 2014-06-18 10:10:07 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-06-20 15:53:28 +0800 |
commit | 0aa3d33e0da27a3772964aecd72556f0770f63fb (patch) | |
tree | cbc2d79f007a986e5f1bff55e8b630e11e8cdb06 /src | |
parent | 5745447ce9b56504aeb95e2bf03b93c4d3a9d5dc (diff) |
GBE/runtime: fixup broken 1d array image support.
Because the sample LD message doesn't support an array index, we have
to create a 2D array surface backed by the same buffer object.
Thus each 1D array image will have two surfaces bound to it:
one at the image's index and a second at 128 + index.
Then, on the kernel side, we access the corresponding
2D array surface when the LD message is required; otherwise
we access the original 1D array surface.
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Reviewed-by: He Junyan <junyan.he@inbox.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/cl_api.c | 5 | ||||
-rw-r--r-- | src/cl_command_queue.c | 5 | ||||
-rw-r--r-- | src/cl_device_id.c | 1 | ||||
-rw-r--r-- | src/cl_device_id.h | 1 | ||||
-rw-r--r-- | src/cl_gt_device.h | 1 | ||||
-rw-r--r-- | src/cl_mem.c | 29 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 12 |
7 files changed, 36 insertions, 18 deletions
diff --git a/src/cl_api.c b/src/cl_api.c index 3b118a88..d5ee645e 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -674,7 +674,10 @@ clGetSupportedImageFormats(cl_context ctx, err = CL_INVALID_VALUE; goto error; } - if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D && + if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE1D && + image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY && + image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY && + image_type != CL_MEM_OBJECT_IMAGE2D && image_type != CL_MEM_OBJECT_IMAGE3D)) { err = CL_INVALID_VALUE; goto error; diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 1bc97ac6..41281f21 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -137,6 +137,11 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) image->intel_fmt, image->image_type, image->w, image->h, image->depth, image->row_pitch, image->tiling); + if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) + cl_gpgpu_bind_image(gpgpu, k->images[i].idx + 128, image->base.bo, image->offset, + image->intel_fmt, image->image_type, + image->w, image->h, image->depth, + image->row_pitch, image->tiling); } return CL_SUCCESS; } diff --git a/src/cl_device_id.c b/src/cl_device_id.c index af8e90c6..578b5485 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -363,6 +363,7 @@ cl_get_device_info(cl_device_id device, DECL_FIELD(IMAGE_SUPPORT, image_support) DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args) DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args) + DECL_FIELD(IMAGE_MAX_ARRAY_SIZE, image_max_array_size) DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width) DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height) DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width) diff --git a/src/cl_device_id.h b/src/cl_device_id.h index a5449a7f..769bfd20 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -51,6 +51,7 @@ struct _cl_device_id { cl_uint max_read_image_args; cl_uint max_write_image_args; size_t image2d_max_width; + size_t image_max_array_size; size_t 
image2d_max_height; size_t image3d_max_width; size_t image3d_max_height; diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index b8bda5e4..6d03123c 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -41,6 +41,7 @@ .image_support = CL_TRUE, .max_read_image_args = 128, .max_write_image_args = 8, +.image_max_array_size = 2048, .image2d_max_width = 8192, .image2d_max_height = 8192, .image3d_max_width = 8192, diff --git a/src/cl_mem.c b/src/cl_mem.c index 491993e4..a7a0f599 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -540,7 +540,7 @@ static cl_mem _cl_mem_new_image(cl_context ctx, cl_mem_flags flags, const cl_image_format *fmt, - const cl_mem_object_type image_type, + const cl_mem_object_type orig_image_type, size_t w, size_t h, size_t depth, @@ -551,6 +551,7 @@ _cl_mem_new_image(cl_context ctx, { cl_int err = CL_SUCCESS; cl_mem mem = NULL; + cl_mem_object_type image_type = orig_image_type; uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT; size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0; cl_image_tiling_t tiling = CL_NO_TILE; @@ -584,8 +585,7 @@ _cl_mem_new_image(cl_context ctx, image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY))) DO_IMAGE_ERROR; - if (image_type == CL_MEM_OBJECT_IMAGE1D || - image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + if (image_type == CL_MEM_OBJECT_IMAGE1D) { size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; @@ -596,7 +596,7 @@ _cl_mem_new_image(cl_context ctx, else if (data && slice_pitch == 0) slice_pitch = pitch; if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR; - if (UNLIKELY(depth > ctx->device->image2d_max_height)) DO_IMAGE_ERROR; + if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR; if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; @@ -617,7 +617,14 @@ _cl_mem_new_image(cl_context ctx, depth = 1; } else if (image_type == 
CL_MEM_OBJECT_IMAGE3D || + image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + h = 1; + tiling = CL_NO_TILE; + } else if (cl_driver_get_ver(ctx->drv) != 6) + tiling = cl_get_default_tiling(); + size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; @@ -626,15 +633,14 @@ _cl_mem_new_image(cl_context ctx, slice_pitch = min_slice_pitch; if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR; if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR; - if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR; + if (image_type == CL_MEM_OBJECT_IMAGE3D && + (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR + else if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR; if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR; - /* Pick up tiling mode (we do only linear on SNB) */ - if (cl_driver_get_ver(ctx->drv) != 6) - tiling = cl_get_default_tiling(); } else assert(0); @@ -643,12 +649,7 @@ _cl_mem_new_image(cl_context ctx, /* Tiling requires to align both pitch and height */ if (tiling == CL_NO_TILE) { aligned_pitch = w * bpp; - if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || - image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || - image_type == CL_MEM_OBJECT_IMAGE3D) - aligned_h = ALIGN(h, valign); - else - aligned_h = h; + aligned_h = ALIGN(h, valign); } else if (tiling == CL_TILE_X) { aligned_pitch = ALIGN(w * bpp, tilex_w); aligned_h = ALIGN(h, tilex_h); diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index d868a2f4..3b895394 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -91,7 +91,7 @@ struct intel_gpgpu unsigned long img_bitmap; /* image usage bitmap. 
*/ unsigned int img_index_base; /* base index for image surface.*/ - drm_intel_bo *binded_img[max_img_n]; /* all images binded for the call */ + drm_intel_bo *binded_img[max_img_n + 128]; /* all images binded for the call */ unsigned long sampler_bitmap; /* sampler usage bitmap. */ @@ -764,7 +764,10 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu, memset(ss, 0, sizeof(*ss)); ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 - ss->ss0.surface_type = intel_get_surface_type(type); + if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY) + ss->ss0.surface_type = I965_SURFACE_2D; + else + ss->ss0.surface_type = intel_get_surface_type(type); if (intel_is_surface_array(type)) { ss->ss0.surface_array = 1; ss->ss0.surface_array_spacing = 1; @@ -811,7 +814,10 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu, memset(ss, 0, sizeof(*ss)); ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 - ss->ss0.surface_type = intel_get_surface_type(type); + if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY) + ss->ss0.surface_type = I965_SURFACE_2D; + else + ss->ss0.surface_type = intel_get_surface_type(type); if (intel_is_surface_array(type)) { ss->ss0.surface_array = 1; ss->ss0.surface_array_spacing = 1; |