summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Zhigang Gong <zhigang.gong@intel.com> 2014-06-18 10:01:15 +0800
committer Zhigang Gong <zhigang.gong@intel.com> 2014-06-20 15:53:03 +0800
commit 74252fdc7b3e211bcbe22e4411f313e978f25def (patch)
tree 43dfc19f6fc0dee03a168c9f05f59166fdd9e8f4 /src
parent 97da0110158ed127176cea18694b23ce8a9e608b (diff)
cl/driver: fix the incorrect handling of 1D array.
According to the bspec, a 1D array should be treated as a 3D-like surface whose height is 1, so we need to make sure the depth is the array_size. Consequently, rt_view_extent's value should always be the same as the depth. According to the OpenCL spec, a 1D array is first and foremost a 1D image rather than a 2D image, so we should step between its lines (array slices) using the slice_pitch rather than the image_row_pitch. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com> Reviewed-by: He Junyan <junyan.he@inbox.com>
Diffstat (limited to 'src')
-rw-r--r-- src/cl_mem.c 46
-rw-r--r-- src/intel/intel_gpgpu.c 20
2 files changed, 34 insertions, 32 deletions
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 0806d35c..491993e4 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -513,6 +513,7 @@ static const uint32_t tilex_w = 512; /* tileX width in bytes */
static const uint32_t tilex_h = 8; /* tileX height in number of rows */
static const uint32_t tiley_w = 128; /* tileY width in bytes */
static const uint32_t tiley_h = 32; /* tileY height in number of rows */
+static const uint32_t valign = 2; /* vertical alignment is 2. */
cl_image_tiling_t cl_get_default_tiling(void)
{
@@ -551,7 +552,7 @@ _cl_mem_new_image(cl_context ctx,
cl_int err = CL_SUCCESS;
cl_mem mem = NULL;
uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
- size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h;
+ size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0;
cl_image_tiling_t tiling = CL_NO_TILE;
/* Check flags consistency */
@@ -579,21 +580,29 @@ _cl_mem_new_image(cl_context ctx,
} while (0);
if (UNLIKELY(w == 0)) DO_IMAGE_ERROR;
- if (UNLIKELY(h == 0 && image_type != CL_MEM_OBJECT_IMAGE1D)) DO_IMAGE_ERROR;
+ if (UNLIKELY(h == 0 && (image_type != CL_MEM_OBJECT_IMAGE1D &&
+ image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY)))
+ DO_IMAGE_ERROR;
- if (image_type == CL_MEM_OBJECT_IMAGE1D) {
+ if (image_type == CL_MEM_OBJECT_IMAGE1D ||
+ image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
size_t min_pitch = bpp * w;
if (data && pitch == 0)
pitch = min_pitch;
- depth = 1;
h = 1;
+ if (image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ depth = 1;
+ else if (data && slice_pitch == 0)
+ slice_pitch = pitch;
if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
+ if (UNLIKELY(depth > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
+ if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR;
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
+ if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
tiling = CL_NO_TILE;
- } else if (image_type == CL_MEM_OBJECT_IMAGE2D ||
- image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+ } else if (image_type == CL_MEM_OBJECT_IMAGE2D) {
size_t min_pitch = bpp * w;
if (data && pitch == 0)
pitch = min_pitch;
@@ -606,12 +615,9 @@ _cl_mem_new_image(cl_context ctx,
if (cl_driver_get_ver(ctx->drv) != 6)
tiling = cl_get_default_tiling();
- if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
- tiling = CL_NO_TILE;
-
depth = 1;
} else if (image_type == CL_MEM_OBJECT_IMAGE3D ||
- image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+ image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
size_t min_pitch = bpp * w;
if (data && pitch == 0)
pitch = min_pitch;
@@ -637,7 +643,12 @@ _cl_mem_new_image(cl_context ctx,
/* Tiling requires to align both pitch and height */
if (tiling == CL_NO_TILE) {
aligned_pitch = w * bpp;
- aligned_h = h;
+ if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
+ image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
+ image_type == CL_MEM_OBJECT_IMAGE3D)
+ aligned_h = ALIGN(h, valign);
+ else
+ aligned_h = h;
} else if (tiling == CL_TILE_X) {
aligned_pitch = ALIGN(w * bpp, tilex_w);
aligned_h = ALIGN(h, tilex_h);
@@ -662,9 +673,12 @@ _cl_mem_new_image(cl_context ctx,
goto error;
cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
- aligned_slice_pitch = (image_type == CL_MEM_OBJECT_IMAGE1D || image_type == CL_MEM_OBJECT_IMAGE2D
- || image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
- ? 0 : aligned_pitch * ALIGN(h, 2);
+ if (image_type == CL_MEM_OBJECT_IMAGE1D ||
+ image_type == CL_MEM_OBJECT_IMAGE2D ||
+ image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
+ aligned_slice_pitch = 0;
+ else
+ aligned_slice_pitch = aligned_pitch * ALIGN(h, 2);
cl_mem_image_init(cl_mem_image(mem), w, h, image_type, depth, *fmt,
intel_fmt, bpp, aligned_pitch, aligned_slice_pitch, tiling,
@@ -832,10 +846,6 @@ cl_mem_new_image(cl_context context,
image_desc->image_row_pitch, image_desc->image_slice_pitch,
host_ptr, errcode_ret);
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
- return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
- image_desc->image_width, image_desc->image_array_size, image_desc->image_depth,
- image_desc->image_row_pitch, image_desc->image_slice_pitch,
- host_ptr, errcode_ret);
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
image_desc->image_width, image_desc->image_height, image_desc->image_array_size,
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index bb944c9a..d868a2f4 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -763,6 +763,7 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
memset(ss, 0, sizeof(*ss));
+ ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
ss->ss0.surface_type = intel_get_surface_type(type);
if (intel_is_surface_array(type)) {
ss->ss0.surface_array = 1;
@@ -772,13 +773,8 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
ss->ss1.base_addr = obj_bo->offset;
ss->ss2.width = w - 1;
- if (type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
- ss->ss2.height = 1;
- ss->ss3.depth = h - 1;
- } else {
- ss->ss2.height = h - 1;
- ss->ss3.depth = depth - 1;
- }
+ ss->ss2.height = h - 1;
+ ss->ss3.depth = depth - 1;
ss->ss4.not_str_buf.rt_view_extent = depth - 1;
ss->ss4.not_str_buf.min_array_element = 0;
ss->ss3.pitch = pitch - 1;
@@ -814,6 +810,7 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
memset(ss, 0, sizeof(*ss));
+ ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
ss->ss0.surface_type = intel_get_surface_type(type);
if (intel_is_surface_array(type)) {
ss->ss0.surface_array = 1;
@@ -822,13 +819,8 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
ss->ss0.surface_format = format;
ss->ss1.base_addr = obj_bo->offset;
ss->ss2.width = w - 1;
- if (type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
- ss->ss2.height = 1;
- ss->ss3.depth = h - 1;
- } else {
- ss->ss2.height = h - 1;
- ss->ss3.depth = depth - 1;
- }
+ ss->ss2.height = h - 1;
+ ss->ss3.depth = depth - 1;
ss->ss4.not_str_buf.rt_view_extent = depth - 1;
ss->ss4.not_str_buf.min_array_element = 0;
ss->ss3.pitch = pitch - 1;