summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunyan He <junyan.he@linux.intel.com>2014-10-21 21:02:27 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-10-23 09:19:17 +0800
commit33bbe06cc67a034d66bf419c242f6d0cb8ac9248 (patch)
tree945038b3643644ef670d46755a35d3885864ae70
parent2edb7451a8f92295f79e29ef16740b5cd16127f2 (diff)
Fix the bug of 1D array slice pitch
For BDW, the vertical alignment is at least 4. This causes the slice pitch to be twice as big as on Gen7 for a 1D buffer array. Because the buffer tiling alignment may change between different GENs, we move it from the runtime to the Intel driver. V2: Fix all the bugs in 1D and 2D image arrays, and delete the tile alignment sizes, which are useless. Also integrate two image array test cases into this patch set. Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r--src/cl_driver.h3
-rw-r--r--src/cl_driver_defs.c1
-rw-r--r--src/cl_mem.c19
-rw-r--r--src/intel/intel_driver.c39
-rw-r--r--src/intel/intel_gpgpu.c2
5 files changed, 50 insertions, 14 deletions
diff --git a/src/cl_driver.h b/src/cl_driver.h
index e973ba55..0603089e 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -360,6 +360,9 @@ extern cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering;
typedef int (cl_buffer_get_fd_cb)(cl_buffer, int *fd);
extern cl_buffer_get_fd_cb *cl_buffer_get_fd;
+typedef int (cl_buffer_get_tiling_align_cb)(cl_context ctx, uint32_t tiling_mode, uint32_t dim);
+extern cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align;
+
/* Get the device id */
typedef int (cl_driver_get_device_id_cb)(void);
extern cl_driver_get_device_id_cb *cl_driver_get_device_id;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index 72f25d9a..665dad2a 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -48,6 +48,7 @@ LOCAL cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering = NULL;
LOCAL cl_buffer_get_buffer_from_libva_cb *cl_buffer_get_buffer_from_libva = NULL;
LOCAL cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva = NULL;
LOCAL cl_buffer_get_fd_cb *cl_buffer_get_fd = NULL;
+LOCAL cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align = NULL;
/* cl_khr_gl_sharing */
LOCAL cl_gl_acquire_texture_cb *cl_gl_acquire_texture = NULL;
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 077f1d73..59265a30 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -610,13 +610,6 @@ cl_mem_copy_image(struct _cl_mem_image *image,
cl_mem_unmap_auto((cl_mem)image);
}
-static const uint32_t tile_sz = 4096; /* 4KB per tile */
-static const uint32_t tilex_w = 512; /* tileX width in bytes */
-static const uint32_t tilex_h = 8; /* tileX height in number of rows */
-static const uint32_t tiley_w = 128; /* tileY width in bytes */
-static const uint32_t tiley_h = 32; /* tileY height in number of rows */
-static const uint32_t valign = 2; /* vertical alignment is 2. */
-
cl_image_tiling_t cl_get_default_tiling(void)
{
static int initialized = 0;
@@ -749,13 +742,13 @@ _cl_mem_new_image(cl_context ctx,
/* Tiling requires to align both pitch and height */
if (tiling == CL_NO_TILE) {
aligned_pitch = w * bpp;
- aligned_h = ALIGN(h, valign);
+ aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
} else if (tiling == CL_TILE_X) {
- aligned_pitch = ALIGN(w * bpp, tilex_w);
- aligned_h = ALIGN(h, tilex_h);
+ aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 0));
+ aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 1));
} else if (tiling == CL_TILE_Y) {
- aligned_pitch = ALIGN(w * bpp, tiley_w);
- aligned_h = ALIGN(h, tiley_h);
+ aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_Y, 0));
+ aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_Y, 1));
}
sz = aligned_pitch * aligned_h * depth;
@@ -779,7 +772,7 @@ _cl_mem_new_image(cl_context ctx,
image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
aligned_slice_pitch = 0;
else
- aligned_slice_pitch = aligned_pitch * ALIGN(h, 2);
+ aligned_slice_pitch = aligned_pitch * ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
cl_mem_image_init(cl_mem_image(mem), w, h, image_type, depth, *fmt,
intel_fmt, bpp, aligned_pitch, aligned_slice_pitch, tiling,
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 2c2ed5f4..fd44dceb 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -476,6 +476,44 @@ static int get_cl_tiling(uint32_t drm_tiling)
return CL_NO_TILE;
}
+static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t tiling_mode, uint32_t dim)
+{
+ uint32_t gen_ver = ((intel_driver_t *)ctx->drv)->gen_ver;
+ uint32_t ret = 0;
+
+ switch (tiling_mode) {
+ case CL_TILE_X:
+ if (dim == 0) { //tileX width in bytes
+ ret = 512;
+ } else if (dim == 1) { //tileX height in number of rows
+ ret = 8;
+ } else
+ assert(0);
+ break;
+
+ case CL_TILE_Y:
+ if (dim == 0) { //tileY width in bytes
+ ret = 128;
+ } else if (dim == 1) { //tileY height in number of rows
+ ret = 32;
+ } else
+ assert(0);
+ break;
+
+ case CL_NO_TILE:
+ if (dim == 1) { //vertical alignment
+ if (gen_ver == 8)
+ ret = 4;
+ else
+ ret = 2;
+ } else
+ assert(0);
+ break;
+ }
+
+ return ret;
+}
+
#if defined(HAS_EGL)
#include "intel_dri_resource_sharing.h"
#include "cl_image.h"
@@ -741,5 +779,6 @@ intel_setup_callbacks(void)
cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
cl_buffer_get_fd = (cl_buffer_get_fd_cb *) drm_intel_bo_gem_export_to_prime;
+ cl_buffer_get_tiling_align = (cl_buffer_get_tiling_align_cb *)intel_buffer_get_tiling_align;
intel_set_gpgpu_callbacks(intel_get_device_id());
}
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 167d8d93..6cd73d65 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1094,6 +1094,7 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
ss->ss0.surface_format = format;
if (intel_is_surface_array(type)) {
ss->ss0.surface_array = 1;
+ ss->ss1.surface_qpitch = (h + 3)/4;
}
ss->ss0.horizontal_alignment = 1;
ss->ss0.vertical_alignment = 1;
@@ -1117,7 +1118,6 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
ss->ss3.surface_pitch = pitch - 1;
ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
- ss->ss7.red_clear_color = 1;
ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;