summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorYang Rong <rong.r.yang@intel.com>2014-11-19 13:20:41 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-11-21 13:36:30 +0800
commit34a91f258aa0e0a8004a1019647efa245b28fc5e (patch)
tree8c17e7fe784145cee26eab5d70c49c6bf0434d45 /src
parent6fd5b39a18153be70d8cf22a115973188da4829a (diff)
BDW: Change the default tiling mode to TILING_Y on BDW.
TILING_Y performs better than TILING_X on BDW, while the two perform almost the same on IVB/HSW. Use TILING_Y as the default tiling mode on BDW temporarily; we still need to find the root cause of the different behavior between BDW and IVB/HSW. V2: still use a static variable and only initialize it once. Signed-off-by: Yang Rong <rong.r.yang@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/cl_mem.c10
1 files changed, 7 insertions, 3 deletions
diff --git a/src/cl_mem.c b/src/cl_mem.c
index d60c59b5..3323897b 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -637,11 +637,15 @@ cl_mem_copy_image(struct _cl_mem_image *image,
cl_mem_unmap_auto((cl_mem)image);
}
-cl_image_tiling_t cl_get_default_tiling(void)
+cl_image_tiling_t cl_get_default_tiling(cl_driver drv)
{
static int initialized = 0;
static cl_image_tiling_t tiling = CL_TILE_X;
+
if (!initialized) {
+ // FIXME, need to find out the performance diff's root cause on BDW.
+ if(cl_driver_get_ver(drv) == 8)
+ tiling = CL_TILE_Y;
char *tilingStr = getenv("OCL_TILING");
if (tilingStr != NULL) {
switch (tilingStr[0]) {
@@ -733,7 +737,7 @@ _cl_mem_new_image(cl_context ctx,
/* Pick up tiling mode (we do only linear on SNB) */
if (cl_driver_get_ver(ctx->drv) != 6)
- tiling = cl_get_default_tiling();
+ tiling = cl_get_default_tiling(ctx->drv);
depth = 1;
} else if (image_type == CL_MEM_OBJECT_IMAGE3D ||
@@ -743,7 +747,7 @@ _cl_mem_new_image(cl_context ctx,
h = 1;
tiling = CL_NO_TILE;
} else if (cl_driver_get_ver(ctx->drv) != 6)
- tiling = cl_get_default_tiling();
+ tiling = cl_get_default_tiling(ctx->drv);
size_t min_pitch = bpp * w;
if (data && pitch == 0)