summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Grigori Goronzy <greg@chown.ath.cx> 2016-04-15 19:39:02 +0200
committer: Grigori Goronzy <greg@chown.ath.cx> 2016-04-15 19:39:02 +0200
commit: b905404fcffd3924c9ad78f0c8d4a8e1707f4d09 (patch)
tree: f7cf390ed635155894c6b86b5efd1c63874c825a
parent: 88b8d97b0d9d601d8ce4f73ca0785af98317e33f (diff)
amdgpu/winsys: adjust IB size based on buffer wait time [ib-size-tune]
Small IBs help to reduce stalls for workloads that require a lot of synchronization. On the other hand, if there is no notable synchronization, we can use a large IB size to slightly improve performance in some cases. This introduces tuning of the IB size based on feedback on the average buffer wait time. The average wait time is tracked with exponential smoothing.
-rw-r--r-- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 2
-rw-r--r-- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 8
-rw-r--r-- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h 1
3 files changed, 9 insertions, 2 deletions
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 036301ef08..1e441e5981 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -195,6 +195,7 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
return NULL;
}
}
+ bo->ws->buffer_wait_time_avg = (3 * bo->ws->buffer_wait_time_avg) / 4;
} else {
uint64_t time = os_time_get_nano();
@@ -222,6 +223,7 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
}
bo->ws->buffer_wait_time += os_time_get_nano() - time;
+ bo->ws->buffer_wait_time_avg = (3 * bo->ws->buffer_wait_time_avg + os_time_get_nano() - time) / 4;
}
}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 3ea0f3d55d..a9af0ce8eb 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -201,12 +201,16 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx)
static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_ib *ib,
struct amdgpu_cs_ib_info *info, unsigned ib_type)
{
+ unsigned buffer_size = 128 * 1024 * 4;
+ unsigned ib_size = 32 * 1024 * 4;
+
/* Small IBs are better than big IBs, because the GPU goes idle quicker
* and there is less waiting for buffers and fences. Proof:
* http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
*/
- unsigned buffer_size = 128 * 1024 * 4;
- unsigned ib_size = 20 * 1024 * 4;
+ uint64_t avg = ((struct amdgpu_winsys *)ws)->buffer_wait_time_avg;
+ if (avg > 1E4)
+ ib_size = 10 * 1024 * 4;
if (ib_type == IB_CONST) {
buffer_size = 512 * 1024 * 4;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index 91b9be4bb3..56be13e51c 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
@@ -54,6 +54,7 @@ struct amdgpu_winsys {
uint64_t allocated_vram;
uint64_t allocated_gtt;
uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
+ uint64_t buffer_wait_time_avg;
uint64_t num_cs_flushes;
unsigned gart_page_size;