From 6e9c386d16b3b38be6d1496758ef983b64744844 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Fri, 6 Sep 2013 13:20:46 -0400
Subject: freedreno: avoid stalling at ringbuffer wraparound

Because of how the tiling works, we can't easily flush at arbitrary
points, so wraparound is handled by resetting to the top of the
ringbuffer.  Previously this would stall until the current rendering
had completed.  Instead, cycle through multiple ringbuffers to avoid
the stall.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/freedreno_context.c | 51 +++++++++++++----------
 src/gallium/drivers/freedreno/freedreno_context.h | 12 ++++++
 2 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 1d03351f04..96e1ef6e57 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -34,27 +34,32 @@
 #include "freedreno_gmem.h"
 #include "freedreno_util.h"
 
-/* there are two cases where we currently need to wait for render complete:
- * 1) pctx->flush() .. since at the moment we have no way for DDX to sync
- *    the presentation blit with the 3d core
- * 2) wrap-around for ringbuffer.. possibly we can do something more
- *    Intelligent here.  Right now we need to ensure there is enough room
- *    at the end of the drawcmds in the cmdstream buffer for all the per-
- *    tile cmds.  We do this the lamest way possible, by making the ringbuffer
- *    big, and flushing and resetting back to the beginning if we get too
- *    close to the end.
- */
 static void
-fd_context_wait(struct pipe_context *pctx)
+fd_context_next_rb(struct pipe_context *pctx)
 {
 	struct fd_context *ctx = fd_context(pctx);
-	uint32_t ts = fd_ringbuffer_timestamp(ctx->ring);
+	struct fd_ringbuffer *ring;
+	uint32_t ts;
+
+	fd_ringmarker_del(ctx->draw_start);
+	fd_ringmarker_del(ctx->draw_end);
+
+	/* grab next ringbuffer: */
+	ring = ctx->rings[(ctx->rings_idx++) % ARRAY_SIZE(ctx->rings)];
 
-	DBG("wait: %u", ts);
+	/* wait for new rb to be idle: */
+	ts = fd_ringbuffer_timestamp(ring);
+	if (ts) {
+		DBG("wait: %u", ts);
+		fd_pipe_wait(ctx->screen->pipe, ts);
+	}
+
+	fd_ringbuffer_reset(ring);
+
+	ctx->draw_start = fd_ringmarker_new(ring);
+	ctx->draw_end = fd_ringmarker_new(ring);
 
-	fd_pipe_wait(ctx->screen->pipe, ts);
-	fd_ringbuffer_reset(ctx->ring);
-	fd_ringmarker_mark(ctx->draw_start);
+	ctx->ring = ring;
 }
 
 /* emit accumulated render cmds, needed for example if render target has
@@ -79,7 +84,7 @@ fd_context_render(struct pipe_context *pctx)
 	 * wrap around:
 	 */
 	if ((ctx->ring->cur - ctx->ring->start) > ctx->ring->size/8)
-		fd_context_wait(pctx);
+		fd_context_next_rb(pctx);
 
 	ctx->needs_flush = false;
 	ctx->cleared = ctx->restore = ctx->resolve = 0;
@@ -131,6 +136,7 @@ fd_context_init(struct fd_context *ctx,
 {
 	struct fd_screen *screen = fd_screen(pscreen);
 	struct pipe_context *pctx;
+	int i;
 
 	ctx->screen = screen;
 
@@ -144,12 +150,13 @@ fd_context_init(struct fd_context *ctx,
 	pctx->priv = priv;
 	pctx->flush = fd_context_flush;
 
-	ctx->ring = fd_ringbuffer_new(screen->pipe, 0x100000);
-	if (!ctx->ring)
-		goto fail;
+	for (i = 0; i < ARRAY_SIZE(ctx->rings); i++) {
+		ctx->rings[i] = fd_ringbuffer_new(screen->pipe, 0x400000);
+		if (!ctx->rings[i])
+			goto fail;
+	}
 
-	ctx->draw_start = fd_ringmarker_new(ctx->ring);
-	ctx->draw_end = fd_ringmarker_new(ctx->ring);
+	fd_context_next_rb(pctx);
 
 	util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer),
 			16, UTIL_SLAB_SINGLETHREADED);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 3d18260445..808e3a72f0 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -134,6 +134,18 @@ struct fd_context {
 	} gmem_reason;
 	unsigned num_draws;
 
+	/* We can't sanely deal with the wraparound point in a ringbuffer,
+	 * and because of the way tiling works we can't flush at arbitrary
+	 * points (without a big performance hit).  When we get too close
+	 * to the end of the current ringbuffer, cycle to the next one
+	 * (and wait for pending rendering from the next rb to complete).
+	 * We want the # of ringbuffers to be high enough that we don't
+	 * normally have to wait before resetting to the start of the
+	 * next rb.
+	 */
+	struct fd_ringbuffer *rings[4];
+	unsigned rings_idx;
+
 	struct fd_ringbuffer *ring;
 	struct fd_ringmarker *draw_start, *draw_end;
 
-- 
cgit v1.2.3