drm/i915: refactor & correct GEN6 PIPE_CONTROL code

The code in gen6_render_ring_flush() wasn't quite right; whereas the workarounds were emitted with GFX_OP_PIPE_CONTROL(5) (allowing for a QWord write), the final PIPE_CONTROL used GFX_OP_PIPE_CONTROL(4) even though it would also have the PIPE_CONTROL_QW_WRITE flag set. This may (should?) have resulted in the next instruction word being consumed as data, or the PIPE_CONTROL not working as intended due to the flags-vs-length mismatch. The refactored code has a Gen6-specific function which will emit a complete PIPE_CONTROL sequence, with the correct number of extension DWords. The emit-workarounds and emit-flush functions are then trivially expressed as calls to the new low-level function. Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
author: Dave Gordon <david.s.gordon@intel.com> 2016-03-07 19:37:43 +0000
committer: John Harrison <John.C.Harrison@Intel.com> 2016-06-28 17:19:29 +0100
commit: e16237614e25e6ba89d64663cfceb70dea789da8 (patch)
tree: 3b2adfbfa7dfd8a83a40d5126b65dd76e70ea570
parent: d35d0a2d2aa8347c497124b9e8912972fbd561ae (diff)
1 files changed, 42 insertions, 41 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c243b324c56c..24ab3da2a0ab 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -161,6 +161,41 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req,
 	return 0;
 }
 
+/*
+ * Emit one 5-dword PIPE_CONTROL with the supplied flags, plus an MI_NOOP.
+ * The scratch address & zeroed data are always filled in (they may
+ * be ignored, if the flags don't include a post-sync operation).
+ * Workarounds are the caller's job. Returns 0 or intel_ring_begin()'s error.
+ */
+static int
+gen6_pipecontrol_emit(struct drm_i915_gem_request *req, u32 flags)
+{
+	struct intel_engine_cs *engine = req->engine;
+	u32 scratch_addr;
+	int ret;
+
+	ret = intel_ring_begin(req, 6);		/* 5 PIPE_CONTROL dwords + 1 MI_NOOP */
+	if (ret)
+		return ret;
+
+	/* GEN6 PIPE_CONTROL has the GTT bit in the address */
+	scratch_addr = engine->scratch.gtt_offset;
+	scratch_addr |= PIPE_CONTROL_GLOBAL_GTT;
+	scratch_addr += 2 * CACHELINE_BYTES;
+
+	intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(engine, flags);			/* caller-supplied flags */
+	intel_ring_emit(engine, scratch_addr);		/* address */
+	intel_ring_emit(engine, 0);			/* low dword */
+	intel_ring_emit(engine, 0);			/* high dword */
+
+	intel_ring_emit(engine, MI_NOOP);		/* sixth dword reserved above */
+
+	intel_ring_advance(engine);
+
+	return 0;
+}
+
 /**
  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
  * implementing two workarounds on gen6.  From section 1.4.7.1
@@ -199,54 +234,25 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req,
  * really our business.  That leaves only stall at scoreboard.
  */
 static int
-intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
+gen6_pipecontrol_workarounds_emit(struct drm_i915_gem_request *req)
 {
-	struct intel_engine_cs *engine = req->engine;
-	u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
-	ret = intel_ring_begin(req, 6);
-	if (ret)
-		return ret;
-
-	intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5));
-	intel_ring_emit(engine, PIPE_CONTROL_CS_STALL |
-			PIPE_CONTROL_STALL_AT_SCOREBOARD);
-	intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
-	intel_ring_emit(engine, 0); /* low dword */
-	intel_ring_emit(engine, 0); /* high dword */
-	intel_ring_emit(engine, MI_NOOP);
-	intel_ring_advance(engine);
-
-	ret = intel_ring_begin(req, 6);
+	ret = gen6_pipecontrol_emit(req, PIPE_CONTROL_CS_STALL |
+					 PIPE_CONTROL_STALL_AT_SCOREBOARD);
 	if (ret)
 		return ret;
 
-	intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5));
-	intel_ring_emit(engine, PIPE_CONTROL_QW_WRITE);
-	intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
-	intel_ring_emit(engine, 0);
-	intel_ring_emit(engine, 0);
-	intel_ring_emit(engine, MI_NOOP);
-	intel_ring_advance(engine);
-
-	return 0;
+	return gen6_pipecontrol_emit(req, PIPE_CONTROL_QW_WRITE);
 }
 
 static int
 gen6_render_ring_flush(struct drm_i915_gem_request *req,
 		       u32 invalidate_domains, u32 flush_domains)
 {
-	struct intel_engine_cs *engine = req->engine;
 	u32 flags = 0;
-	u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
-	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	ret = intel_emit_post_sync_nonzero_flush(req);
-	if (ret)
-		return ret;
-
 	/* Just flush everything.  Experiments have shown that reducing the
 	 * number of bits based on the write domains has little performance
 	 * impact.
@@ -273,17 +279,12 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}
 
-	ret = intel_ring_begin(req, 4);
+	/* Force SNB workarounds for PIPE_CONTROL flushes */
+	ret = gen6_pipecontrol_workarounds_emit(req);
 	if (ret)
 		return ret;
 
-	intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(engine, flags);
-	intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(engine, 0);
-	intel_ring_advance(engine);
-
-	return 0;
+	return gen6_pipecontrol_emit(req, flags);
 }
 
 static int
author	Dave Gordon <david.s.gordon@intel.com>	2016-03-07 19:37:43 +0000
committer	John Harrison <John.C.Harrison@Intel.com>	2016-06-28 17:19:29 +0100
commit	e16237614e25e6ba89d64663cfceb70dea789da8 (patch)
tree	3b2adfbfa7dfd8a83a40d5126b65dd76e70ea570
parent	d35d0a2d2aa8347c497124b9e8912972fbd561ae (diff)