drm/i915: Allow scheduler to manage inter-ring object synchronisation

The scheduler has always tracked batch buffer dependencies based on DRM object usage. This means that it will not submit a batch on one ring that has outstanding dependencies still executing on other rings. This is exactly the same synchronisation performed by i915_gem_object_sync() using hardware semaphores where available and CPU stalls where not (e.g. in execlist mode and/or on Gen8 hardware). Unfortunately, when a batch buffer is submitted to the driver the _object_sync() call happens first. Thus in case where hardware semaphores are disabled, the driver has already stalled until the dependency has been resolved. This patch adds an optimisation to _object_sync() to ignore the synchronisation in the case where it will subsequently be handled by the scheduler. This removes the driver stall and (in the single application case) provides near hardware semaphore performance even when hardware semaphores are disabled. In a busy system where there is other work that can be executed on the stalling ring, it provides better than hardware semaphore performance as it removes the stall from both the driver and from the hardware. There is also a theory that this method should improve power usage as hardware semaphores are apparently not very power efficient - the stalled ring does not go into as low a power a state as when it is genuinely idle. The optimisation is to check whether both ends of the synchronisation are batch buffer requests. If they are, then the scheduler will have the inter-dependency tracked and managed. If one or other end is not a batch buffer request (e.g. a page flip) then the code falls back to the CPU stall or hardware semaphore as appropriate. To check whether the existing usage is a batch buffer, the code simply calls the 'are you tracking this request' function of the scheduler on the object's last_read_req member. To check whether the new usage is a batch buffer, a flag is passed in from the caller. v6: Updated to newer nightly (lots of ring -> engine renaming). Replaced the i915_scheduler_is_request_tracked() function with i915_scheduler_is_request_batch_buffer() as the need for the former has gone away and it was really being used to ask the latter question in a convoluted manner. [review feedback from Joonas Lahtinen] Issue: VIZ-5566 Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
author: John Harrison <John.C.Harrison@Intel.com> 2015-04-28 11:39:41 +0100
committer: John Harrison <John.C.Harrison@Intel.com> 2016-05-06 14:13:06 +0100
commit: d24b6c144b0e3e1f0af83400d1995f6cbe97c802 (patch)
tree: 8b555c988765bd8e01c6e627afb6148cf8c2c322
parent: 1547967b7af347222e33c4e95f2301e9b0582138 (diff)
7 files changed, 37 insertions, 7 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4d5d059bcab9..cb346ab7afb0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3101,7 +3101,7 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 			 struct intel_engine_cs *to,
-			 struct drm_i915_gem_request **to_req);
+			 struct drm_i915_gem_request **to_req, bool to_batch);
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct drm_i915_gem_request *req);
 int i915_gem_dumb_create(struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 30f5d3fd08dc..47f57dbd2b6b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3658,7 +3658,7 @@ static int
 __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		       struct intel_engine_cs *to,
 		       struct drm_i915_gem_request *from_req,
-		       struct drm_i915_gem_request **to_req)
+		       struct drm_i915_gem_request **to_req, bool to_batch)
 {
 	struct intel_engine_cs *from;
 	int ret;
@@ -3670,6 +3670,14 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 	if (i915_gem_request_completed(from_req))
 		return 0;
 
+	/*
+	 * The scheduler will manage inter-ring object dependencies
+	 * as long as both to and from requests are scheduler managed
+	 * (i.e. batch buffers).
+	 */
+	if (to_batch && i915_scheduler_is_request_batch_buffer(from_req))
+		return 0;
+
 	if (!i915_semaphore_is_enabled(obj->base.dev)) {
 		struct drm_i915_private *i915 = to_i915(obj->base.dev);
 		uint32_t flags;
@@ -3728,6 +3736,8 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
  * @to_req: request we wish to use the object for. See below.
  *          This will be allocated and returned if a request is
  *          required but not passed in.
+ * @to_batch: is the sync request on behalf of batch buffer submission?
+ * If so then the scheduler can (potentially) manage the synchronisation.
  *
  * This code is meant to abstract object synchronization with the GPU.
  * Calling with NULL implies synchronizing the object with the CPU
@@ -3758,7 +3768,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		     struct intel_engine_cs *to,
-		     struct drm_i915_gem_request **to_req)
+		     struct drm_i915_gem_request **to_req, bool to_batch)
 {
 	const bool readonly = obj->base.pending_write_domain == 0;
 	struct drm_i915_gem_request *req[I915_NUM_ENGINES];
@@ -3780,7 +3790,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 				req[n++] = obj->last_read_req[i];
 	}
 	for (i = 0; i < n; i++) {
-		ret = __i915_gem_object_sync(obj, to, req[i], to_req);
+		ret = __i915_gem_object_sync(obj, to, req[i], to_req, to_batch);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 54175d8c71c9..c1205f096451 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -954,7 +954,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (obj->active & other_rings) {
-			ret = i915_gem_object_sync(obj, req->engine, &req);
+			ret = i915_gem_object_sync(obj, req->engine, &req, true);
 			if (ret)
 				return ret;
 		}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index d1f819c95750..46953609bb38 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -1346,6 +1346,25 @@ bool i915_scheduler_is_mutex_required(struct drm_i915_gem_request *req)
 }
 
 /**
+ * i915_scheduler_is_request_batch_buffer - is this request related to a
+ * batch buffer object?
+ * @req: request to be queried
+ *
+ * Returns true if the given request is for a batch buffer. False means it
+ * is for something else - page flip, context initialisation, etc.
+ */
+bool i915_scheduler_is_request_batch_buffer(struct drm_i915_gem_request *req)
+{
+	if (req->scheduler_qe == NULL)
+		return false;
+
+	if (req->scheduler_qe->params.batch_obj == NULL)
+		return false;
+
+	return true;
+}
+
+/**
  * i915_scheduler_closefile - notify the scheduler that a DRM file handle
  * has been closed.
  * @dev: DRM device
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 0017b40f1314..da80f792e566 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -151,6 +151,7 @@ int i915_scheduler_flush(struct intel_engine_cs *engine, bool is_locked);
 int i915_scheduler_flush_stamp(struct intel_engine_cs *engine,
 			       unsigned long stamp, bool is_locked);
 bool i915_scheduler_is_mutex_required(struct drm_i915_gem_request *req);
+bool i915_scheduler_is_request_batch_buffer(struct drm_i915_gem_request *req);
 int i915_scheduler_query_stats(struct intel_engine_cs *engine,
 			       struct i915_scheduler_stats_nodes *stats);
 bool i915_scheduler_file_queue_wait(struct drm_file *file,
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 54274d3ec601..c2b1d5fd0a10 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11604,7 +11604,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	 * into the display plane and skip any waits.
 	 */
 	if (!mmio_flip) {
-		ret = i915_gem_object_sync(obj, engine, &request);
+		ret = i915_gem_object_sync(obj, engine, &request, false);
 		if (ret)
 			goto cleanup_pending;
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 02808f71ee82..c1263e6f8946 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -698,7 +698,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (obj->active & other_rings) {
-			ret = i915_gem_object_sync(obj, req->engine, &req);
+			ret = i915_gem_object_sync(obj, req->engine, &req, true);
 			if (ret)
 				return ret;
 		}
author	John Harrison <John.C.Harrison@Intel.com>	2015-04-28 11:39:41 +0100
committer	John Harrison <John.C.Harrison@Intel.com>	2016-05-06 14:13:06 +0100
commit	d24b6c144b0e3e1f0af83400d1995f6cbe97c802 (patch)
tree	8b555c988765bd8e01c6e627afb6148cf8c2c322
parent	1547967b7af347222e33c4e95f2301e9b0582138 (diff)