drm/i915: Inline sg_next() for the optimised SGL iterator

Avoiding the out-of-line call to sg_next() reduces the kernel execution overhead by 10% in some workloads (for example the Unreal Engine 4 demo Atlantis on 2GiB GTTs) which are dominated by the cost of inserting PTEs due to texture thrashing. We can demonstrate this in a microbenchmark that forces us to rebind the object on every execbuf, where we can measure a 25% improvement, in the time required to execute an execbuf requiring a texture to be rebound, for inlining the sg_next() for large texture sizes. Benchmark: igt/benchmarks/gem_exec_fault Benchmark: igt/benchmarks/gem_exec_trace/Atlantis Signed-off-by: Dave Gordon <david.s.gordon@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: http://patchwork.freedesktop.org/patch/msgid/1463741647-15666-5-git-send-email-chris@chris-wilson.co.uk
author: Dave Gordon <david.s.gordon@intel.com> 2016-05-20 11:54:07 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2016-05-20 13:43:00 +0100
commit: 63d15326168d54cfef082f6899c86f8260db4037 (patch)
tree: 572c3a0e87f9f1b1c4ec0a551d8e5518f2893317
parent: 85d1225ec066b2ef46fbd0ed1bae78ae1f3e6c91 (diff)
1 files changed, 21 insertions, 2 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 63ff5fa2b2bd..e2abd9eb352b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2278,6 +2278,25 @@ static __always_inline struct sgt_iter {
 }
 
 /**
+ * __sg_next - return the next scatterlist entry in a list
+ * @sg:		The current sg entry
+ *
+ * Description:
+ *   If the entry is the last, return NULL; otherwise, step to the next
+ *   element in the array (@sg@+1). If that's a chain pointer, follow it;
+ *   otherwise just return the pointer to the current element.
+ **/
+static inline struct scatterlist *__sg_next(struct scatterlist *sg)
+{
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(sg->sg_magic != SG_MAGIC);
+#endif
+	return sg_is_last(sg) ? NULL :
+		likely(!sg_is_chain(++sg)) ? sg :
+		sg_chain_ptr(sg);
+}
+
+/**
  * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
  * @__dmap:	DMA address (output)
  * @__iter:	'struct sgt_iter' (iterator state, internal)
@@ -2287,7 +2306,7 @@ static __always_inline struct sgt_iter {
 	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
 	     ((__dmap) = (__iter).dma + (__iter).curr);			\
 	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
-	     ((__iter) = __sgt_iter(sg_next((__iter).sgp), true), 0))
+	     ((__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0))
 
 /**
  * for_each_sgt_page - iterate over the pages of the given sg_table
@@ -2300,7 +2319,7 @@ static __always_inline struct sgt_iter {
 	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
 	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
 	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
-	     ((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0))
+	     ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
 
 /**
  * Request queue structure.
author	Dave Gordon <david.s.gordon@intel.com>	2016-05-20 11:54:07 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2016-05-20 13:43:00 +0100
commit	63d15326168d54cfef082f6899c86f8260db4037 (patch)
tree	572c3a0e87f9f1b1c4ec0a551d8e5518f2893317
parent	85d1225ec066b2ef46fbd0ed1bae78ae1f3e6c91 (diff)