diff options
author | John Harrison <John.C.Harrison@Intel.com> | 2014-05-08 15:20:19 +0100 |
---|---|---|
committer | John Harrison <John.C.Harrison@Intel.com> | 2016-05-06 14:13:03 +0100 |
commit | 9f71cf32e83dc7c4585ea687b7328396e7bc531b (patch) | |
tree | 76e93d0d81220cea0705279073d0ea0874fd0413 | |
parent | 75988accb62589f9c11e379470ad283646ce40e3 (diff) |
drm/i915: Added scheduler statistic reporting to debugfs
It is useful to know what the scheduler is doing for both debugging
and performance analysis purposes. This change adds a bunch of
counters and such that keep track of various scheduler operations
(batches submitted, completed, flush requests, etc.). The data can
then be read in userland via the debugfs mechanism.
v2: Updated to match changes to scheduler implementation.
v3: Updated for changes to kill code and flush code.
v4: Removed the fence/sync code as that will be part of a separate
patch series. Wrapped a long line to keep the style checker happy.
v5: Updated to remove forward declarations and white space. Added
documentation. [Joonas Lahtinen]
Used lighter weight spinlocks.
v6: Updated to newer nightly (lots of ring -> engine renaming).
Added 'for_each_scheduler_node()' and 'assert_scheduler_lock_held()'
helper macros. Updated to use 'to_i915()' instead of dev_private.
Converted all enum labels to uppercase. Removed even more white space.
Moved the enum to string conversion function to debugfs.c rather than
scheduler.c [review feedback from Joonas Lahtinen]
Added running totals of 'flying' and 'queued' nodes rather than
re-calculating each time as a minor CPU performance optimisation.
Added stats to the new file queue wait implementation.
v6.1: Added a target engine parameter to the throttle function. This
is used for tracking per engine throttle stats introduced in the v6
update. This fixes a random memory corruption bug caused by using an
invalid engine pointer.
For: VIZ-1587
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
-rw-r--r-- | drivers/gpu/drm/i915/i915_debugfs.c | 111 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_scheduler.c | 89 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_scheduler.h | 38 |
4 files changed, 237 insertions, 6 deletions
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 980bb20ed28a..1d04cde7ba13 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3609,6 +3609,116 @@ static int i915_drrs_status(struct seq_file *m, void *unused) return 0; } +static const char *i915_scheduler_queue_status_str( + enum i915_scheduler_queue_status status) +{ + static char str[50]; + + switch (status) { + case I915_SQS_NONE: + return "None"; + + case I915_SQS_QUEUED: + return "Queued"; + + case I915_SQS_POPPED: + return "Popped"; + + case I915_SQS_FLYING: + return "Flying"; + + case I915_SQS_COMPLETE: + return "Complete"; + + case I915_SQS_DEAD: + return "Dead"; + + case I915_SQS_MAX: + return "Invalid"; + + default: + break; + } + + sprintf(str, "[Unknown_%d!]", status); + return str; +} + +static int i915_scheduler_info(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct i915_scheduler *scheduler = dev_priv->scheduler; + struct i915_scheduler_stats *stats = scheduler->stats; + struct i915_scheduler_stats_nodes node_stats[I915_NUM_ENGINES]; + struct intel_engine_cs *engine; + char str[50 * (I915_NUM_ENGINES + 1)], name[50], *ptr; + int ret, i, e; + + ret = mutex_lock_interruptible(&dev->mode_config.mutex); + if (ret) + return ret; + +#define PRINT_VAR(name, fmt, var) \ + do { \ + sprintf(str, "%-22s", name); \ + ptr = str + strlen(str); \ + for_each_engine_id(engine, dev_priv, e) { \ + sprintf(ptr, " %10" fmt, var); \ + ptr += strlen(ptr); \ + } \ + seq_printf(m, "%s\n", str); \ + } while (0) + + PRINT_VAR("Engine name:", "s", dev_priv->engine[e].name); + PRINT_VAR(" Engine seqno", "d", engine->get_seqno(engine)); + seq_putc(m, '\n'); + + seq_puts(m, "Batch submissions:\n"); + PRINT_VAR(" Queued", "u", stats[e].queued); + PRINT_VAR(" Submitted", "u", 
stats[e].submitted); + PRINT_VAR(" Completed", "u", stats[e].completed); + PRINT_VAR(" Expired", "u", stats[e].expired); + seq_putc(m, '\n'); + + seq_puts(m, "Flush counts:\n"); + PRINT_VAR(" By object", "u", stats[e].flush_obj); + PRINT_VAR(" By request", "u", stats[e].flush_req); + PRINT_VAR(" By stamp", "u", stats[e].flush_stamp); + PRINT_VAR(" Blanket", "u", stats[e].flush_all); + PRINT_VAR(" Entries bumped", "u", stats[e].flush_bump); + PRINT_VAR(" Entries submitted", "u", stats[e].flush_submit); + seq_putc(m, '\n'); + + seq_puts(m, "Miscellaneous:\n"); + PRINT_VAR(" ExecEarly retry", "u", stats[e].exec_early); + PRINT_VAR(" ExecFinal requeue", "u", stats[e].exec_again); + PRINT_VAR(" ExecFinal killed", "u", stats[e].exec_dead); + PRINT_VAR(" Hung flying", "u", stats[e].kill_flying); + PRINT_VAR(" Hung queued", "u", stats[e].kill_queued); + PRINT_VAR(" File queue wait", "u", stats[e].file_wait); + PRINT_VAR(" File queue stall", "u", stats[e].file_stall); + PRINT_VAR(" File queue lost", "u", stats[e].file_lost); + seq_putc(m, '\n'); + + seq_puts(m, "Queue contents:\n"); + for_each_engine(engine, dev_priv) + i915_scheduler_query_stats(engine, node_stats + engine->id); + + for (i = 0; i < (I915_SQS_MAX + 1); i++) { + sprintf(name, " %s", i915_scheduler_queue_status_str(i)); + PRINT_VAR(name, "d", node_stats[e].counts[i]); + } + seq_putc(m, '\n'); + +#undef PRINT_VAR + + mutex_unlock(&dev->mode_config.mutex); + + return 0; +} + struct pipe_crc_info { const char *name; struct drm_device *dev; @@ -5585,6 +5695,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_semaphore_status", i915_semaphore_status, 0}, {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, {"i915_dp_mst_info", i915_dp_mst_info, 0}, + {"i915_scheduler_info", i915_scheduler_info, 0}, {"i915_wa_registers", i915_wa_registers, 0}, {"i915_ddb_info", i915_ddb_info, 0}, {"i915_sseu_status", i915_sseu_status, 0}, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f9de86a61d42..89851780cad4 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1561,7 +1561,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Throttle batch requests per device file */ - if (i915_scheduler_file_queue_wait(file)) + if (i915_scheduler_file_queue_wait(file, engine)) return -EAGAIN; intel_runtime_pm_get(dev_priv); @@ -1811,6 +1811,9 @@ pre_mutex_err: /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. */ intel_runtime_pm_put(dev_priv); + + dev_priv->scheduler->stats[engine->id].exec_early++; + return ret; } diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 3569ddde6258..123f1dcd5a26 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -142,9 +142,12 @@ static void i915_scheduler_node_kill(struct i915_scheduler *scheduler, if (I915_SQS_IS_FLYING(node)) { scheduler->counts[node->params.engine->id].flying--; + scheduler->stats[node->params.engine->id].kill_flying++; trace_i915_scheduler_unfly(node->params.engine, node); - } else + } else { scheduler->counts[node->params.engine->id].queued--; + scheduler->stats[node->params.engine->id].kill_queued++; + } node->status = I915_SQS_DEAD; trace_i915_scheduler_node_state_change(node->params.engine, node); @@ -390,6 +393,8 @@ static int i915_scheduler_submit(struct intel_engine_cs *engine) */ i915_scheduler_node_fly(node); + scheduler->stats[engine->id].submitted++; + spin_unlock_irq(&scheduler->lock); ret = dev_priv->gt.execbuf_final(&node->params); spin_lock_irq(&scheduler->lock); @@ -413,6 +418,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *engine) case ENOENT: /* Fatal errors. Kill the node. 
*/ requeue = false; + scheduler->stats[engine->id].exec_dead++; i915_scheduler_node_kill(scheduler, node); break; @@ -423,6 +429,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *engine) case ERESTARTSYS: case EINTR: /* Supposedly recoverable errors. */ + scheduler->stats[engine->id].exec_again++; break; default: @@ -431,6 +438,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *engine) * for the best. */ MISSING_CASE(-ret); + scheduler->stats[engine->id].exec_again++; break; } @@ -574,12 +582,15 @@ static int i915_scheduler_queue_execbuffer_bypass(struct i915_scheduler_queue_en struct i915_scheduler *scheduler = dev_priv->scheduler; int ret; + scheduler->stats[qe->params.engine->id].queued++; + trace_i915_scheduler_queue(qe->params.engine, qe); intel_ring_reserved_space_cancel(qe->params.request->ringbuf); scheduler->flags[qe->params.engine->id] |= I915_SF_SUBMITTING; ret = dev_priv->gt.execbuf_final(&qe->params); + scheduler->stats[qe->params.engine->id].submitted++; scheduler->flags[qe->params.engine->id] &= ~I915_SF_SUBMITTING; /* @@ -593,6 +604,8 @@ static int i915_scheduler_queue_execbuffer_bypass(struct i915_scheduler_queue_en qe->status = I915_SQS_COMPLETE; i915_scheduler_clean_node(qe); + scheduler->stats[qe->params.engine->id].expired++; + return 0; } @@ -704,6 +717,8 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) scheduler->min_flying; scheduler->counts[engine->id].queued++; + scheduler->stats[engine->id].queued++; + trace_i915_scheduler_queue(engine, node); trace_i915_scheduler_node_state_change(engine, node); @@ -746,10 +761,13 @@ bool i915_scheduler_notify_request(struct drm_i915_gem_request *req) WARN_ON(!I915_SQS_IS_FLYING(node)); /* Node was in flight so mark it as complete. 
*/ - if (req->cancelled) + if (req->cancelled) { node->status = I915_SQS_DEAD; - else + scheduler->stats[req->engine->id].kill_flying++; + } else { node->status = I915_SQS_COMPLETE; + scheduler->stats[req->engine->id].completed++; + } scheduler->counts[req->engine->id].flying--; trace_i915_scheduler_node_state_change(req->engine, node); @@ -892,6 +910,7 @@ static bool i915_scheduler_remove(struct i915_scheduler *scheduler, list_del(&node->link); list_add(&node->link, remove); + scheduler->stats[engine->id].expired++; /* Strip the dependency info while the mutex is still locked */ i915_scheduler_remove_dependent(scheduler, node); @@ -997,13 +1016,15 @@ void i915_scheduler_work_handler(struct work_struct *work) /** * i915_scheduler_file_queue_wait - Waits for space in the per file queue. * @file: File object to process. + * @engine: Engine being submitted to * This allows throttling of applications by limiting the total number of * outstanding requests to a specified level. Once that limit is reached, * this call will stall waiting on the oldest outstanding request. If it can * not stall for any reason it returns true to mean that the queue is full * and no more requests should be accepted. 
*/ -bool i915_scheduler_file_queue_wait(struct drm_file *file) +bool i915_scheduler_file_queue_wait(struct drm_file *file, + struct intel_engine_cs *target) { struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_private *dev_priv = file_priv->dev_priv; @@ -1048,12 +1069,14 @@ bool i915_scheduler_file_queue_wait(struct drm_file *file) } if (!req) { + scheduler->stats[target->id].file_lost++; spin_unlock_irq(&scheduler->lock); return false; } i915_gem_request_reference(req); + scheduler->stats[target->id].file_wait++; spin_unlock_irq(&scheduler->lock); ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); @@ -1077,9 +1100,60 @@ bool i915_scheduler_file_queue_wait(struct drm_file *file) err_unref: i915_gem_request_unreference(req); + + spin_lock_irq(&scheduler->lock); + scheduler->stats[target->id].file_wait--; + scheduler->stats[target->id].file_stall++; + spin_unlock_irq(&scheduler->lock); + return true; } +/** + * i915_scheduler_query_stats - return various scheduler statistics + * @engine: Engine to report on + * @stats: Stats structure to be filled in + * For various reasons (debugging, performance analysis, curiosity) it is + * useful to see statistics about what the scheduler is doing. This function + * returns the stats that have been gathered in a data structure. The + * expectation is that this will be returned to the user via debugfs. + */ +int i915_scheduler_query_stats(struct intel_engine_cs *engine, + struct i915_scheduler_stats_nodes *stats) +{ + struct drm_i915_private *dev_priv = to_i915(engine->dev); + struct i915_scheduler *scheduler = dev_priv->scheduler; + struct i915_scheduler_queue_entry *node; + + memset(stats, 0x00, sizeof(*stats)); + + spin_lock_irq(&scheduler->lock); + + for_each_scheduler_node(node, engine->id) { + if (node->status >= I915_SQS_MAX) { + DRM_DEBUG_DRIVER("Invalid node state: %d! 
[uniq = %d, seqno = %d]\n", + node->status, node->params.request->uniq, + node->params.request->seqno); + + stats->counts[I915_SQS_MAX]++; + continue; + } + + stats->counts[node->status]++; + } + + WARN(stats->counts[I915_SQS_QUEUED] != scheduler->counts[engine->id].queued, + "Queued count mis-match: %d vs %d!\n", + stats->counts[I915_SQS_QUEUED], scheduler->counts[engine->id].queued); + WARN(stats->counts[I915_SQS_FLYING] != scheduler->counts[engine->id].flying, + "Flying count mis-match: %d vs %d!\n", + stats->counts[I915_SQS_FLYING], scheduler->counts[engine->id].flying); + + spin_unlock_irq(&scheduler->lock); + + return 0; +} + static int i915_scheduler_submit_max_priority(struct intel_engine_cs *engine, bool is_locked) { @@ -1160,6 +1234,7 @@ int i915_scheduler_flush_stamp(struct intel_engine_cs *engine, } spin_lock_irq(&scheduler->lock); + scheduler->stats[engine->id].flush_stamp++; i915_scheduler_priority_bump_clear(scheduler); for_each_scheduler_node(node, engine->id) { if (!I915_SQS_IS_QUEUED(node)) @@ -1170,12 +1245,15 @@ int i915_scheduler_flush_stamp(struct intel_engine_cs *engine, flush_count = i915_scheduler_priority_bump(scheduler, node, scheduler->priority_level_max); + scheduler->stats[engine->id].flush_bump += flush_count; } spin_unlock_irq(&scheduler->lock); if (flush_count) { DRM_DEBUG_DRIVER("<%s> Bumped %d entries\n", engine->name, flush_count); flush_count = i915_scheduler_submit_max_priority(engine, is_locked); + if (flush_count > 0) + scheduler->stats[engine->id].flush_submit += flush_count; } return flush_count; @@ -1211,6 +1289,8 @@ int i915_scheduler_flush(struct intel_engine_cs *engine, bool is_locked) WARN_ON(is_locked && (scheduler->flags[engine->id] & I915_SF_SUBMITTING)); + scheduler->stats[engine->id].flush_all++; + do { found = false; spin_lock_irq(&scheduler->lock); @@ -1228,6 +1308,7 @@ int i915_scheduler_flush(struct intel_engine_cs *engine, bool is_locked) ret = i915_scheduler_submit(engine); else ret = 
i915_scheduler_submit_unlocked(engine); + scheduler->stats[engine->id].flush_submit++; if (ret < 0) return ret; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 5c33c83630a7..464c051fede6 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -80,6 +80,36 @@ struct i915_scheduler_node_states { uint32_t queued; }; +struct i915_scheduler_stats { + /* Batch buffer counts: */ + uint32_t queued; + uint32_t submitted; + uint32_t completed; + uint32_t expired; + + /* Other stuff: */ + uint32_t flush_obj; + uint32_t flush_req; + uint32_t flush_stamp; + uint32_t flush_all; + uint32_t flush_bump; + uint32_t flush_submit; + + uint32_t exec_early; + uint32_t exec_again; + uint32_t exec_dead; + uint32_t kill_flying; + uint32_t kill_queued; + + uint32_t file_wait; + uint32_t file_stall; + uint32_t file_lost; +}; + +struct i915_scheduler_stats_nodes { + uint32_t counts[I915_SQS_MAX + 1]; +}; + struct i915_scheduler { struct list_head node_queue[I915_NUM_ENGINES]; uint32_t flags[I915_NUM_ENGINES]; @@ -95,6 +125,9 @@ struct i915_scheduler { int32_t priority_level_preempt; uint32_t min_flying; uint32_t file_queue_max; + + /* Statistics: */ + struct i915_scheduler_stats stats[I915_NUM_ENGINES]; }; /* Flag bits for i915_scheduler::flags */ @@ -117,6 +150,9 @@ int i915_scheduler_flush(struct intel_engine_cs *engine, bool is_locked); int i915_scheduler_flush_stamp(struct intel_engine_cs *engine, unsigned long stamp, bool is_locked); bool i915_scheduler_is_mutex_required(struct drm_i915_gem_request *req); -bool i915_scheduler_file_queue_wait(struct drm_file *file); +int i915_scheduler_query_stats(struct intel_engine_cs *engine, + struct i915_scheduler_stats_nodes *stats); +bool i915_scheduler_file_queue_wait(struct drm_file *file, + struct intel_engine_cs *engine); #endif /* _I915_SCHEDULER_H_ */ |