i915/gem_exec_latency: Measure the latency of context switching

Measure the baseline latency of switching between contexts so that it can be compared directly with the additional cost of preemption.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
author: Chris Wilson <chris@chris-wilson.co.uk> 2019-03-13 23:50:57 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2020-12-13 12:01:31 +0000
commit: 3c7f8f6a85aa8943e14b32c7a9e01aad9d1f53fa (patch)
tree: 5189fc65d09026872d1a6a6b5f08d1f285f7cac3
parent: e31a5224a86dc335686f2185f9ea0641c4124236 (diff)
1 files changed, 220 insertions, 10 deletions
diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
index 6c5a4bc6..d8810f75 100644
--- a/tests/i915/gem_exec_latency.c
+++ b/tests/i915/gem_exec_latency.c
@@ -412,6 +412,87 @@ static void latency_from_ring(int fd,
 	}
 }
 
+/*
+ * Submit chains of 2, 4, 8 and 16 timestamp-writing batches to one engine and
+ * report the submission, inter-batch and end-to-end latency for each length.
+ */
+static void execution_latency(int i915, unsigned int ring, const char *name)
+{
+	struct drm_i915_gem_exec_object2 obj = { .flags = EXEC_OBJECT_PINNED };
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring | I915_EXEC_NO_RELOC | I915_EXEC_HANDLE_LUT,
+	};
+	const uint32_t mmio_base = gem_engine_mmio_base(i915, name);
+	const unsigned int cs_timestamp = mmio_base + 0x358;
+	volatile uint32_t *timestamp;
+	uint32_t *cs, *result;
+
+	igt_require(mmio_base);
+	timestamp =
+		(volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp);
+
+	obj.handle = gem_create(i915, 4096); /* sole bo, made after the skip check */
+	result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_READ | PROT_WRITE);
+
+	for (int i = 0; i < 16; i++) { /* batch i starts at dword 16 * i */
+		cs = result + 16 * i;
+		*cs++ = 0x24 << 23 | 2; /* SRM */
+		*cs++ = cs_timestamp;
+		*cs++ = 4096 - 16 * 4 + i * 4; /* result slot i, see cs[] below */
+		*cs++ = 0;
+		*cs++ = 0xa << 23; /* presumably MI_BATCH_BUFFER_END */
+	}
+
+	cs = result + 1024 - 16; /* the 16 result slots at the end of the page */
+	for (int length = 2; length <= 16; length <<= 1) {
+		struct igt_mean submit, batch, total;
+		int last = length - 1;
+
+		igt_mean_init(&submit);
+		igt_mean_init(&batch);
+		igt_mean_init(&total);
+
+		igt_until_timeout(2) {
+			uint32_t now, end;
+
+			cs[last] = 0; /* cleared so the wait below sees the new write */
+
+			now = *timestamp; /* CPU read of the engine timestamp */
+			for (int i = 0; i < length; i++) {
+				execbuf.batch_start_offset = 64 * i;
+				gem_execbuf(i915, &execbuf);
+			}
+			while (!((volatile uint32_t *)cs)[last]) /* wait for last batch */
+				;
+			end = *timestamp;
+
+			igt_mean_add(&submit, (cs[0] - now) * rcs_clock);
+			igt_mean_add(&batch, (cs[last] - cs[0]) * rcs_clock / last);
+			igt_mean_add(&total, (end - now) * rcs_clock);
+		}
+
+		igt_info("%sx%d Submission latency: %.2f±%.2fus\n",
+			 name, length,
+			 1e-3 * igt_mean_get(&submit),
+			 1e-3 * sqrt(igt_mean_get_variance(&submit)));
+
+		igt_info("%sx%d Inter-batch latency: %.2f±%.2fus\n",
+			 name, length,
+			 1e-3 * igt_mean_get(&batch),
+			 1e-3 * sqrt(igt_mean_get_variance(&batch)));
+
+		igt_info("%sx%d End-to-end latency: %.2f±%.2fus\n",
+			 name, length,
+			 1e-3 * igt_mean_get(&total),
+			 1e-3 * sqrt(igt_mean_get_variance(&total)));
+	}
+
+	munmap(result, 4096);
+	gem_close(i915, obj.handle);
+}
+
 static void
 __submit_spin(int fd, igt_spin_t *spin, unsigned int flags)
 {
@@ -618,6 +699,118 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in
 	munmap(results, MMAP_SZ);
 }
 
+/*
+ * Measure the latency of switching from a batch spinning in one context to a
+ * batch queued in a second context; PREEMPT gives the latter a higher priority.
+ */
+static void context_switch(int i915, unsigned int engine, const char *name,
+			   unsigned int flags)
+{
+	struct drm_i915_gem_exec_object2 obj[2];
+	struct drm_i915_gem_relocation_entry reloc[5];
+	struct drm_i915_gem_execbuffer2 eb;
+	uint32_t *cs, *bbe, *results, v = 0;
+	const uint32_t mmio_base = gem_engine_mmio_base(i915, name);
+	struct igt_mean mean;
+	uint32_t ctx[2];
+
+	igt_require(mmio_base);
+	gem_require_contexts(i915); /* skip, not fail, without context support */
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+		ctx[i] = gem_context_create(i915);
+
+	if (flags & PREEMPT) { /* ctx[1] should preempt the spinner in ctx[0] */
+		gem_context_set_priority(i915, ctx[0], -1023);
+		gem_context_set_priority(i915, ctx[1], +1023);
+	}
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = gem_create(i915, 4096); /* results written by the GPU */
+	gem_set_caching(i915, obj[0].handle, 1);
+	results = gem_mmap__cpu(i915, obj[0].handle, 0, 4096, PROT_READ);
+	gem_set_domain(i915, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
+
+	obj[1].handle = gem_create(i915, 4096); /* holds both batches */
+	memset(reloc, 0, sizeof(reloc));
+	obj[1].relocation_count = ARRAY_SIZE(reloc);
+	obj[1].relocs_ptr = to_user_pointer(reloc);
+	bbe = gem_mmap__wc(i915, obj[1].handle, 0, 4096, PROT_WRITE);
+	gem_set_domain(i915, obj[1].handle,
+		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+	/* Spinner at dword 0: store TIMESTAMP in results[0], then loop to itself */
+	cs = bbe;
+	*cs++ = 0x5 << 23; /* presumably MI_ARB_CHECK; rewritten to end the spin */
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = mmio_base + 0x358; /* TIMESTAMP */
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = MI_BATCH_BUFFER_START | 1;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = 0;
+	*cs++ = 0;
+	/* Probe at dword 64: TIMESTAMP -> results[1], results[0] -> results[2] */
+	cs = bbe + 64;
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = mmio_base + 0x358; /* TIMESTAMP */
+	reloc[2].target_handle = obj[0].handle;
+	reloc[2].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = reloc[2].delta = 4;
+	*cs++ = 0;
+	*cs++ = 0x29 << 23 | 2; /* LRM */
+	*cs++ = mmio_base + 0x600; /* GPR0 */
+	reloc[3].target_handle = obj[0].handle;
+	reloc[3].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = mmio_base + 0x600; /* GPR0 */
+	reloc[4].target_handle = obj[0].handle;
+	reloc[4].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = reloc[4].delta = 8;
+	*cs++ = 0;
+	*cs++ = 0xa << 23; /* presumably MI_BATCH_BUFFER_END */
+
+	memset(&eb, 0, sizeof(eb));
+	eb.buffers_ptr = to_user_pointer(obj);
+	eb.buffer_count = ARRAY_SIZE(obj);
+	eb.flags = engine | I915_EXEC_NO_RELOC;
+	igt_mean_init(&mean);
+	igt_until_timeout(5) {
+		eb.rsvd1 = ctx[0];
+		eb.batch_start_offset = 0;
+		gem_execbuf(i915, &eb);
+
+		while (results[0] == v) /* wait until the spinner has written */
+			igt_assert(gem_bo_busy(i915, obj[1].handle));
+
+		eb.rsvd1 = ctx[1];
+		eb.batch_start_offset = 64 * sizeof(*cs);
+		gem_execbuf(i915, &eb);
+
+		*bbe = 0xa << 23; /* terminate the spinner */
+		gem_sync(i915, obj[1].handle);
+		*bbe = 0x5 << 23; /* rearm it for the next pass */
+
+		v = results[0];
+		igt_mean_add(&mean, (results[1] - results[2]) * rcs_clock);
+	}
+	igt_info("%s context switch latency%s: %.2f±%.2fus\n",
+		 name, flags & PREEMPT ? " (preempt)" : "",
+		 1e-3 * igt_mean_get(&mean),
+		 1e-3 * sqrt(igt_mean_get_variance(&mean)));
+	munmap(results, 4096);
+	munmap(bbe, 4096);
+
+	for (int i = 0; i < ARRAY_SIZE(obj); i++)
+		gem_close(i915, obj[i].handle);
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+		gem_context_destroy(i915, ctx[i]);
+}
+
 static double clockrate(int i915, int reg)
 {
 	volatile uint32_t *mmio;
@@ -698,61 +891,78 @@ igt_main
 				igt_subtest_f("%s-dispatch", e->name)
 					latency_on_ring(device,
 							eb_ring(e),
-							e->name, 0);
+							e->full_name, 0);
 
 				igt_subtest_f("%s-live-dispatch", e->name)
 					latency_on_ring(device,
 							eb_ring(e),
-							e->name, LIVE);
+							e->full_name, LIVE);
 
 				igt_subtest_f("%s-poll", e->name)
 					poll_ring(device,
 						  eb_ring(e),
-						  e->name);
+						  e->full_name);
 
 				igt_subtest_f("%s-rtidle-submit", e->name)
 					rthog_latency_on_ring(device,
 							      e->exec_id |
 							      e->flags,
-							      e->name,
+							      e->full_name,
 							      RTIDLE);
 
 				igt_subtest_f("%s-rthog-submit", e->name)
 					rthog_latency_on_ring(device,
 							      e->exec_id |
 							      e->flags,
-							      e->name,
+							      e->full_name,
 							      0);
 
+				igt_subtest_f("%s-execution-latency", e->name)
+					execution_latency(device,
+							  e->exec_id | e->flags,
+							  e->full_name);
+
 				igt_subtest_f("%s-live-dispatch-queued", e->name)
 					latency_on_ring(device,
 							eb_ring(e),
-							e->name, LIVE | CORK);
+							e->full_name,
+							LIVE | CORK);
 				igt_subtest_f("%s-dispatch-queued", e->name)
 					latency_on_ring(device,
 							eb_ring(e),
-							e->name, CORK);
+							e->full_name, CORK);
 
 				igt_subtest_f("%s-synchronisation", e->name)
 					latency_from_ring(device,
 							  eb_ring(e),
-							  e->name, 0);
+							  e->full_name, 0);
 
 				igt_subtest_f("%s-synchronisation-queued", e->name)
 					latency_from_ring(device,
 							  eb_ring(e),
-							  e->name, CORK);
+							  e->full_name, CORK);
 
+				igt_subtest_f("%s-cs", e->name)
+					context_switch(device,
+						       e->exec_id | e->flags,
+						       e->full_name, 0);
 				igt_subtest_group {
 					igt_fixture {
 						gem_require_contexts(device);
 						igt_require(gem_scheduler_has_preemption(device));
 					}
 
+					igt_subtest_f("%s-cs-preempt", e->name)
+						context_switch(device,
+							       e->exec_id | e->flags,
+							       e->full_name,
+							       PREEMPT);
+
 					igt_subtest_f("%s-preemption", e->name)
 						latency_from_ring(device,
 								  eb_ring(e),
-								  e->name, PREEMPT);
+								  e->full_name,
+								  PREEMPT);
 				}
 			}
 		}
author	Chris Wilson <chris@chris-wilson.co.uk>	2019-03-13 23:50:57 +0000
committer	Chris Wilson <chris@chris-wilson.co.uk>	2020-12-13 12:01:31 +0000
commit	3c7f8f6a85aa8943e14b32c7a9e01aad9d1f53fa (patch)
tree	5189fc65d09026872d1a6a6b5f08d1f285f7cac3
parent	e31a5224a86dc335686f2185f9ea0641c4124236 (diff)