diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2020-12-13 11:35:11 +0000 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2020-12-14 16:55:55 +0000 |
commit | a668d5c148ec3c1d3958f660a146a88676aac25d (patch) | |
tree | bcbf8600c1d270140dc766974ec03b768a944c37 | |
parent | ec4073d30b428aaf199c0f4de6c0a5ebdc1c1c65 (diff) |
i915/gem_exec_balancer: Measure timeslicing fairness
Oversaturate the virtual engines on the system and check that each
workload receives a fair share of the available GPU time.
v2: Apply a modicum of statistical integrity.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
-rw-r--r-- | tests/i915/gem_exec_balancer.c | 167 |
1 files changed, 167 insertions, 0 deletions
diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
index 35a032cc..dffc3de9 100644
--- a/tests/i915/gem_exec_balancer.c
+++ b/tests/i915/gem_exec_balancer.c
@@ -2763,6 +2763,200 @@ static void smoketest(int i915, int timeout)
 	gem_close(i915, batch[0].handle);
 }
 
+/*
+ * Submit a small batch on @ctx that uses a relative SRM to copy the
+ * CTX_TIMESTAMP register (0x3a8) into the batch object itself, wait for it
+ * to complete and return the 32-bit value that was stored.
+ */
+static uint32_t read_ctx_timestamp(int i915, uint32_t ctx)
+{
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+		.offset = 32 << 20,
+		.relocs_ptr = to_user_pointer(&reloc),
+		.relocation_count = 1,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+	};
+	uint32_t *map, *cs;
+	uint32_t ts;
+
+	/* The result is read back through this mapping, so request read too. */
+	cs = map = gem_mmap__device_coherent(i915, obj.handle,
+					     0, 4096, PROT_READ | PROT_WRITE);
+
+	*cs++ = 0x24 << 23 | 1 << 19 | 2; /* relative SRM */
+	*cs++ = 0x3a8; /* CTX_TIMESTAMP */
+	memset(&reloc, 0, sizeof(reloc));
+	reloc.target_handle = obj.handle;
+	reloc.presumed_offset = obj.offset;
+	reloc.offset = offset_in_page(cs);
+	reloc.delta = 4000;
+	*cs++ = obj.offset + 4000;
+	*cs++ = obj.offset >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+
+	ts = map[1000]; /* dword 1000 is byte offset 4000, the SRM target */
+	munmap(map, 4096);
+
+	return ts;
+}
+
+/* qsort() comparator that orders uint32_t values from low to high. */
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Return the tick rate (ticks per second) used to scale CTX_TIMESTAMP.
+ *
+ * Gen11 keeps the hard-coded 12.5MHz; every other generation queries
+ * I915_PARAM_CS_TIMESTAMP_FREQUENCY. The ioctl return value is not checked;
+ * presumably the default survives a failed query.
+ */
+static int read_ctx_timestamp_frequency(int i915)
+{
+	int value = 12500000; /* icl!!! are you feeling alright? CTX vs CS */
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	if (intel_gen(intel_get_drm_devid(i915)) != 11)
+		ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+/* Divide @x by @y, rounding the quotient up; @y must be non-zero. */
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+/* Convert a CTX_TIMESTAMP tick count into nanoseconds, rounding up. */
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	return div64_u64_round_up(ticks * NSEC_PER_SEC,
+				  read_ctx_timestamp_frequency(i915));
+}
+
+/*
+ * Create count + 1 contexts with load_balancer_create() over the @count
+ * engines in @ci, keep them all spinning on one shared batch for @duration
+ * seconds, then read back CTX_TIMESTAMP from each context.
+ *
+ * Asserts that the spread between the largest and smallest timestamp stays
+ * below twice the random-walk threshold computed in the body.
+ */
+static void __fairslice(int i915,
+			const struct i915_engine_class_instance *ci,
+			unsigned int count,
+			int duration)
+{
+	const double timeslice_duration_ns = 1e6;
+	igt_spin_t *spin = NULL;
+	uint32_t ctx[count + 1];
+	uint32_t ts[count + 1];
+	double threshold;
+
+	igt_debug("Launching %zu spinners on %s\n",
+		  ARRAY_SIZE(ctx), class_to_str(ci->engine_class));
+	igt_assert(ARRAY_SIZE(ctx) >= 3);
+
+	for (unsigned int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		ctx[i] = load_balancer_create(i915, ci, count);
+		if (spin == NULL) {
+			spin = __igt_spin_new(i915, .ctx = ctx[i]);
+		} else {
+			/* Resubmit the same spinning batch from this context. */
+			struct drm_i915_gem_execbuffer2 eb = {
+				.buffer_count = 1,
+				.buffers_ptr = to_user_pointer(&spin->obj[IGT_SPIN_BATCH]),
+				.rsvd1 = ctx[i],
+			};
+			gem_execbuf(i915, &eb);
+		}
+	}
+
+	sleep(duration); /* over the course of many timeslices */
+
+	igt_assert(gem_bo_busy(i915, spin->handle));
+	igt_spin_end(spin);
+	igt_debug("Cancelled spinners\n");
+
+	for (unsigned int i = 0; i < ARRAY_SIZE(ctx); i++)
+		ts[i] = read_ctx_timestamp(i915, ctx[i]);
+
+	for (unsigned int i = 0; i < ARRAY_SIZE(ctx); i++)
+		gem_context_destroy(i915, ctx[i]);
+	igt_spin_free(i915, spin);
+
+	/*
+	 * If we imagine that the timeslices are randomly distributed to
+	 * the virtual engines, we would expect the variation to be modelled
+	 * by a drunken walk; ergo sqrt(num_timeslices).
+	 */
+	threshold = sqrt(1e9 * duration / timeslice_duration_ns);
+	threshold *= timeslice_duration_ns;
+	threshold *= 3; /* CI safety factor before crying wolf */
+
+	qsort(ts, ARRAY_SIZE(ctx), sizeof(*ts), cmp_u32);
+	/* Min, median and max; the expected share scales with @duration. */
+	igt_info("%s: [%.1f, %.1f, %.1f] ms, expect %.1f +- %.1fms\n",
+		 class_to_str(ci->engine_class),
+		 1e-6 * ticks_to_ns(i915, ts[0]),
+		 1e-6 * ticks_to_ns(i915, ts[(count + 1) / 2]),
+		 1e-6 * ticks_to_ns(i915, ts[count]),
+		 1e3 * duration * count / ARRAY_SIZE(ctx),
+		 1e-6 * threshold);
+
+	igt_assert_f(ts[count], "CTX_TIMESTAMP not reported!\n");
+	igt_assert_f(ticks_to_ns(i915, ts[count] - ts[0]) < 2 * threshold,
+		     "Range of timeslices greater than tolerable: %.2fms > %.2fms; unfair!\n",
+		     1e-6 * ticks_to_ns(i915, ts[count] - ts[0]),
+		     1e-6 * threshold * 2);
+}
+
+/*
+ * Run __fairslice() for 2 seconds on each engine-class bit (0..31) for which
+ * list_engines() returns at least two engines. Requires gen11 or later.
+ */
+static void fairslice(int i915)
+{
+	/* Relative CS mmio */
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 11);
+
+	for (int class = 0; class < 32; class++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count = 0;
+
+		ci = list_engines(i915, 1u << class, &count);
+		if (!ci || count < 2) {
+			free(ci);
+			continue;
+		}
+
+		__fairslice(i915, ci, count, 2);
+		free(ci);
+	}
+}
+
 static bool has_context_engines(int i915)
 {
 	struct drm_i915_gem_context_param p = {
@@ -2848,6 +3042,9 @@ igt_main
 			full(i915, p->flags);
 	}
 
+	igt_subtest("fairslice")
+		fairslice(i915);
+
 	igt_subtest("nop")
 		nop(i915);
 