From 0c080f1aa0757e93cf2440154b5e0615910e7239 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 17 Jul 2020 18:33:47 +0100
Subject: i915/gem_exec_hostile: Wild Stallions!

Signed-off-by: Chris Wilson
---
 lib/i915/gem_mman.h           |   5 +-
 tests/Makefile.sources        |   3 +
 tests/i915/gem_exec_hostile.c | 550 ++++++++++++++++++++++++++++++++++++++++++
 tests/intel-ci/blacklist.txt  |   1 +
 tests/meson.build             |   1 +
 5 files changed, 556 insertions(+), 4 deletions(-)
 create mode 100644 tests/i915/gem_exec_hostile.c

diff --git a/lib/i915/gem_mman.h b/lib/i915/gem_mman.h
index ec2899ff..b38d4e1c 100644
--- a/lib/i915/gem_mman.h
+++ b/lib/i915/gem_mman.h
@@ -26,6 +26,7 @@
 #define GEM_MMAN_H
 
 #include
+#include
 
 void *gem_mmap__gtt(int fd, uint32_t handle, uint64_t size, unsigned prot);
 void *gem_mmap__cpu(int fd, uint32_t handle, uint64_t offset, uint64_t size, unsigned prot);
@@ -42,10 +43,6 @@ void *gem_mmap__device_coherent(int fd, uint32_t handle, uint64_t offset,
 void *gem_mmap__cpu_coherent(int fd, uint32_t handle, uint64_t offset,
 			     uint64_t size, unsigned prot);
 
-#ifndef I915_GEM_DOMAIN_WC
-#define I915_GEM_DOMAIN_WC 0x80
-#endif
-
 bool gem_has_mappable_ggtt(int i915);
 void gem_require_mappable_ggtt(int i915);
 bool gem_has_mmap_offset(int fd);
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index d9c8f610..17c8325f 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -248,6 +248,9 @@ gem_exec_flush_SOURCES = i915/gem_exec_flush.c
 TESTS_progs += gem_exec_gttfill
 gem_exec_gttfill_SOURCES = i915/gem_exec_gttfill.c
 
+TESTS_progs += gem_exec_hostile
+gem_exec_hostile_SOURCES = i915/gem_exec_hostile.c
+
 TESTS_progs += gem_exec_latency
 gem_exec_latency_SOURCES = i915/gem_exec_latency.c
 
diff --git a/tests/i915/gem_exec_hostile.c b/tests/i915/gem_exec_hostile.c
new file mode 100644
index 00000000..5fc32f36
--- /dev/null
+++ b/tests/i915/gem_exec_hostile.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+
+#include "i915/gem.h"
+#include "i915/gem_mman.h"
+#include "igt.h"
+#include "igt_perf.h"
+#include "igt_sysfs.h"
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+static unsigned int offset_in_page(void *addr)
+{
+	return (uintptr_t)addr & 4095;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  NSEC_PER_SEC);
+}
+
+static uint32_t __batch_create(int i915, uint32_t offset)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, ALIGN(offset + 4, 4096));
+	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static uint32_t batch_create(int i915)
+{
+	return __batch_create(i915, 0);
+}
+
+static void delay(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr,
+		  uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	/* Loop until CTX_TIMESTAMP - initial > @ns */
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(START_TS);
+
+	while (offset_in_page(cs) & 63)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(NOW_TS);
+
+	/* delta = now - start; inverted to match COND_BBE */
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	/* Save delta for reading by COND_BBE */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Delay between SRM and COND_BBE to post the writes */
+	for (int n = 0; n < 8; n++) {
+		*cs++ = MI_STORE_DWORD_IMM;
+		if (use_64b) {
+			*cs++ = addr + 4064;
+			*cs++ = addr >> 32;
+		} else {
+			*cs++ = 0;
+			*cs++ = addr + 4064;
+		}
+		*cs++ = 0;
+	}
+
+	/* Break if delta > ns */
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Otherwise back to recalculating delta */
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	obj.offset = obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static uint32_t vm_clone(int i915)
+{
+	uint32_t ctx = 0;
+	__gem_context_clone(i915, 0,
+			    I915_CONTEXT_CLONE_VM |
+			    I915_CONTEXT_CLONE_ENGINES,
+			    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
+			    &ctx);
+	return ctx;
+}
+
+static int __execbuf(int i915, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+	int err;
+
+	err = 0;
+	if (ioctl(i915, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf)) {
+		err = -errno;
+		igt_assume(err);
+	}
+
+	errno = 0;
+	return err;
+}
+
+static const char *time_repr(uint64_t ns, char *buf, size_t len)
+{
+	int x = 0;
+
+	if (ns > 48 * 3600ull * NSEC_PER_SEC) {
+		uint64_t day;
+
+		day = ns / (24 * 3600ull * NSEC_PER_SEC);
+		ns -= day * 24 * 3600ull * NSEC_PER_SEC;
+
+		x += snprintf(buf + x, len - x, "%"PRId64"d", day);
+	}
+
+	if (x || ns > 2 * 3600ull * NSEC_PER_SEC) {
+		uint64_t hour;
+
+		hour = ns / (3600ull * NSEC_PER_SEC);
+		ns -= hour * 3600ull * NSEC_PER_SEC;
+
+		x += snprintf(buf + x, len - x, "%"PRId64"h", hour);
+	}
+
+	if (x || ns > 2 * 60ull * NSEC_PER_SEC) {
+		uint64_t min;
+
+		min = ns / (60ull * NSEC_PER_SEC);
+		ns -= min * 60ull * NSEC_PER_SEC;
+
+		x += snprintf(buf + x, len - x, "%"PRId64"m", min);
+	}
+
+	if (x || ns > NSEC_PER_SEC) {
+		x += snprintf(buf + x, len - x, "%.3fs", 1e-9 * ns);
+		ns = 0;
+	}
+
+	if (ns)
+		snprintf(buf, len, "%.3fus", 1e-3 * ns);
+
+	return buf;
+}
+
+static uint32_t far_delay(int i915, uint64_t delay, int target)
+{
+	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	const struct intel_execution_engine2 *e;
+	struct drm_i915_gem_exec_object2 obj[64];
+	uint32_t handle = gem_create(i915, 4096);
+	unsigned long *counters, count, max;
+	struct timespec tv;
+	uint64_t elapsed;
+	uint64_t submit;
+	char buf[80];
+
+	counters = mmap(NULL, 64 << 10, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(counters != MAP_FAILED);
+
+	__for_each_physical_engine(i915, e)
+		obj[e->flags] = delay_create(i915, 0, e, delay);
+
+	fcntl(i915, F_SETFL, fcntl(i915, F_GETFL) | O_NONBLOCK);
+	__for_each_physical_engine(i915, e) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&obj[e->flags]),
+			.buffer_count = 1,
+			.flags = e->flags,
+		};
+
+		gem_execbuf(i915, &execbuf);
+		igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+		gem_sync(i915, obj[e->flags].handle);
+		igt_info("%s single delay: %s\n", e->name,
+			 time_repr(igt_nsec_elapsed(&tv), buf, sizeof(buf)));
+
+		igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+		count = 0;
+		while (__execbuf(i915, &execbuf) == 0)
+			count++;
+		gem_sync(i915, obj[e->flags].handle);
+		igt_info("%s %lu combined delay: %s\n", e->name, count,
+			 time_repr(igt_nsec_elapsed(&tv), buf, sizeof(buf)));
+	}
+
+	submit = 3 * target;
+	submit *= NSEC_PER_SEC;
+	submit /= 2 * delay;
+
+	igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+	igt_fork(child, ncpus) {
+		struct drm_i915_gem_exec_object2 batch[2] = {
+			{
+				.handle = batch_create(i915),
+				.flags = EXEC_OBJECT_WRITE,
+			}
+		};
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(batch),
+			.buffer_count = 2,
+		};
+		unsigned long *x = &counters[65 * child];
+
+		for (count = 0; count < submit;) {
+			execbuf.rsvd1 = vm_clone(i915);
+			if (!execbuf.rsvd1)
+				break;
+
+			__for_each_physical_engine(i915, e) {
+				batch[1] = obj[e->flags];
+				execbuf.flags = e->flags;
+
+				while (__execbuf(i915, &execbuf) == 0) {
+					x[1 + e->flags]++;
+					count++;
+				}
+			}
+
+			gem_context_destroy(i915, execbuf.rsvd1);
+			x[0]++;
+		}
+
+		execbuf.rsvd1 = 0;
+		batch[1] = batch[0];
+		batch[1].flags &= ~EXEC_OBJECT_WRITE;
+		batch[0].handle = handle;
+		assert(batch[0].flags & EXEC_OBJECT_WRITE);
+		gem_execbuf(i915, &execbuf);
+	}
+	igt_waitchildren();
+
+	count = 0;
+	for (int i = 1; i < 65; i++)
+		count += counters[i];
+
+	memset(&counters[65 * ncpus], 0, 64 * sizeof(counters[0]));
+	for (int child = 1; child < ncpus; child++) {
+		for (int i = 0; i < 65; i++)
+			counters[i] += counters[child * 65 + i];
+	}
+
+	max = 0;
+	for (int i = 1; i < 65; i++)
+		if (counters[i] > max)
+			max = counters[i];
+	count += max;
+
+	elapsed = igt_nsec_elapsed(&tv);
+	igt_info("Created an estimated %s delay using %lu requests across %lu contexts\n",
+		 time_repr(delay * count - elapsed, buf, sizeof(buf)),
+		 count, counters[0]);
+	munmap(counters, 64 << 10);
+
+	__for_each_physical_engine(i915, e)
+		gem_close(i915, obj[e->flags].handle);
+
+	return handle;
+}
+
+static void far_fence(int i915, int timeout)
+{
+	uint32_t handle = far_delay(i915, NSEC_PER_SEC / 250, timeout);
+	struct timespec tv;
+	char buf[80];
+
+	igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+	gem_sync(i915, handle);
+	igt_info("Synchronisation: %s\n",
+		 time_repr(igt_nsec_elapsed(&tv), buf, sizeof(buf)));
+
+	gem_close(i915, handle);
+}
+
+static void far_userptr(int i915, int timeout)
+{
+	struct drm_i915_gem_exec_object2 obj[3] = {
+		{
+			.handle = far_delay(i915, NSEC_PER_SEC / 200, timeout),
+			.flags = EXEC_OBJECT_WRITE,
+		},
+		{ /* userptr */ },
+		{
+			.handle = batch_create(i915),
+		}
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(obj),
+		.buffer_count = ARRAY_SIZE(obj),
+	};
+	const size_t sz = 2 << 20;
+	struct timespec tv;
+	char buf[80];
+	void *mem;
+
+	mem = mmap(NULL, sz, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(mem != MAP_FAILED);
+	madvise(mem, sz, MADV_HUGEPAGE);
+	gem_userptr(i915, mem, sz, 0, 0, &obj[1].handle);
+	gem_execbuf(i915, &execbuf);
+
+	gem_close(i915, obj[2].handle);
+	gem_close(i915, obj[1].handle);
+	gem_close(i915, obj[0].handle);
+
+	igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+	munmap(mem, sz);
+	igt_info("munmap: %s\n",
+		 time_repr(igt_nsec_elapsed(&tv), buf, sizeof(buf)));
+}
+
+static void many_interrupts(int i915, int timeout)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	const struct intel_execution_engine2 *e;
+	unsigned int sz = 4 << 20;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, sz),
+		.flags = EXEC_OBJECT_PINNED,
+		.offset = 48 << 20,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+	};
+	uint32_t *cs, *map, *end;
+	uint64_t i_before[2], i_after[2];
+	int fd, params;
+
+	/*
+	 * Disable hangcheck for uniform behaviour across generations,
+	 * before there is a scheduler there is just one global ring, and
+	 * if that ring grows longer than the hangcheck interval, we declare
+	 * a hang. With a scheduler, the system remains responsive and new
+	 * clients get a share of the GPU -- just the fence and the waiters
+	 * upon that fence remains stuck behind the long queue.
+	 */
+	params = igt_params_open(i915);
+	igt_sysfs_set(params, "enable_hangcheck", "0");
+
+	fd = perf_i915_open(i915, I915_PMU_INTERRUPTS);
+
+	map = cs = gem_mmap__device_coherent(i915, obj.handle, 0, sz, PROT_WRITE);
+	for (unsigned int n = 0; n < sz / sizeof(*cs); n++)
+		*cs++ = 0x2 << 23;
+
+	end = cs -= 4;
+	*cs++ = 0x5 << 23;
+	if (gen >= 8) {
+		*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+		*cs++ = obj.offset;
+		*cs++ = 0;
+	} else if (gen >= 6) {
+		*cs++ = MI_BATCH_BUFFER_START | 1 << 8;
+		*cs++ = obj.offset;
+	} else {
+		*cs++ = MI_BATCH_BUFFER_START | 2 << 6;
+		*cs++ = obj.offset | 1;
+	}
+
+	__for_each_physical_engine(i915, e) {
+		struct pollfd pfd;
+
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+
+		execbuf.flags = e->flags | I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+
+		/* Do a dummy wait to enable interrupts on each engine */
+		pfd.fd = execbuf.rsvd2 >> 32;
+		pfd.events = POLLIN;
+		poll(&pfd, 1, 1);
+		close(pfd.fd);
+	}
+
+	read(fd, i_before, sizeof(i_before));
+	sleep(timeout); /* wait long enough for NMI watchdogs to kick in */
+	read(fd, i_after, sizeof(i_after));
+
+	*end = MI_BATCH_BUFFER_END;
+	__sync_synchronize();
+	munmap(map, sz);
+
+	if (fd != -1) {
+		igt_info("Generated %"PRId64" interrupts (%.2e/s)\n",
+			 i_after[0] - i_before[0],
+			 (i_after[0] - i_before[0]) * 1e9 / (i_after[1] - i_before[1]));
+		close(fd);
+	}
+
+	igt_sysfs_set(params, "enable_hangcheck", "1");
+	close(params);
+
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+}
+
+igt_main
+{
+	int i915 = -1;
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+	}
+
+	igt_subtest("far-fence")
+		far_fence(i915, 60);
+
+	igt_subtest("far-userptr")
+		far_userptr(i915, 120 /* hungtaskd timeout */);
+
+	igt_subtest("many-interrupts")
+		many_interrupts(i915, 60);
+
+	igt_fixture {
+		close(i915);
+	}
+}
diff --git a/tests/intel-ci/blacklist.txt b/tests/intel-ci/blacklist.txt
index c6af22b3..65ecc479 100644
--- a/tests/intel-ci/blacklist.txt
+++ b/tests/intel-ci/blacklist.txt
@@ -28,6 +28,7 @@ igt@gem_exec_big@(?!.*single).*
 igt@gem_exec_capture@many-(?!4K-).*
 igt@gem_exec_fence@.*hang.*
 igt@gem_exec_flush@(?!.*basic).*
+igt@gem_exec_hostile(@.*)?
 igt@gem_exec_latency(@.*)?
 igt@gem_exec_lut_handle(@.*)?
 igt@gem_exec_nop@(?!.*basic).*
diff --git a/tests/meson.build b/tests/meson.build
index 9d600670..32870234 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -151,6 +151,7 @@ i915_progs = [
 	'gem_exec_fence',
 	'gem_exec_flush',
 	'gem_exec_gttfill',
+	'gem_exec_hostile',
 	'gem_exec_latency',
 	'gem_exec_lut_handle',
 	'gem_exec_nop',
-- 
cgit v1.2.3