From 9227eb272d634b6fc2a081d6686a6af015b7a113 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 20 Oct 2017 13:24:34 +0100 Subject: intel-gpu-overlay: Add queued stat Signed-off-by: Tvrtko Ursulin --- lib/igt_perf.h | 6 ++++++ overlay/gpu-top.c | 14 ++++++++++++++ overlay/gpu-top.h | 6 ++++++ overlay/overlay.c | 3 +++ 4 files changed, 29 insertions(+) diff --git a/lib/igt_perf.h b/lib/igt_perf.h index 5428feb0..eaf7a928 100644 --- a/lib/igt_perf.h +++ b/lib/igt_perf.h @@ -35,9 +35,12 @@ enum drm_i915_pmu_engine_sample { I915_SAMPLE_BUSY = 0, I915_SAMPLE_WAIT = 1, I915_SAMPLE_SEMA = 2, + I915_SAMPLE_QUEUED = 3, I915_ENGINE_SAMPLE_MAX /* non-ABI */ }; +#define I915_SAMPLE_QUEUED_SCALE 1e-2 /* No braces please. */ + #define I915_PMU_SAMPLE_BITS (4) #define I915_PMU_SAMPLE_MASK (0xf) #define I915_PMU_SAMPLE_INSTANCE_BITS (8) @@ -58,6 +61,9 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_ENGINE_SEMA(class, instance) \ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) +#define I915_PMU_ENGINE_QUEUED(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED) + #define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) #define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c index 61b8f62f..c252a0a9 100644 --- a/overlay/gpu-top.c +++ b/overlay/gpu-top.c @@ -72,6 +72,10 @@ static int perf_init(struct gpu_top *gt) gt->fd) >= 0) gt->have_sema = 1; + if (perf_i915_open_group(I915_PMU_ENGINE_QUEUED(d->class, d->inst), + gt->fd) >= 0) + gt->have_queued = 1; + gt->ring[0].name = d->name; gt->num_rings = 1; @@ -93,6 +97,12 @@ static int perf_init(struct gpu_top *gt) gt->fd) < 0) return -1; + if (gt->have_queued && + perf_i915_open_group(I915_PMU_ENGINE_QUEUED(d->class, + d->inst), + gt->fd) < 0) + return -1; + gt->ring[gt->num_rings++].name = d->name; } @@ -298,6 +308,8 @@ int gpu_top_update(struct gpu_top *gt) s->wait[n] = sample[m++]; if (gt->have_sema) s->sema[n] = sample[m++]; + if (gt->have_queued) + s->queued[n] = sample[m++]; } if (gt->count == 1) @@ -310,6 +322,8 @@ int gpu_top_update(struct gpu_top *gt) gt->ring[n].u.u.wait = (100 * (s->wait[n] - d->wait[n]) + d_time/2) / d_time; if (gt->have_sema) gt->ring[n].u.u.sema = (100 * (s->sema[n] - d->sema[n]) + d_time/2) / d_time; + if (gt->have_queued) + gt->queued[n] = (double)((s->queued[n] - d->queued[n])) * 1e9 * I915_SAMPLE_QUEUED_SCALE / d_time; /* in case of rounding + sampling errors, fudge */ if (gt->ring[n].u.u.busy > 100) diff --git a/overlay/gpu-top.h b/overlay/gpu-top.h index d3cdd779..7d3acb4a 100644 --- a/overlay/gpu-top.h +++ b/overlay/gpu-top.h @@ -36,6 +36,7 @@ struct gpu_top { int num_rings; int have_wait; int have_sema; + int have_queued; struct gpu_top_ring { const char *name; @@ -44,6 +45,7 @@ struct gpu_top { uint8_t busy; uint8_t wait; uint8_t sema; + uint8_t queued; } u; uint32_t payload; } u; @@ -54,7 +56,11 @@ struct gpu_top { uint64_t busy[MAX_RINGS]; uint64_t wait[MAX_RINGS]; uint64_t sema[MAX_RINGS]; + uint64_t queued[MAX_RINGS]; } stat[2]; + + double queued[MAX_RINGS]; + int count; }; diff --git a/overlay/overlay.c b/overlay/overlay.c index b30f7a1d..5fc6958b 100644 --- a/overlay/overlay.c +++ b/overlay/overlay.c @@ -255,6 +255,9 @@ static void show_gpu_top(struct overlay_context *ctx, struct overlay_gpu_top *gt len = sprintf(txt, "%s: %3d%% busy", gt->gpu_top.ring[n].name, gt->gpu_top.ring[n].u.u.busy); + if (gt->gpu_top.have_queued) + len += sprintf(txt + len, ", qd %.2f", + gt->gpu_top.queued[n]); if (gt->gpu_top.ring[n].u.u.wait) len += sprintf(txt + len, ", %d%% wait", gt->gpu_top.ring[n].u.u.wait); -- cgit v1.2.3