summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tvrtko Ursulin <tvrtko.ursulin@intel.com> 2017-10-20 13:24:34 +0100
committer: Tvrtko Ursulin <tvrtko.ursulin@intel.com> 2017-10-20 17:58:38 +0100
commit: 9227eb272d634b6fc2a081d6686a6af015b7a113 (patch)
tree: 186a441427d7d8c790c9fc00718b81c19c1e2cad
parent: 660de04679c2448903e003e61e3e79b6f9a0dc49 (diff)
intel-gpu-overlay: Add queued stat [branch: pmu]
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
-rw-r--r-- lib/igt_perf.h    | 6
-rw-r--r-- overlay/gpu-top.c | 14
-rw-r--r-- overlay/gpu-top.h | 6
-rw-r--r-- overlay/overlay.c | 3
4 files changed, 29 insertions, 0 deletions
diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index 5428feb0..eaf7a928 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -35,9 +35,12 @@ enum drm_i915_pmu_engine_sample {
I915_SAMPLE_BUSY = 0,
I915_SAMPLE_WAIT = 1,
I915_SAMPLE_SEMA = 2,
+ I915_SAMPLE_QUEUED = 3,
I915_ENGINE_SAMPLE_MAX /* non-ABI */
};
+#define I915_SAMPLE_QUEUED_SCALE 1e-2 /* No braces please. */
+
#define I915_PMU_SAMPLE_BITS (4)
#define I915_PMU_SAMPLE_MASK (0xf)
#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
@@ -58,6 +61,9 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c
index 61b8f62f..c252a0a9 100644
--- a/overlay/gpu-top.c
+++ b/overlay/gpu-top.c
@@ -72,6 +72,10 @@ static int perf_init(struct gpu_top *gt)
gt->fd) >= 0)
gt->have_sema = 1;
+ if (perf_i915_open_group(I915_PMU_ENGINE_QUEUED(d->class, d->inst),
+ gt->fd) >= 0)
+ gt->have_queued = 1;
+
gt->ring[0].name = d->name;
gt->num_rings = 1;
@@ -93,6 +97,12 @@ static int perf_init(struct gpu_top *gt)
gt->fd) < 0)
return -1;
+ if (gt->have_queued &&
+ perf_i915_open_group(I915_PMU_ENGINE_QUEUED(d->class,
+ d->inst),
+ gt->fd) < 0)
+ return -1;
+
gt->ring[gt->num_rings++].name = d->name;
}
@@ -298,6 +308,8 @@ int gpu_top_update(struct gpu_top *gt)
s->wait[n] = sample[m++];
if (gt->have_sema)
s->sema[n] = sample[m++];
+ if (gt->have_queued)
+ s->queued[n] = sample[m++];
}
if (gt->count == 1)
@@ -310,6 +322,8 @@ int gpu_top_update(struct gpu_top *gt)
gt->ring[n].u.u.wait = (100 * (s->wait[n] - d->wait[n]) + d_time/2) / d_time;
if (gt->have_sema)
gt->ring[n].u.u.sema = (100 * (s->sema[n] - d->sema[n]) + d_time/2) / d_time;
+ if (gt->have_queued)
+ gt->queued[n] = (double)((s->queued[n] - d->queued[n])) * 1e9 * I915_SAMPLE_QUEUED_SCALE / d_time;
/* in case of rounding + sampling errors, fudge */
if (gt->ring[n].u.u.busy > 100)
diff --git a/overlay/gpu-top.h b/overlay/gpu-top.h
index d3cdd779..7d3acb4a 100644
--- a/overlay/gpu-top.h
+++ b/overlay/gpu-top.h
@@ -36,6 +36,7 @@ struct gpu_top {
int num_rings;
int have_wait;
int have_sema;
+ int have_queued;
struct gpu_top_ring {
const char *name;
@@ -44,6 +45,7 @@ struct gpu_top {
uint8_t busy;
uint8_t wait;
uint8_t sema;
+ uint8_t queued;
} u;
uint32_t payload;
} u;
@@ -54,7 +56,11 @@ struct gpu_top {
uint64_t busy[MAX_RINGS];
uint64_t wait[MAX_RINGS];
uint64_t sema[MAX_RINGS];
+ uint64_t queued[MAX_RINGS];
} stat[2];
+
+ double queued[MAX_RINGS];
+
int count;
};
diff --git a/overlay/overlay.c b/overlay/overlay.c
index b30f7a1d..5fc6958b 100644
--- a/overlay/overlay.c
+++ b/overlay/overlay.c
@@ -255,6 +255,9 @@ static void show_gpu_top(struct overlay_context *ctx, struct overlay_gpu_top *gt
len = sprintf(txt, "%s: %3d%% busy",
gt->gpu_top.ring[n].name,
gt->gpu_top.ring[n].u.u.busy);
+ if (gt->gpu_top.have_queued)
+ len += sprintf(txt + len, ", qd %.2f",
+ gt->gpu_top.queued[n]);
if (gt->gpu_top.ring[n].u.u.wait)
len += sprintf(txt + len, ", %d%% wait",
gt->gpu_top.ring[n].u.u.wait);