diff options
field | value | date |
---|---|---|
author | Chris Wilson <chris@chris-wilson.co.uk> | 2013-08-29 12:01:38 +0100 |
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2013-08-29 13:33:58 +0100 |
commit | 474ce5396e3dc5a3f057da84cb70a642c0ac90d6 (patch) | |
tree | 8fac2e048f53960245430fba74cb597b2c3ab7d0 /overlay | |
parent | 75ef36713a75bc46faf5b92a4442869ef6999c3a (diff) | |
overlay: Monitor per-ring context switch rate

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'overlay'):

mode | file | lines changed |
---|---|---|
-rw-r--r-- | overlay/gpu-perf.c | 25 |
-rw-r--r-- | overlay/gpu-perf.h | 4 |
-rw-r--r-- | overlay/overlay.c | 43 |

3 files changed, 61 insertions, 11 deletions
diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c index fc215634..1d35da50 100644 --- a/overlay/gpu-perf.c +++ b/overlay/gpu-perf.c @@ -40,10 +40,12 @@ #if defined(__i386__) #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #endif #if defined(__x86_64__) #define rmb() asm volatile("lfence" ::: "memory") +#define wmb() asm volatile("sfence" ::: "memory") #endif #define N_PAGES 32 @@ -228,6 +230,14 @@ static int flip_complete(struct gpu_perf *gp, const void *event) return 1; } +static int ctx_switch(struct gpu_perf *gp, const void *event) +{ + const struct sample_event *sample = event; + + gp->ctx_switch[sample->raw[1]]++; + return 1; +} + static int ring_sync(struct gpu_perf *gp, const void *event) { const struct sample_event *sample = event; @@ -293,6 +303,7 @@ void gpu_perf_init(struct gpu_perf *gp, unsigned flags) perf_tracepoint_open(gp, "i915", "i915_gem_request_wait_end", wait_end); perf_tracepoint_open(gp, "i915", "i915_flip_complete", flip_complete); perf_tracepoint_open(gp, "i915", "i915_gem_ring_sync_to", ring_sync); + perf_tracepoint_open(gp, "i915", "i915_gem_ring_switch_context", ctx_switch); if (gp->nr_events == 0) { gp->error = "i915.ko tracepoints not available"; @@ -303,20 +314,19 @@ void gpu_perf_init(struct gpu_perf *gp, unsigned flags) return; } -static int process_sample(struct gpu_perf *gp, +static int process_sample(struct gpu_perf *gp, int cpu, const struct perf_event_header *header) { const struct sample_event *sample = (const struct sample_event *)header; int n, update = 0; /* hash me! 
*/ - for (n = 0; n < gp->nr_cpus * gp->nr_events; n++) { - if (gp->sample[n].id != sample->id) + for (n = 0; n < gp->nr_events; n++) { + int m = n * gp->nr_cpus + cpu; + if (gp->sample[m].id != sample->id) continue; - update = 1; - if (gp->sample[n].func) - update = gp->sample[n].func(gp, sample); + update = gp->sample[m].func(gp, sample); break; } @@ -380,13 +390,14 @@ int gpu_perf_update(struct gpu_perf *gp) } if (header->type == PERF_RECORD_SAMPLE) - update += process_sample(gp, header); + update += process_sample(gp, n, header); tail += header->size; } if (wrap) tail &= mask; mmap->data_tail = tail; + wmb(); } free(buffer); diff --git a/overlay/gpu-perf.h b/overlay/gpu-perf.h index 395eb8af..fae60bcb 100644 --- a/overlay/gpu-perf.h +++ b/overlay/gpu-perf.h @@ -41,7 +41,9 @@ struct gpu_perf { int (*func)(struct gpu_perf *, const void *); } *sample; - int flip_complete[4]; + unsigned flip_complete[MAX_RINGS]; + unsigned ctx_switch[MAX_RINGS]; + struct gpu_perf_comm { struct gpu_perf_comm *next; char name[256]; diff --git a/overlay/overlay.c b/overlay/overlay.c index 65137864..3fddde94 100644 --- a/overlay/overlay.c +++ b/overlay/overlay.c @@ -102,6 +102,7 @@ struct overlay_gpu_top { struct overlay_gpu_perf { struct gpu_perf gpu_perf; + time_t show_ctx; }; struct overlay_gpu_freq { @@ -127,6 +128,8 @@ struct overlay_context { cairo_t *cr; int width, height; + time_t time; + struct overlay_gpu_top gpu_top; struct overlay_gpu_perf gpu_perf; struct overlay_gpu_freq gpu_freq; @@ -270,6 +273,8 @@ static void init_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *gp) { gpu_perf_init(&gp->gpu_perf, 0); + + gp->show_ctx = 0; } static char *get_comm(pid_t pid, char *comm, int len) @@ -310,6 +315,16 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf * char buf[1024]; cairo_pattern_t *linear; int x, y, y1, y2, n; + int has_ctx = 0; + + gpu_perf_update(&gp->gpu_perf); + + for (n = 4; n > 0; n--) { + if (gp->gpu_perf.ctx_switch[n-1]) 
{ + has_ctx = n; + break; + } + } cairo_rectangle(ctx->cr, ctx->width/2+HALF_PAD-.5, PAD-.5, ctx->width/2-SIZE_PAD+1, ctx->height/2-SIZE_PAD+1); cairo_set_source_rgb(ctx->cr, .15, .15, .15); @@ -326,12 +341,9 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf * return; } - gpu_perf_update(&gp->gpu_perf); - y = PAD + 12 - 2; x = ctx->width/2 + HALF_PAD; - for (comm = gp->gpu_perf.comm; comm; comm = comm->next) { int total; @@ -369,6 +381,8 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf * chart_draw(comm->user_data, ctx->cr); y2 += 14; } + if (has_ctx || gp->show_ctx) + y2 += 14; y1 += -12 - 2; y2 += 14 - 14 + 4; @@ -465,6 +479,27 @@ skip_comm: cairo_move_to(ctx->cr, x, y); cairo_show_text(ctx->cr, buf); y += 14; + + cairo_set_source_rgba(ctx->cr, 1, 1, 1, 1); + cairo_move_to(ctx->cr, x, y); + if (has_ctx) { + int len = sprintf(buf, "Contexts:"); + for (n = 0; n < has_ctx; n++) + len += sprintf(buf + len, "%s %d", + n ? "," : "", + gp->gpu_perf.ctx_switch[n]); + + memset(gp->gpu_perf.ctx_switch, 0, sizeof(gp->gpu_perf.ctx_switch)); + gp->show_ctx = ctx->time; + + cairo_show_text(ctx->cr, buf); + y += 14; + } else if (gp->show_ctx) { + cairo_show_text(ctx->cr, "Contexts: 0"); + y += 14; + if (ctx->time - gp->show_ctx > 10) + gp->show_ctx = 0; + } } static void init_gpu_freq(struct overlay_context *ctx, @@ -841,6 +876,8 @@ int main(int argc, char **argv) i = 0; while (1) { + ctx.time = time(NULL); + ctx.cr = cairo_create(ctx.surface); cairo_set_operator(ctx.cr, CAIRO_OPERATOR_CLEAR); cairo_paint(ctx.cr); |