diff options
author | Kenneth Graunke <kenneth@whitecape.org> | 2013-03-27 14:42:19 -0700 |
---|---|---|
committer | Kenneth Graunke <kenneth@whitecape.org> | 2013-10-03 13:35:58 -0700 |
commit | 8a7cdd499a0a1b7e37695d772d76c7892d996e80 (patch) | |
tree | 15c4820a57f83f57fb525f5ceaee1d46d9f1aa37 | |
parent | 5fdba02bb40ef4a9b79bac1361826f99215140ff (diff) |
i965: Add AMD_performance_monitor support for Sandybridge.
XXX: Need to grab forcewake.
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_monitor.c | 164 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_extensions.c | 2 |
3 files changed, 166 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c6e6655032..9d2a52da2f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1333,6 +1333,9 @@ struct brw_context } query; struct { + /** The number of monitors that need A-counters enabled. */ + uint32_t a_counter_users; + /* A map describing which counters are stored at a particular 32-bit * offset in the buffer object. */ diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c b/src/mesa/drivers/dri/i965/brw_performance_monitor.c index 87c4a633bf..fe3a8fe24b 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c +++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c @@ -25,8 +25,6 @@ * \file brw_performance_monitor.c * * Implementation of the GL_AMD_performance_monitor extension. - * - * Currently only for Ironlake. */ #include <limits.h> @@ -168,9 +166,157 @@ const static struct brw_perf_bo_layout gen5_perf_bo_layout[] = /** @} */ +/** + * Sandybridge: + * @{ + * + * While the documentation for performance counters is suspiciously missing + * from the Sandybridge PRM, they were documented in Volume 1 Part 3 of the + * Ironlake PRM. + */ + +/** + * Aggregating counters A0-A28: + * + * While the Ironlake PRM clearly documents that there are 29 counters (A0-A28), + * it only lists the names for 28 of them; one is missing. However, careful + * examination reveals a pattern: there are five GS counters (Active, Stall, + * Core Stall, # threads loaded, and ready but not running time). There are + * also five PS counters, in the same order. But there are only four VS + * counters listed - the number of VS threads loaded is missing. Presumably, + * it exists and is counter 5, and the rest are shifted over one place. 
+ */ +const static struct gl_perf_monitor_counter gen6_raw_aggregating_counters[] = { + COUNTER("Aggregated Core Array Active"), + COUNTER("Aggregated Core Array Stalled"), + COUNTER("Vertex Shader Active Time"), + COUNTER("Vertex Shader Stall Time"), + COUNTER("Vertex Shader Stall Time - Core Stall"), + COUNTER("# VS threads loaded"), + COUNTER("Vertex Shader Ready but not running time"), + COUNTER("Geometry Shader Active Time"), + COUNTER("Geometry Shader Stall Time"), + COUNTER("Geometry Shader Stall Time - Core Stall"), + COUNTER("# GS threads loaded"), + COUNTER("Geometry Shader Ready but not running time"), + COUNTER("Pixel Shader Active Time"), + COUNTER("Pixel Shader Stall Time"), + COUNTER("Pixel Shader Stall Time - Core Stall"), + COUNTER("# PS threads loaded"), + COUNTER("Pixel Shader Ready but not running time"), + COUNTER("Early Z Test Pixels Passing"), + COUNTER("Early Z Test Pixels Failing"), + COUNTER("Early Stencil Test Pixels Passing"), + COUNTER("Early Stencil Test Pixels Failing"), + COUNTER("Pixel Kill Count"), + COUNTER("Alpha Test Pixels Failed"), + COUNTER("Post PS Stencil Pixels Failed"), + COUNTER("Post PS Z buffer Pixels Failed"), + COUNTER("Pixels/samples written in the frame buffer"), + COUNTER("GPU Busy"), + COUNTER("CL active and not stalled"), + COUNTER("SF active and stalled"), +}; + +const static struct gl_perf_monitor_group gen6_groups[] = { + GROUP("Aggregating Counters", INT_MAX, gen6_raw_aggregating_counters), +}; + +/** + * Sandybridge: Counter Select = 001 + * A0 A1 A2 A3 A4 TIMESTAMP RPT_ID + * A5 A6 A7 A8 A9 A10 A11 A12 + * A13 A14 A15 A16 A17 A18 A19 A20 + * A21 A22 A23 A24 A25 A26 A27 A28 + * + * (Yes, this is a strange order.) + */ +const static struct brw_perf_bo_layout gen6_perf_bo_layout[] = +{ + { -1, -1 }, /* Report ID */ + { -1, -1 }, /* TIMESTAMP (64-bit) */ + { -1, -1 }, /* ...second half... 
*/ + { 0, 4 }, /* A counters, listed in the reverse of each row of the table above */ + { 0, 3 }, + { 0, 2 }, + { 0, 1 }, + { 0, 0 }, + { 0, 12 }, + { 0, 11 }, + { 0, 10 }, + { 0, 9 }, + { 0, 8 }, + { 0, 7 }, + { 0, 6 }, + { 0, 5 }, + { 0, 20 }, + { 0, 19 }, + { 0, 18 }, + { 0, 17 }, + { 0, 16 }, + { 0, 15 }, + { 0, 14 }, + { 0, 13 }, + { 0, 28 }, + { 0, 27 }, + { 0, 26 }, + { 0, 25 }, + { 0, 24 }, + { 0, 23 }, + { 0, 22 }, + { 0, 21 }, +}; +/** @} */ + /******************************************************************************/ static void +start_aggregating_counters(struct brw_context *brw) +{ + unsigned counter_format = 1; /* Counter Select = 001, the report format described by gen6_perf_bo_layout */ + + /* Ironlake always counts; only Gen6 and later need an explicit enable. */ + if (brw->gen < 6) + return; + + if (++brw->perfmon.a_counter_users == 1) { /* only the first user programs OACONTROL */ + BEGIN_BATCH(3); + OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); + OUT_BATCH(OACONTROL); + /* XXX: Enable only for this context */ + OUT_BATCH(counter_format << OACONTROL_COUNTER_SELECT_SHIFT | + OACONTROL_ENABLE_COUNTERS); + ADVANCE_BATCH(); + + /* We have to flush or else the counters may not get enabled + * before the actions we want to measure happen. + */ + intel_batchbuffer_emit_mi_flush(brw); + } +} + +static void +stop_aggregating_counters(struct brw_context *brw) +{ + /* Ironlake counters are unstoppable */ + if (brw->gen < 6) + return; + + if (--brw->perfmon.a_counter_users == 0) { /* only the last user clears OACONTROL */ + BEGIN_BATCH(3); + OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); + OUT_BATCH(OACONTROL); + OUT_BATCH(0); + ADVANCE_BATCH(); + + /* Leaving counters enabled is probably harmless, but flush anyway + * just to make sure they get turned off. 
+ */ + intel_batchbuffer_emit_mi_flush(brw); + } +} + +static void snapshot_aggregating_counters(struct brw_context *brw, drm_intel_bo *bo, uint32_t offset_in_bytes) { @@ -194,6 +340,13 @@ snapshot_aggregating_counters(struct brw_context *brw, offset_in_bytes + 64); OUT_BATCH(report_id); ADVANCE_BATCH(); + } else if (brw->gen == 6) { + BEGIN_BATCH(3); + OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT); + OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + offset_in_bytes | MI_COUNTER_ADDRESS_GTT); + OUT_BATCH(report_id); + ADVANCE_BATCH(); } else { assert(!"Unsupported generation for performance counters."); } @@ -256,6 +409,7 @@ brw_begin_perf_monitor(struct gl_context *ctx, /* Take a shapshot of all active counters */ if (aggregating_counters_needed(brw, m)) { + start_aggregating_counters(brw); snapshot_aggregating_counters(brw, monitor->bo, 0); } @@ -272,6 +426,7 @@ brw_end_perf_monitor(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); struct brw_perf_monitor_object *monitor = brw_perf_monitor(m); if (aggregating_counters_needed(brw, m)) { + stop_aggregating_counters(brw); /* NOTE(review): this may clear OACONTROL before the closing snapshot below is emitted - confirm the report is still valid with counters disabled */ snapshot_aggregating_counters(brw, monitor->bo, SECOND_SNAPSHOT_OFFSET_IN_BYTES); } @@ -387,5 +542,10 @@ brw_init_performance_monitors(struct brw_context *brw) ctx->PerfMonitor.NumGroups = ARRAY_SIZE(gen5_groups); brw->perfmon.bo_layout = gen5_perf_bo_layout; brw->perfmon.entries_in_bo = ARRAY_SIZE(gen5_perf_bo_layout); + } else if (brw->gen == 6) { + ctx->PerfMonitor.Groups = gen6_groups; + ctx->PerfMonitor.NumGroups = ARRAY_SIZE(gen6_groups); + brw->perfmon.bo_layout = gen6_perf_bo_layout; + brw->perfmon.entries_in_bo = ARRAY_SIZE(gen6_perf_bo_layout); } } diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 0502a48552..bd57a39c3c 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -160,7 +160,7 @@ intelInitExtensions(struct gl_context *ctx) 
ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130; } - if (brw->gen == 5) + if (brw->gen == 5 || brw->gen == 6) ctx->Extensions.AMD_performance_monitor = true; if (brw->gen >= 7) { |