summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Graunke <kenneth@whitecape.org>2013-03-27 14:42:19 -0700
committerKenneth Graunke <kenneth@whitecape.org>2013-10-03 13:35:58 -0700
commit8a7cdd499a0a1b7e37695d772d76c7892d996e80 (patch)
tree15c4820a57f83f57fb525f5ceaee1d46d9f1aa37
parent5fdba02bb40ef4a9b79bac1361826f99215140ff (diff)
i965: Add AMD_performance_monitor support for Sandybridge.
XXX: Need to grab forcewake.
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_performance_monitor.c164
-rw-r--r--src/mesa/drivers/dri/i965/intel_extensions.c2
3 files changed, 166 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index c6e6655032..9d2a52da2f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1333,6 +1333,9 @@ struct brw_context
} query;
struct {
+ /** The number of monitors that need A-counters enabled. */
+ uint32_t a_counter_users;
+
/* A map describing which counters are stored at a particular 32-bit
* offset in the buffer object.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
index 87c4a633bf..fe3a8fe24b 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
@@ -25,8 +25,6 @@
* \file brw_performance_monitor.c
*
* Implementation of the GL_AMD_performance_monitor extension.
- *
- * Currently only for Ironlake.
*/
#include <limits.h>
@@ -168,9 +166,157 @@ const static struct brw_perf_bo_layout gen5_perf_bo_layout[] =
/** @} */
+/**
+ * Sandybridge:
+ * @{
+ *
+ * While the documentation for performance counters is suspiciously missing
+ * from the Sandybridge PRM, they were documented in Volume 1 Part 3 of the
+ * Ironlake PRM.
+ */
+
+/**
+ * Aggregating counters A0-A28:
+ *
+ * While the Ironlake PRM clearly documents that there are 29 counters (A0-A28),
+ * it only lists the names for 28 of them; one is missing. However, careful
+ * examination reveals a pattern: there are five GS counters (Active, Stall,
+ * Core Stall, # threads loaded, and ready but not running time). There are
+ * also five PS counters, in the same order. But there are only four VS
+ * counters listed - the number of VS threads loaded is missing. Presumably,
+ * it exists and is counter 5, and the rest are shifted over one place.
+ */
+const static struct gl_perf_monitor_counter gen6_raw_aggregating_counters[] = {
+ COUNTER("Aggregated Core Array Active"),
+ COUNTER("Aggregated Core Array Stalled"),
+ COUNTER("Vertex Shader Active Time"),
+ COUNTER("Vertex Shader Stall Time"),
+ COUNTER("Vertex Shader Stall Time - Core Stall"),
+ COUNTER("# VS threads loaded"),
+ COUNTER("Vertex Shader Ready but not running time"),
+ COUNTER("Geometry Shader Active Time"),
+ COUNTER("Geometry Shader Stall Time"),
+ COUNTER("Geometry Shader Stall Time - Core Stall"),
+ COUNTER("# GS threads loaded"),
+ COUNTER("Geometry Shader Ready but not running time"),
+ COUNTER("Pixel Shader Active Time"),
+ COUNTER("Pixel Shader Stall Time"),
+ COUNTER("Pixel Shader Stall Time - Core Stall"),
+ COUNTER("# PS threads loaded"),
+ COUNTER("Pixel Shader Ready but not running time"),
+ COUNTER("Early Z Test Pixels Passing"),
+ COUNTER("Early Z Test Pixels Failing"),
+ COUNTER("Early Stencil Test Pixels Passing"),
+ COUNTER("Early Stencil Test Pixels Failing"),
+ COUNTER("Pixel Kill Count"),
+ COUNTER("Alpha Test Pixels Failed"),
+ COUNTER("Post PS Stencil Pixels Failed"),
+ COUNTER("Post PS Z buffer Pixels Failed"),
+ COUNTER("Pixels/samples written in the frame buffer"),
+ COUNTER("GPU Busy"),
+ COUNTER("CL active and not stalled"),
+ COUNTER("SF active and stalled"),
+};
+
+const static struct gl_perf_monitor_group gen6_groups[] = {
+ GROUP("Aggregating Counters", INT_MAX, gen6_raw_aggregating_counters),
+};
+
+/**
+ * Sandybridge: Counter Select = 001
+ * A0 A1 A2 A3 A4 TIMESTAMP RPT_ID
+ * A5 A6 A7 A8 A9 A10 A11 A12
+ * A13 A14 A15 A16 A17 A18 A19 A20
+ * A21 A22 A23 A24 A25 A26 A27 A28
+ *
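+ * (Yes, this is a strange order: each row above is written with its
+ * highest 32-bit slot first, so the table below walks every row from
+ * right to left.)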
+ */
+const static struct brw_perf_bo_layout gen6_perf_bo_layout[] =
+{
+ { -1, -1 }, /* Report ID */
+ { -1, -1 }, /* TIMESTAMP (64-bit) */
+ { -1, -1 }, /* ...second half... */
+ { 0, 4 }, /* A counters */
+ { 0, 3 },
+ { 0, 2 },
+ { 0, 1 },
+ { 0, 0 },
+ { 0, 12 },
+ { 0, 11 },
+ { 0, 10 },
+ { 0, 9 },
+ { 0, 8 },
+ { 0, 7 },
+ { 0, 6 },
+ { 0, 5 },
+ { 0, 20 },
+ { 0, 19 },
+ { 0, 18 },
+ { 0, 17 },
+ { 0, 16 },
+ { 0, 15 },
+ { 0, 14 },
+ { 0, 13 },
+ { 0, 28 },
+ { 0, 27 },
+ { 0, 26 },
+ { 0, 25 },
+ { 0, 24 },
+ { 0, 23 },
+ { 0, 22 },
+ { 0, 21 },
+};
+/** @} */
+
/******************************************************************************/
static void
+start_aggregating_counters(struct brw_context *brw)
+{
+ unsigned counter_format = 1;
+
+ /* Ironlake always counts */
+ if (brw->gen < 6)
+ return;
+
+ if (++brw->perfmon.a_counter_users == 1) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+ OUT_BATCH(OACONTROL);
+ /* XXX: Enable only for this context */
+ OUT_BATCH(counter_format << OACONTROL_COUNTER_SELECT_SHIFT |
+ OACONTROL_ENABLE_COUNTERS);
+ ADVANCE_BATCH();
+
+ /* We have to flush or else the counters may not get enabled
+ * before the actions we want to measure happen.
+ */
+ intel_batchbuffer_emit_mi_flush(brw);
+ }
+}
+
+static void
+stop_aggregating_counters(struct brw_context *brw)
+{
+ /* Ironlake counters are unstoppable */
+ if (brw->gen < 6)
+ return;
+
+ if (--brw->perfmon.a_counter_users == 0) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+ OUT_BATCH(OACONTROL);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ /* Leaving counters enabled is probably harmless, but flush anyway
+ * just to make sure they get turned off.
+ */
+ intel_batchbuffer_emit_mi_flush(brw);
+ }
+}
+
+static void
snapshot_aggregating_counters(struct brw_context *brw,
drm_intel_bo *bo, uint32_t offset_in_bytes)
{
@@ -194,6 +340,13 @@ snapshot_aggregating_counters(struct brw_context *brw,
offset_in_bytes + 64);
OUT_BATCH(report_id);
ADVANCE_BATCH();
+ } else if (brw->gen == 6) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
+ OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ offset_in_bytes | MI_COUNTER_ADDRESS_GTT);
+ OUT_BATCH(report_id);
+ ADVANCE_BATCH();
} else {
assert(!"Unsupported generation for performance counters.");
}
@@ -256,6 +409,7 @@ brw_begin_perf_monitor(struct gl_context *ctx,
/* Take a snapshot of all active counters */
if (aggregating_counters_needed(brw, m)) {
+ start_aggregating_counters(brw);
snapshot_aggregating_counters(brw, monitor->bo, 0);
}
@@ -272,6 +426,7 @@ brw_end_perf_monitor(struct gl_context *ctx,
struct brw_context *brw = brw_context(ctx);
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
if (aggregating_counters_needed(brw, m)) {
+ stop_aggregating_counters(brw);
snapshot_aggregating_counters(brw, monitor->bo,
SECOND_SNAPSHOT_OFFSET_IN_BYTES);
}
@@ -387,5 +542,10 @@ brw_init_performance_monitors(struct brw_context *brw)
ctx->PerfMonitor.NumGroups = ARRAY_SIZE(gen5_groups);
brw->perfmon.bo_layout = gen5_perf_bo_layout;
brw->perfmon.entries_in_bo = ARRAY_SIZE(gen5_perf_bo_layout);
+ } else if (brw->gen == 6) {
+ ctx->PerfMonitor.Groups = gen6_groups;
+ ctx->PerfMonitor.NumGroups = ARRAY_SIZE(gen6_groups);
+ brw->perfmon.bo_layout = gen6_perf_bo_layout;
+ brw->perfmon.entries_in_bo = ARRAY_SIZE(gen6_perf_bo_layout);
}
}
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 0502a48552..bd57a39c3c 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -160,7 +160,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130;
}
- if (brw->gen == 5)
+ if (brw->gen == 5 || brw->gen == 6)
ctx->Extensions.AMD_performance_monitor = true;
if (brw->gen >= 7) {