From 750d6c91aa333d6bda20e4a376f8b1064ea7f418 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Fri, 11 Dec 2015 16:03:56 -0500 Subject: radeonsi: add placeholder MC and SRBM performance counter groups Yet another change motivated by AMD GPUPerfStudio compatibility. These groups are not directly accessible from userspace, and AMD GPUPerfStudio does not actually query them - it just requires them to be there. Hence, adding a placeholder for now. --- src/gallium/drivers/radeonsi/si_perfcounter.c | 70 +++++++++++++++++++++------ 1 file changed, 54 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index f944a52b8d..24855e4e6f 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -56,6 +56,8 @@ enum si_pc_reg_layout { /* Registers are laid out in decreasing rather than increasing order. */ SI_PC_REG_REVERSE = 4, + + SI_PC_FAKE = 8, }; struct si_pc_block_base { @@ -325,6 +327,20 @@ static struct si_pc_block_base cik_WD = { .counter0_lo = R_034200_WD_PERFCOUNTER0_LO, }; +static struct si_pc_block_base cik_MC = { + .name = "MC", + .num_counters = 4, + + .layout = SI_PC_FAKE, +}; + +static struct si_pc_block_base cik_SRBM = { + .name = "SRBM", + .num_counters = 2, + + .layout = SI_PC_FAKE, +}; + /* Both the number of instances and selectors varies between chips of the same * class. We only differentiate by class here and simply expose the maximum * number over all chips in a class. @@ -352,6 +368,8 @@ static struct si_pc_block groups_CIK[] = { { &cik_GDS, 121 }, { &cik_VGT, 140 }, { &cik_IA, 22 }, + { &cik_MC, 22 }, + { &cik_SRBM, 19 }, { &cik_WD, 22 }, { &cik_CPG, 46 }, { &cik_CPC, 22 }, @@ -377,6 +395,8 @@ static struct si_pc_block groups_VI[] = { { &cik_GDS, 121 }, { &cik_VGT, 147 }, { &cik_IA, 24 }, + { &cik_MC, 22 }, + { &cik_SRBM, 27 }, { &cik_WD, 37 }, { &cik_CPG, 48 }, { &cik_CPC, 24 }, @@ -391,7 +411,9 @@ static void si_pc_get_size(struct r600_perfcounter_block *group, struct si_pc_block_base *regs = sigroup->b; unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK; - if (layout_multi == SI_PC_MULTI_BLOCK) { + if (regs->layout & SI_PC_FAKE) { + *num_select_dw = 0; + } else if (layout_multi == SI_PC_MULTI_BLOCK) { if (count < regs->num_multi) *num_select_dw = 2 * (count + 2) + regs->num_prelude; else @@ -454,6 +476,9 @@ static void si_pc_emit_select(struct r600_common_context *ctx, assert(count <= regs->num_counters); + if (regs->layout & SI_PC_FAKE) + return; + if (layout_multi == SI_PC_MULTI_BLOCK) { assert(!(regs->layout & SI_PC_REG_REVERSE)); @@ -613,22 +638,35 @@ static void si_pc_emit_read(struct r600_common_context *ctx, unsigned reg = regs->counter0_lo; unsigned reg_delta = 8; - if (regs->layout & SI_PC_REG_REVERSE) - reg_delta = -reg_delta; - - for (idx = 0; idx < count; ++idx) { - if (regs->counters) - reg = regs->counters[idx]; + if (!(regs->layout & SI_PC_FAKE)) { + if (regs->layout & SI_PC_REG_REVERSE) + reg_delta = -reg_delta; - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | - COPY_DATA_DST_SEL(COPY_DATA_MEM)); - radeon_emit(cs, reg >> 2); - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - va += 4; - reg += reg_delta; + for (idx = 0; idx < count; ++idx) { + if (regs->counters) + reg = regs->counters[idx]; + + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | + COPY_DATA_DST_SEL(COPY_DATA_MEM)); + radeon_emit(cs, reg >> 2); + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + va += 4; + reg += reg_delta; + } + } else { + for (idx = 0; idx < count; ++idx) { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | + COPY_DATA_DST_SEL(COPY_DATA_MEM)); + radeon_emit(cs, 0); /* immediate */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + va += 4; + } } } -- cgit v1.2.3