diff options
author | Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> | 2021-02-09 18:23:06 -0500 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-02-11 17:24:37 +0000 |
commit | a27d76a2d9a51dcb9028d0dfa8c1a3d1171e5006 (patch) | |
tree | 582507157a7edf70aee3d58dd720b2a2547a355c | |
parent | 040a350b1eb0be13613c7ddaa8524ac4afc45cfc (diff) |
pan/bi: Push UBOs on Bifrost
Based on the Midgard pass. Results look better here than they did on Midgard,
since Midgard already had a basic UBO pushing pass to begin with. It is
particularly nice to see the dramatic reduction in spilling.
total instructions in shared programs: 169141 -> 161215 (-4.69%)
instructions in affected programs: 164102 -> 156176 (-4.83%)
helped: 1269
HURT: 90
helped stats (abs) min: 1 max: 61 x̄: 6.50 x̃: 4
helped stats (rel) min: 0.15% max: 17.58% x̄: 6.31% x̃: 5.88%
HURT stats (abs) min: 1 max: 170 x̄: 3.58 x̃: 1
HURT stats (rel) min: 0.08% max: 133.33% x̄: 16.65% x̃: 5.26%
95% mean confidence interval for instructions value: -6.28 -5.38
95% mean confidence interval for instructions %-change: -5.39% -4.18%
Instructions are helped.
total nops in shared programs: 121049 -> 120997 (-0.04%)
nops in affected programs: 110024 -> 109972 (-0.05%)
helped: 501
HURT: 758
helped stats (abs) min: 1 max: 45 x̄: 5.54 x̃: 2
helped stats (rel) min: 0.25% max: 47.06% x̄: 6.81% x̃: 4.55%
HURT stats (abs) min: 1 max: 102 x̄: 3.59 x̃: 3
HURT stats (rel) min: 0.32% max: 50.00% x̄: 7.13% x̃: 6.06%
95% mean confidence interval for nops value: -0.45 0.37
95% mean confidence interval for nops %-change: 1.07% 2.09%
Inconclusive result (value mean confidence interval includes 0).
total clauses in shared programs: 40388 -> 31610 (-21.73%)
clauses in affected programs: 38825 -> 30047 (-22.61%)
helped: 1367
HURT: 2
helped stats (abs) min: 1 max: 58 x̄: 6.43 x̃: 5
helped stats (rel) min: 1.34% max: 55.56% x̄: 24.97% x̃: 25.00%
HURT stats (abs) min: 2 max: 12 x̄: 7.00 x̃: 7
HURT stats (rel) min: 5.08% max: 6.67% x̄: 5.88% x̃: 5.88%
95% mean confidence interval for clauses value: -6.74 -6.08
95% mean confidence interval for clauses %-change: -25.50% -24.35%
Clauses are helped.
total quadwords in shared programs: 144937 -> 130686 (-9.83%)
quadwords in affected programs: 140419 -> 126168 (-10.15%)
helped: 1369
HURT: 13
helped stats (abs) min: 1 max: 112 x̄: 10.50 x̃: 7
helped stats (rel) min: 0.23% max: 31.82% x̄: 11.36% x̃: 10.78%
HURT stats (abs) min: 1 max: 106 x̄: 10.00 x̃: 1
HURT stats (rel) min: 5.88% max: 10.24% x̄: 9.26% x̃: 10.00%
95% mean confidence interval for quadwords value: -10.96 -9.66
95% mean confidence interval for quadwords %-change: -11.52% -10.82%
Quadwords are helped.
total spills in shared programs: 1106 -> 705 (-36.26%)
spills in affected programs: 1058 -> 657 (-37.90%)
helped: 41
HURT: 0
total fills in shared programs: 2241 -> 1645 (-26.60%)
fills in affected programs: 2219 -> 1623 (-26.86%)
helped: 43
HURT: 2
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8973>
-rw-r--r-- | src/panfrost/Makefile.sources | 1 | ||||
-rw-r--r-- | src/panfrost/bifrost/bi_opt_push_ubo.c | 159 | ||||
-rw-r--r-- | src/panfrost/bifrost/bifrost_compile.c | 4 | ||||
-rw-r--r-- | src/panfrost/bifrost/compiler.h | 2 | ||||
-rw-r--r-- | src/panfrost/bifrost/meson.build | 1 |
5 files changed, 167 insertions, 0 deletions
diff --git a/src/panfrost/Makefile.sources b/src/panfrost/Makefile.sources index 4ea3ccffeea..8edb5bd3535 100644 --- a/src/panfrost/Makefile.sources +++ b/src/panfrost/Makefile.sources @@ -11,6 +11,7 @@ bifrost_FILES := \ bifrost/bi_ra.c \ bifrost/bi_opt_copy_prop.c \ bifrost/bi_opt_dce.c \ + bifrost/bi_opt_push_ubo.c \ bifrost/bi_quirks.h \ bifrost/bi_test_pack.c \ bifrost/bir.c \ diff --git a/src/panfrost/bifrost/bi_opt_push_ubo.c b/src/panfrost/bifrost/bi_opt_push_ubo.c new file mode 100644 index 00000000000..bdd6ddf59a2 --- /dev/null +++ b/src/panfrost/bifrost/bi_opt_push_ubo.c @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2021 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "compiler.h" +#include "bi_builder.h" + +/* This optimization pass, intended to run once after code emission but before + * copy propagation, analyzes direct word-aligned UBO reads and promotes a + * subset to moves from FAU. It is the sole populator of the UBO push data + * structure returned back to the command stream. */ + +static bool +bi_is_direct_aligned_ubo(bi_instr *ins) +{ + return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) && + (ins->seg == BI_SEG_UBO) && + (ins->src[0].type == BI_INDEX_CONSTANT) && + (ins->src[1].type == BI_INDEX_CONSTANT) && + ((ins->src[0].value & 0x3) == 0); +} + +/* Represents use data for a single UBO */ + +#define MAX_UBO_WORDS (65536 / 16) + +struct bi_ubo_block { + BITSET_DECLARE(pushed, MAX_UBO_WORDS); + uint8_t range[MAX_UBO_WORDS]; +}; + +struct bi_ubo_analysis { + /* Per block analysis */ + unsigned nr_blocks; + struct bi_ubo_block *blocks; +}; + +static struct bi_ubo_analysis +bi_analyze_ranges(bi_context *ctx) +{ + struct bi_ubo_analysis res = { + .nr_blocks = ctx->nir->info.num_ubos + 1, + }; + + res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block)); + + bi_foreach_instr_global(ctx, ins) { + if (!bi_is_direct_aligned_ubo(ins)) continue; + + unsigned ubo = ins->src[1].value; + unsigned word = ins->src[0].value / 4; + unsigned channels = bi_opcode_props[ins->op].sr_count; + + assert(ubo < res.nr_blocks); + assert(channels > 0 && channels <= 4); + + if (word < MAX_UBO_WORDS) + res.blocks[ubo].range[word] = channels; + } + + return res; +} + +/* Select UBO words to push. A sophisticated implementation would consider the + * number of uses and perhaps the control flow to estimate benefit. This is not + * sophisticated. Select from the last UBO first to prioritize sysvals. 
*/ + +static void +bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis) +{ + for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) { + struct bi_ubo_block *block = &analysis->blocks[ubo]; + + for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) { + unsigned range = block->range[r]; + + /* Don't push something we don't access */ + if (range == 0) continue; + + /* Don't push more than possible */ + if (push->count > PAN_MAX_PUSH - range) + return; + + for (unsigned offs = 0; offs < range; ++offs) { + struct panfrost_ubo_word word = { + .ubo = ubo, + .offset = (r + offs) * 4 + }; + + push->words[push->count++] = word; + } + + /* Mark it as pushed so we can rewrite */ + BITSET_SET(block->pushed, r); + } + } +} + +void +bi_opt_push_ubo(bi_context *ctx) +{ + /* This pass only runs once */ + assert(ctx->push->count == 0); + + struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx); + bi_pick_ubo(ctx->push, &analysis); + + bi_foreach_instr_global_safe(ctx, ins) { + if (!bi_is_direct_aligned_ubo(ins)) continue; + + unsigned ubo = ins->src[1].value; + unsigned offset = ins->src[0].value; + + /* Check if we decided to push this */ + assert(ubo < analysis.nr_blocks); + if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) continue; + + /* Replace the UBO load with moves from FAU */ + bi_builder b = bi_init_builder(ctx, bi_after_instr(ins)); + + unsigned channels = bi_opcode_props[ins->op].sr_count; + + for (unsigned w = 0; w < channels; ++w) { + /* FAU is grouped in pairs (2 x 4-byte) */ + unsigned base = pan_lookup_pushed_ubo(ctx->push, ubo, + (offset + 4 * w)); + + unsigned fau_idx = (base >> 1); + unsigned fau_hi = (base & 1); + + bi_mov_i32_to(&b, + bi_word(ins->dest[0], w), + bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi)); + } + + bi_remove_instruction(ins); + } + + free(analysis.blocks); +} diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 9ad489babea..52b3232386d 100644 --- 
a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -2446,6 +2446,7 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir, ctx->arch = inputs->gpu_id >> 12; ctx->is_blend = inputs->is_blend; ctx->blend_desc = inputs->blend.bifrost_blend_desc; + ctx->push = &program->push; memcpy(ctx->blend_constants, inputs->blend.constants, sizeof(ctx->blend_constants)); list_inithead(&ctx->blocks); @@ -2512,6 +2513,9 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir, bi_cull_dead_branch(block); } + /* Runs before copy prop */ + bi_opt_push_ubo(ctx); + bool progress = false; do { diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index e7086611cd5..0009d2c97d2 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -498,6 +498,7 @@ typedef struct { gl_shader_stage stage; struct list_head blocks; /* list of bi_block */ struct panfrost_sysvals sysvals; + struct panfrost_ubo_push *push; uint32_t quirks; unsigned arch; unsigned tls_size; @@ -748,6 +749,7 @@ void bi_print_shader(bi_context *ctx, FILE *fp); bool bi_opt_copy_prop(bi_context *ctx); bool bi_opt_dead_code_eliminate(bi_context *ctx, bi_block *block, bool soft); +void bi_opt_push_ubo(bi_context *ctx); void bi_schedule(bi_context *ctx); void bi_register_allocate(bi_context *ctx); diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index dd893854e54..bf58e79a357 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -25,6 +25,7 @@ libpanfrost_bifrost_files = files( 'bi_print.c', 'bi_opt_copy_prop.c', 'bi_opt_dce.c', + 'bi_opt_push_ubo.c', 'bi_pack.c', 'bi_ra.c', 'bi_schedule.c', |