diff options
Diffstat (limited to 'src/panfrost')
-rw-r--r-- | src/panfrost/midgard/compiler.h | 4 | ||||
-rw-r--r-- | src/panfrost/midgard/meson.build | 1 | ||||
-rw-r--r-- | src/panfrost/midgard/midgard_compile.c | 133 | ||||
-rw-r--r-- | src/panfrost/midgard/midgard_opt_prop.c | 239 | ||||
-rw-r--r-- | src/panfrost/util/meson.build | 1 | ||||
-rw-r--r-- | src/panfrost/util/nir_mod_helpers.c | 124 | ||||
-rw-r--r-- | src/panfrost/util/pan_ir.h | 4 |
7 files changed, 248 insertions, 258 deletions
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 482598b3c21..2272933a708 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -288,9 +288,6 @@ typedef struct compiler_context { int temp_count; int max_hash; - /* Set of NIR indices that were already emitted as outmods */ - BITSET_WORD *already_emitted; - /* Count of instructions emitted from NIR overall, across all blocks */ int instruction_count; @@ -683,6 +680,7 @@ unsigned midgard_get_first_tag_from_block(compiler_context *ctx, /* Optimizations */ bool midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block); +bool midgard_opt_prop(compiler_context *ctx); bool midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block); bool midgard_opt_varying_projection(compiler_context *ctx, diff --git a/src/panfrost/midgard/meson.build b/src/panfrost/midgard/meson.build index 607f0dc72ff..73dd2b0873e 100644 --- a/src/panfrost/midgard/meson.build +++ b/src/panfrost/midgard/meson.build @@ -39,6 +39,7 @@ libpanfrost_midgard_files = files( 'midgard_opt_copy_prop.c', 'midgard_opt_dce.c', 'midgard_opt_perspective.c', + 'midgard_opt_prop.c', 'midgard_errata_lod.c', 'nir_fuse_io_16.c', ) diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 32b7ec0eac1..c6d3b7f8129 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -586,94 +586,11 @@ nir_is_non_scalar_swizzle(nir_alu_src *src, unsigned nr_components) ALU_CHECK_CMP(); \ break; -/* Compare mir_lower_invert */ -static bool -nir_accepts_inot(nir_op op, unsigned src) -{ - switch (op) { - case nir_op_ior: - case nir_op_iand: /* TODO: b2f16 */ - case nir_op_ixor: - return true; - default: - return false; - } -} - -static bool -mir_accept_dest_mod(compiler_context *ctx, nir_dest **dest, nir_op op) -{ - if (pan_has_dest_mod(dest, op)) { - assert((*dest)->is_ssa); - BITSET_SET(ctx->already_emitted, (*dest)->ssa.index); - return true; - } - - return false; -} - -/* Look for floating point mods. We have the mods clamp_m1_1, clamp_0_1, - * and clamp_0_inf. We also have the relations (note 3 * 2 = 6 cases): - * - * clamp_0_1(clamp_0_inf(x)) = clamp_m1_1(x) - * clamp_0_1(clamp_m1_1(x)) = clamp_m1_1(x) - * clamp_0_inf(clamp_0_1(x)) = clamp_m1_1(x) - * clamp_0_inf(clamp_m1_1(x)) = clamp_m1_1(x) - * clamp_m1_1(clamp_0_1(x)) = clamp_m1_1(x) - * clamp_m1_1(clamp_0_inf(x)) = clamp_m1_1(x) - * - * So by cases any composition of output modifiers is equivalent to - * clamp_m1_1 alone. - */ -static unsigned -mir_determine_float_outmod(compiler_context *ctx, nir_dest **dest, - unsigned prior_outmod) -{ - bool clamp_0_inf = mir_accept_dest_mod(ctx, dest, nir_op_fclamp_pos_mali); - bool clamp_0_1 = mir_accept_dest_mod(ctx, dest, nir_op_fsat); - bool clamp_m1_1 = mir_accept_dest_mod(ctx, dest, nir_op_fsat_signed_mali); - bool prior = (prior_outmod != midgard_outmod_none); - int count = (int)prior + (int)clamp_0_inf + (int)clamp_0_1 + (int)clamp_m1_1; - - return ((count > 1) || clamp_0_1) ? midgard_outmod_clamp_0_1 - : clamp_0_inf ? midgard_outmod_clamp_0_inf - : clamp_m1_1 ? midgard_outmod_clamp_m1_1 - : prior_outmod; -} - static void mir_copy_src(midgard_instruction *ins, nir_alu_instr *instr, unsigned i, - unsigned to, bool *abs, bool *neg, bool * not, - enum midgard_roundmode *roundmode, bool is_int, - unsigned bcast_count) + unsigned to, bool is_int, unsigned bcast_count) { nir_alu_src src = instr->src[i]; - - if (!is_int) { - if (pan_has_source_mod(&src, nir_op_fneg)) - *neg = !(*neg); - - if (pan_has_source_mod(&src, nir_op_fabs)) - *abs = true; - } - - if (nir_accepts_inot(instr->op, i) && pan_has_source_mod(&src, nir_op_inot)) - *not = true; - - if (roundmode) { - if (pan_has_source_mod(&src, nir_op_fround_even)) - *roundmode = MIDGARD_RTE; - - if (pan_has_source_mod(&src, nir_op_ftrunc)) - *roundmode = MIDGARD_RTZ; - - if (pan_has_source_mod(&src, nir_op_ffloor)) - *roundmode = MIDGARD_RTN; - - if (pan_has_source_mod(&src, nir_op_fceil)) - *roundmode = MIDGARD_RTP; - } - unsigned bits = nir_src_bit_size(src.src); ins->src[to] = nir_src_index(NULL, &src.src); @@ -709,9 +626,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) { nir_dest *dest = &instr->dest.dest; - if (dest->is_ssa && BITSET_TEST(ctx->already_emitted, dest->ssa.index)) - return; - /* Derivatives end up emitted on the texture pipe, not the ALUs. This * is handled elsewhere */ @@ -959,10 +873,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) unsigned opcode_props = alu_opcode_props[op].props; bool quirk_flipped_r24 = opcode_props & QUIRK_FLIPPED_R24; - if (!midgard_is_integer_out_op(op)) { - outmod = mir_determine_float_outmod(ctx, &dest, outmod); - } - midgard_instruction ins = { .type = TAG_ALU_4, .dest = nir_dest_index(dest), @@ -971,16 +881,12 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) .roundmode = roundmode, }; - enum midgard_roundmode *roundptr = - (opcode_props & MIDGARD_ROUNDS) ? &ins.roundmode : NULL; - for (unsigned i = nr_inputs; i < ARRAY_SIZE(ins.src); ++i) ins.src[i] = ~0; if (quirk_flipped_r24) { ins.src[0] = ~0; - mir_copy_src(&ins, instr, 0, 1, &ins.src_abs[1], &ins.src_neg[1], - &ins.src_invert[1], roundptr, is_int, broadcast_swizzle); + mir_copy_src(&ins, instr, 0, 1, is_int, broadcast_swizzle); } else { for (unsigned i = 0; i < nr_inputs; ++i) { unsigned to = i; @@ -1001,8 +907,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) to = 1 - to; } - mir_copy_src(&ins, instr, i, to, &ins.src_abs[to], &ins.src_neg[to], - &ins.src_invert[to], roundptr, is_int, broadcast_swizzle); + mir_copy_src(&ins, instr, i, to, is_int, broadcast_swizzle); } } @@ -1623,17 +1528,6 @@ emit_control_barrier(compiler_context *ctx) emit_mir_instruction(ctx, ins); } -static unsigned -mir_get_branch_cond(nir_src *src, bool *invert) -{ - /* Wrap it. No swizzle since it's a scalar */ - - nir_alu_src alu = {.src = *src}; - - *invert = pan_has_source_mod(&alu, nir_op_inot); - return nir_src_index(NULL, &alu.src); -} - static uint8_t output_load_rt_addr(compiler_context *ctx, nir_intrinsic_instr *instr) { @@ -1663,8 +1557,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) discard.branch.target_type = TARGET_DISCARD; if (conditional) { - discard.src[0] = mir_get_branch_cond( - &instr->src[0], &discard.branch.invert_conditional); + discard.src[0] = nir_src_index(ctx, &instr->src[0]); discard.src_types[0] = nir_type_uint32; } @@ -2259,14 +2152,6 @@ emit_texop_native(compiler_context *ctx, nir_tex_instr *instr, if (!nir_tex_instr_need_sampler(instr)) sampler_index = 0; - nir_alu_type dest_base = nir_alu_type_get_base_type(instr->dest_type); - - /* texture instructions support float outmods */ - unsigned outmod = midgard_outmod_none; - if (dest_base == nir_type_float) { - outmod = mir_determine_float_outmod(ctx, &dest, 0); - } - midgard_instruction ins = { .type = TAG_TEXTURE_4, .mask = 0xF, @@ -2274,7 +2159,7 @@ emit_texop_native(compiler_context *ctx, nir_tex_instr *instr, .src = {~0, ~0, ~0, ~0}, .dest_type = instr->dest_type, .swizzle = SWIZZLE_IDENTITY_4, - .outmod = outmod, + .outmod = midgard_outmod_none, .op = midgard_texop, .texture = { .format = midgard_tex_format(instr->sampler_dim), @@ -2774,12 +2659,10 @@ emit_if(struct compiler_context *ctx, nir_if *nif) midgard_block *before_block = ctx->current_block; /* Speculatively emit the branch, but we can't fill it in until later */ - bool inv = false; EMIT(branch, true, true); midgard_instruction *then_branch = mir_last_in_block(ctx->current_block); - then_branch->src[0] = mir_get_branch_cond(&nif->condition, &inv); + then_branch->src[0] = nir_src_index(ctx, &nif->condition); then_branch->src_types[0] = nir_type_uint32; - then_branch->branch.invert_conditional = !inv; /* Emit the two subblocks. */ midgard_block *then_block = emit_cf_list(ctx, &nif->then_list); @@ -3036,8 +2919,6 @@ midgard_compile_shader_nir(nir_shader *nir, list_inithead(&ctx->blocks); ctx->block_count = 0; ctx->func = func; - ctx->already_emitted = - calloc(BITSET_WORDS(func->impl->ssa_alloc), sizeof(BITSET_WORD)); if (nir->info.outputs_read && !inputs->is_blend) { emit_block_init(ctx); @@ -3051,7 +2932,6 @@ midgard_compile_shader_nir(nir_shader *nir, } emit_cf_list(ctx, &func->impl->body); - free(ctx->already_emitted); break; /* TODO: Multi-function shaders */ } @@ -3069,6 +2949,7 @@ midgard_compile_shader_nir(nir_shader *nir, do { progress = false; progress |= midgard_opt_dead_code_eliminate(ctx); + progress |= midgard_opt_prop(ctx); mir_foreach_block(ctx, _block) { midgard_block *block = (midgard_block *)_block; diff --git a/src/panfrost/midgard/midgard_opt_prop.c b/src/panfrost/midgard/midgard_opt_prop.c new file mode 100644 index 00000000000..7246421dc9b --- /dev/null +++ b/src/panfrost/midgard/midgard_opt_prop.c @@ -0,0 +1,239 @@ +/* + * Copyright 2023 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "compiler.h" +#include "midgard.h" +#include "midgard_ops.h" +#include "nir.h" + +static bool +is_inot(midgard_instruction *I) +{ + return I->type == TAG_ALU_4 && I->op == midgard_alu_op_inor && + I->has_inline_constant && I->inline_constant == 0; +} + +static bool +try_fold_fmov_src(midgard_instruction *use, unsigned src_idx, + midgard_instruction *fmov) +{ + if (use->type != TAG_ALU_4) + return false; + if (fmov->has_constants || fmov->has_inline_constant) + return false; + if (mir_nontrivial_outmod(fmov)) + return false; + + /* Don't propagate into non-float instructions */ + if (nir_alu_type_get_base_type(use->src_types[src_idx]) != nir_type_float) + return false; + + /* TODO: Size conversions not handled yet */ + if (use->src_types[src_idx] != fmov->src_types[1]) + return false; + + if (use->src_abs[src_idx]) { + /* abs(abs(x)) = abs(x) and abs(-x) = abs(x) */ + } else { + /* -(-(abs(x))) = abs(x) */ + use->src_abs[src_idx] = fmov->src_abs[1]; + use->src_neg[src_idx] ^= fmov->src_neg[1]; + } + + use->src[src_idx] = fmov->src[1]; + mir_compose_swizzle(use->swizzle[src_idx], fmov->swizzle[1], + use->swizzle[src_idx]); + return true; +} + +static bool +try_fold_inot(midgard_instruction *use, unsigned src_idx, + midgard_instruction *inot) +{ + /* TODO: Size conversions not handled yet */ + if (nir_alu_type_get_type_size(use->src_types[src_idx]) != + nir_alu_type_get_type_size(inot->src_types[0])) + return false; + + if (use->compact_branch) { + use->branch.invert_conditional ^= true; + } else if (use->type == TAG_ALU_4) { + switch (use->op) { + case midgard_alu_op_iand: + case midgard_alu_op_ior: + case midgard_alu_op_ixor: + break; + default: + return false; + } + + use->src_invert[src_idx] ^= true; + mir_compose_swizzle(use->swizzle[src_idx], inot->swizzle[0], + use->swizzle[src_idx]); + } else { + return false; + } + + use->src[src_idx] = inot->src[0]; + return true; +} + +static bool +midgard_opt_prop_forward(compiler_context *ctx) +{ + bool progress = false; + + midgard_instruction **defs = + calloc(ctx->temp_count, sizeof(midgard_instruction *)); + + mir_foreach_block(ctx, block_) { + midgard_block *block = (midgard_block *)block_; + + mir_foreach_instr_in_block(block, I) { + /* Record SSA defs */ + if (mir_is_ssa(I->dest)) { + assert(I->dest < ctx->temp_count); + defs[I->dest] = I; + } + + mir_foreach_src(I, s) { + unsigned src = I->src[s]; + if (!mir_is_ssa(src)) + continue; + + /* Try to fold a source mod in */ + assert(src < ctx->temp_count); + midgard_instruction *def = defs[src]; + if (def == NULL) + continue; + + if (def->type == TAG_ALU_4 && def->op == midgard_alu_op_fmov) { + progress |= try_fold_fmov_src(I, s, def); + } else if (is_inot(def)) { + progress |= try_fold_inot(I, s, def); + } + } + } + } + + free(defs); + return progress; +} + +enum outmod_state { + outmod_unknown = 0, + outmod_clamp_0_1, + outmod_clamp_m1_1, + outmod_clamp_0_inf, + outmod_incompatible, +}; + +static enum outmod_state +outmod_to_state(unsigned outmod) +{ + switch (outmod) { + case midgard_outmod_clamp_0_1: + return outmod_clamp_0_1; + case midgard_outmod_clamp_m1_1: + return outmod_clamp_m1_1; + case midgard_outmod_clamp_0_inf: + return outmod_clamp_0_inf; + default: + return outmod_incompatible; + } +} + +static enum outmod_state +union_outmod_state(enum outmod_state a, enum outmod_state b) +{ + if (a == outmod_unknown) + return b; + else if (b == outmod_unknown) + return a; + else if (a == b) + return a /* b */; + else + return outmod_incompatible; +} + +static bool +midgard_opt_prop_backward(compiler_context *ctx) +{ + bool progress = false; + enum outmod_state *state = calloc(ctx->temp_count, sizeof(*state)); + BITSET_WORD *folded = calloc(BITSET_WORDS(ctx->temp_count), sizeof(*folded)); + + /* Scan for outmod states */ + mir_foreach_instr_global(ctx, I) { + if (I->type == TAG_ALU_4 && I->op == midgard_alu_op_fmov && + !I->src_neg[1] && !I->src_abs[1] && mir_is_ssa(I->src[1])) { + + enum outmod_state outmod = outmod_to_state(I->outmod); + state[I->src[1]] = union_outmod_state(state[I->src[1]], outmod); + } else { + /* Anything used as any other source cannot have an outmod folded in */ + mir_foreach_src(I, s) { + if (mir_is_ssa(I->src[s])) + state[I->src[s]] = outmod_incompatible; + } + } + } + + /* Apply outmods */ + mir_foreach_instr_global(ctx, I) { + if (!mir_is_ssa(I->dest)) + continue; + + if (I->type != TAG_ALU_4 && I->type != TAG_TEXTURE_4) + continue; + + if (nir_alu_type_get_base_type(I->dest_type) != nir_type_float) + continue; + + if (I->outmod != midgard_outmod_none) + continue; + + switch (state[I->dest]) { + case outmod_clamp_0_1: + I->outmod = midgard_outmod_clamp_0_1; + break; + case outmod_clamp_m1_1: + I->outmod = midgard_outmod_clamp_m1_1; + break; + case outmod_clamp_0_inf: + I->outmod = midgard_outmod_clamp_0_inf; + break; + default: + break; + } + + if (I->outmod != midgard_outmod_none) { + BITSET_SET(folded, I->dest); + } + } + + /* Strip outmods from FMOVs to let copyprop go ahead */ + mir_foreach_instr_global(ctx, I) { + if (I->type == TAG_ALU_4 && I->op == midgard_alu_op_fmov && + mir_is_ssa(I->src[1]) && BITSET_TEST(folded, I->src[1])) { + + I->outmod = midgard_outmod_none; + } + } + + free(state); + free(folded); + return progress; +} + +bool +midgard_opt_prop(compiler_context *ctx) +{ + bool progress = false; + mir_compute_temp_count(ctx); + progress |= midgard_opt_prop_forward(ctx); + progress |= midgard_opt_prop_backward(ctx); + return progress; +} diff --git a/src/panfrost/util/meson.build b/src/panfrost/util/meson.build index 791923f91df..aeb8343f441 100644 --- a/src/panfrost/util/meson.build +++ b/src/panfrost/util/meson.build @@ -22,7 +22,6 @@ libpanfrost_util_files = files( 'lcra.c', 'lcra.h', - 'nir_mod_helpers.c', 'pan_collect_varyings.c', 'pan_ir.c', 'pan_ir.h', diff --git a/src/panfrost/util/nir_mod_helpers.c b/src/panfrost/util/nir_mod_helpers.c deleted file mode 100644 index 466a4d4252a..00000000000 --- a/src/panfrost/util/nir_mod_helpers.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (C) 2020 Collabora, Ltd. - * Copyright (C) 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "nir.h" -#include "pan_ir.h" - -/* Check if a given ALU source is the result of a particular componentwise 1-op - * ALU source (principally fneg or fabs). If so, return true and rewrite the - * source to be the argument, respecting swizzles as needed. If not (or it - * cannot be proven), return false and leave the source untouched. - */ - -bool -pan_has_source_mod(nir_alu_src *src, nir_op op) -{ - if (!src->src.is_ssa || - src->src.ssa->parent_instr->type != nir_instr_type_alu) - return false; - - nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr); - - if (alu->op != op) - return false; - - /* This only works for unary ops */ - assert(nir_op_infos[op].num_inputs == 1); - - /* If the copied source is not SSA, moving it might not be valid */ - if (!alu->src[0].src.is_ssa) - return false; - - /* Okay - we've found the modifier we wanted. Let's construct the new ALU - * src. In a scalar world, this is just psrc, but for vector archs we need - * to respect the swizzle, so we compose. - */ - - nir_alu_src nsrc = { - .src = alu->src[0].src, - }; - - for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i) { - /* (a o b)(i) = a(b(i)) ... swizzle composition is intense. */ - nsrc.swizzle[i] = alu->src[0].swizzle[src->swizzle[i]]; - } - - *src = nsrc; - return true; -} - -/* Check if a given instruction's result will be fed into a - * componentwise 1-op ALU instruction (principally fsat without - * swizzles). If so, return true and rewrite the destination. The - * backend will need to track the new destinations to avoid - * incorrect double-emits. */ - -bool -pan_has_dest_mod(nir_dest **odest, nir_op op) -{ - /* This only works for unary ops */ - assert(nir_op_infos[op].num_inputs == 1); - - /* If not SSA, this might not be legal */ - nir_dest *dest = *odest; - if (!dest->is_ssa) - return false; - - /* Check the uses. We want a single use, with the op `op` */ - if (!list_is_singular(&dest->ssa.uses)) - return false; - - nir_src *use = list_first_entry(&dest->ssa.uses, nir_src, use_link); - if (use->is_if) - return false; - - nir_instr *parent = use->parent_instr; - - /* Check if the op is `op` */ - if (parent->type != nir_instr_type_alu) - return false; - - nir_alu_instr *alu = nir_instr_as_alu(parent); - if (alu->op != op) - return false; - - /* We can't do expansions without a move in the middle */ - unsigned nr_components = nir_dest_num_components(alu->dest.dest); - - if (nir_dest_num_components(*dest) != nr_components) - return false; - - /* We don't handle swizzles here, so check for the identity */ - for (unsigned i = 0; i < nr_components; ++i) { - if (alu->src[0].swizzle[i] != i) - return false; - } - - if (!alu->dest.dest.is_ssa) - return false; - - /* Otherwise, we're good */ - *odest = &alu->dest.dest; - return true; -} diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 046729929a9..0faf816ec5b 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -418,10 +418,6 @@ pan_dest_index(nir_dest *dst) /* IR printing helpers */ void pan_print_alu_type(nir_alu_type t, FILE *fp); -/* Until it can be upstreamed.. */ -bool pan_has_source_mod(nir_alu_src *src, nir_op op); -bool pan_has_dest_mod(nir_dest **dest, nir_op op); - /* NIR passes to do some backend-specific lowering */ #define PAN_WRITEOUT_C 1 |