author    Connor Abbott <cwabbott0@gmail.com>    2023-03-02 14:47:30 +0100
committer Marge Bot <emma+marge@anholt.net>      2024-04-26 12:55:13 +0000
commit    ce6c4f03201b5046b5bed74934b52a7a874288d2 (patch)
tree      f4b15906c1ef71ccdeab76b68589867d28a5d959
parent    4c4234501fb6281ff0a4c235b0ba7cb0fda4325c (diff)
ir3: Add scalar ALU-specific passes
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
-rw-r--r--  src/freedreno/ir3/ir3.h                   |  21
-rw-r--r--  src/freedreno/ir3/ir3_compiler_nir.c      |   3
-rw-r--r--  src/freedreno/ir3/ir3_lower_shared_phi.c  | 134
-rw-r--r--  src/freedreno/ir3/ir3_shared_folding.c    | 150
-rw-r--r--  src/freedreno/ir3/meson.build             |   2
5 files changed, 310 insertions, 0 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index f5a303e3773..52df6e2d287 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -2056,6 +2056,9 @@ bool ir3_remove_unreachable(struct ir3 *ir);
/* calculate reconvergence information: */
void ir3_calc_reconvergence(struct ir3_shader_variant *so);
+/* lower invalid shared phis after calculating reconvergence information: */
+bool ir3_lower_shared_phis(struct ir3 *ir);
+
/* dead code elimination: */
struct ir3_shader_variant;
bool ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so);
@@ -2063,6 +2066,9 @@ bool ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so);
/* fp16 conversion folding */
bool ir3_cf(struct ir3 *ir);
+/* shared mov folding */
+bool ir3_shared_fold(struct ir3 *ir);
+
/* copy-propagate: */
bool ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);
@@ -2121,6 +2127,21 @@ ir3_has_latency_to_hide(struct ir3 *ir)
return false;
}
+/**
+ * Move 'instr' to after the last phi node at the beginning of the block:
+ */
+static inline void
+ir3_instr_move_after_phis(struct ir3_instruction *instr,
+ struct ir3_block *block)
+{
+ struct ir3_instruction *last_phi = ir3_block_get_last_phi(block);
+ if (last_phi)
+ ir3_instr_move_after(instr, last_phi);
+ else
+ ir3_instr_move_before_block(instr, block);
+}
+
+
/* ************************************************************************* */
/* instruction helpers */
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index e99ed4eb0f1..6a47e0dd21c 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -5182,6 +5182,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
ir3_calc_reconvergence(so);
+ IR3_PASS(ir, ir3_lower_shared_phis);
+
do {
progress = false;
@@ -5192,6 +5194,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
progress |= IR3_PASS(ir, ir3_cse);
progress |= IR3_PASS(ir, ir3_dce, so);
progress |= IR3_PASS(ir, ir3_opt_predicates, so);
+ progress |= IR3_PASS(ir, ir3_shared_fold);
} while (progress);
/* at this point, for binning pass, throw away unneeded outputs:
diff --git a/src/freedreno/ir3/ir3_lower_shared_phi.c b/src/freedreno/ir3/ir3_lower_shared_phi.c
new file mode 100644
index 00000000000..12d3bc2960c
--- /dev/null
+++ b/src/freedreno/ir3/ir3_lower_shared_phi.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2023 Valve Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir3.h"
+#include "util/ralloc.h"
+
+/* RA cannot handle phis of shared registers when the phi's block has extra
+ * physical predecessors, or when a predecessor block has extra physical
+ * successors, because these edges are critical edges that we cannot resolve
+ * copies along. Here's a contrived example:
+ *
+ * loop {
+ * if non-uniform {
+ * if uniform {
+ * x_1 = ...;
+ * continue;
+ * }
+ * x_2 = ...;
+ * } else {
+ * break;
+ * }
+ * // continue block
+ * x_3 = phi(x_1, x_2)
+ * }
+ *
+ * Assuming x_1 and x_2 are uniform, x_3 will also be uniform, because all
+ * threads that stay in the loop take the same branch to the continue block.
+ * However, because the outer if is non-uniform, execution may fall through
+ * from the assignment to x_2 to the break statement, and then fall through
+ * again to the continue block. In cases like this we have to demote the phi
+ * to normal (non-shared) registers and insert movs around it (which will
+ * probably be coalesced).
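+ *
+ * Concretely (a rough sketch, with the extra register names invented for
+ * illustration), the example above is lowered to:
+ *
+ *    loop {
+ *       if non-uniform {
+ *          if uniform {
+ *             x_1 = ...;           // shared
+ *             y_1 = mov x_1;       // copy into a normal register
+ *             continue;
+ *          }
+ *          x_2 = ...;              // shared
+ *          y_2 = mov x_2;          // copy into a normal register
+ *       } else {
+ *          break;
+ *       }
+ *       // continue block
+ *       y_3 = phi(y_1, y_2)        // phi of normal registers
+ *       x_3 = mov y_3              // copied back into a shared register
+ *    }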
+ */
+
+static void
+lower_phi(void *ctx, struct ir3_instruction *phi)
+{
+ struct ir3_block *block = phi->block;
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ struct ir3_block *pred = block->predecessors[i];
+ if (phi->srcs[i]->def) {
+ struct ir3_instruction *pred_mov = ir3_instr_create(pred, OPC_MOV, 1, 1);
+ pred_mov->uses = _mesa_pointer_set_create(ctx);
+ __ssa_dst(pred_mov)->flags |= (phi->srcs[i]->flags & IR3_REG_HALF);
+ unsigned src_flags = IR3_REG_SSA | IR3_REG_SHARED |
+ (phi->srcs[i]->flags & IR3_REG_HALF);
+ ir3_src_create(pred_mov, INVALID_REG, src_flags)->def =
+ phi->srcs[i]->def;
+ pred_mov->cat1.src_type = pred_mov->cat1.dst_type =
+ (src_flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
+
+ _mesa_set_remove_key(phi->srcs[i]->def->instr->uses, phi);
+ _mesa_set_add(phi->srcs[i]->def->instr->uses, pred_mov);
+ phi->srcs[i]->def = pred_mov->dsts[0];
+ }
+ phi->srcs[i]->flags &= ~IR3_REG_SHARED;
+ }
+
+ phi->dsts[0]->flags &= ~IR3_REG_SHARED;
+
+ struct ir3_instruction *shared_mov =
+ ir3_MOV(block, phi,
+ (phi->dsts[0]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32);
+ shared_mov->uses = _mesa_pointer_set_create(ctx);
+ shared_mov->dsts[0]->flags |= IR3_REG_SHARED;
+ ir3_instr_move_after_phis(shared_mov, block);
+
+ foreach_ssa_use (use, phi) {
+ for (unsigned i = 0; i < use->srcs_count; i++) {
+ if (use->srcs[i]->def == phi->dsts[0])
+ use->srcs[i]->def = shared_mov->dsts[0];
+ }
+ }
+}
+
+bool
+ir3_lower_shared_phis(struct ir3 *ir)
+{
+ void *mem_ctx = ralloc_context(NULL);
+ bool progress = false;
+
+ ir3_find_ssa_uses(ir, mem_ctx, false);
+
+ foreach_block (block, &ir->block_list) {
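+ /* Phis in this block only need lowering if the block has extra physical
+  * predecessors, or one of its logical predecessors has extra physical
+  * successors.
+  */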
+ bool pred_physical_edge = false;
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ unsigned successors_count =
+ block->predecessors[i]->successors[1] ? 2 : 1;
+ if (block->predecessors[i]->physical_successors_count > successors_count) {
+ pred_physical_edge = true;
+ break;
+ }
+ }
+
+ if (!pred_physical_edge &&
+ block->physical_predecessors_count == block->predecessors_count)
+ continue;
+
+ foreach_instr_safe (phi, &block->instr_list) {
+ if (phi->opc != OPC_META_PHI)
+ break;
+
+ if (!(phi->dsts[0]->flags & IR3_REG_SHARED))
+ continue;
+
+ lower_phi(mem_ctx, phi);
+ progress = true;
+ }
+ }
+
+ ralloc_free(mem_ctx);
+ return progress;
+}
+
diff --git a/src/freedreno/ir3/ir3_shared_folding.c b/src/freedreno/ir3/ir3_shared_folding.c
new file mode 100644
index 00000000000..29b3f28ea33
--- /dev/null
+++ b/src/freedreno/ir3/ir3_shared_folding.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2023 Valve Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Try to fold a shared -> non-shared mov into the instruction producing the
+ * shared src. We do this aggressively, even if there are other uses of the
+ * source, on the assumption that the "default" state should be non-shared and
+ * we should be able to fold the other sources eventually.
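+ *
+ * Roughly (a sketch with made-up SSA names), a pattern like:
+ *
+ *    x (shared) = add.u a, b
+ *    y          = mov x            // the shared -> non-shared mov being folded
+ *    z (shared) = add.u x, c
+ *
+ * is rewritten to:
+ *
+ *    x          = add.u a, b       // producer no longer writes a shared reg
+ *    y          = mov x            // now trivial, cleaned up by copy-prop
+ *    x'(shared) = mov x            // re-created shared value for the other uses
+ *    z (shared) = add.u x', c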
+ */
+
+#include "util/ralloc.h"
+
+#include "ir3.h"
+
+static bool
+try_shared_folding(struct ir3_instruction *mov, void *mem_ctx)
+{
+ if (mov->opc != OPC_MOV)
+ return false;
+
+ if ((mov->dsts[0]->flags & IR3_REG_SHARED) ||
+ !(mov->srcs[0]->flags & IR3_REG_SHARED))
+ return false;
+
+ struct ir3_instruction *src = ssa(mov->srcs[0]);
+ if (!src)
+ return false;
+
+ if (mov->cat1.dst_type != mov->cat1.src_type) {
+ /* Check if the conversion can be folded into the source by ir3_cf */
+ bool can_fold;
+ type_t output_type = ir3_output_conv_type(src, &can_fold);
+ if (!can_fold || output_type != TYPE_U32)
+ return false;
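+ /* Only fold if every use of the source is this exact conversion, so that
+  * ir3_cf can then fold the conversion into the source itself.
+  */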
+ foreach_ssa_use (use, src) {
+ if (use->opc != OPC_MOV ||
+ use->cat1.src_type != mov->cat1.src_type ||
+ use->cat1.dst_type != mov->cat1.dst_type)
+ return false;
+ }
+ }
+
+ if (src->opc == OPC_META_PHI) {
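+ /* Demote the phi itself, mirroring ir3_lower_shared_phis: insert a
+  * shared -> non-shared mov for each source in the corresponding
+  * predecessor block and make the phi's sources non-shared.
+  */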
+ struct ir3_block *block = src->block;
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ struct ir3_block *pred = block->predecessors[i];
+ if (src->srcs[i]->def) {
+ struct ir3_instruction *pred_mov = ir3_instr_create(pred, OPC_MOV, 1, 1);
+ __ssa_dst(pred_mov)->flags |= (src->srcs[i]->flags & IR3_REG_HALF);
+ unsigned src_flags = IR3_REG_SSA | IR3_REG_SHARED |
+ (src->srcs[i]->flags & IR3_REG_HALF);
+ ir3_src_create(pred_mov, INVALID_REG, src_flags)->def =
+ src->srcs[i]->def;
+ pred_mov->cat1.src_type = pred_mov->cat1.dst_type =
+ (src_flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
+
+ _mesa_set_remove_key(src->srcs[i]->def->instr->uses, src);
+ _mesa_set_add(src->srcs[i]->def->instr->uses, pred_mov);
+ src->srcs[i]->def = pred_mov->dsts[0];
+ }
+ src->srcs[i]->flags &= ~IR3_REG_SHARED;
+ }
+ } else if (opc_cat(src->opc) == 2 && src->srcs_count >= 2) {
+ /* vector (non-scalar-ALU) cat2 instructions can only have one shared or const source */
+ if ((src->srcs[0]->flags & (IR3_REG_SHARED | IR3_REG_CONST)) &&
+ (src->srcs[1]->flags & (IR3_REG_SHARED | IR3_REG_CONST)))
+ return false;
+ } else if (opc_cat(src->opc) == 3) {
+ /* cat3 vector ALU instructions cannot have src1 shared */
+ if (src->srcs[1]->flags & IR3_REG_SHARED)
+ return false;
+ } else if (src->opc == OPC_LDC) {
+ src->flags &= ~IR3_INSTR_U;
+ } else {
+ return false;
+ }
+
+ /* Remove IR3_REG_SHARED from the original destination, which should make the
+ * mov trivial so that it can be cleaned up later by copy prop.
+ */
+ src->dsts[0]->flags &= ~IR3_REG_SHARED;
+ mov->srcs[0]->flags &= ~IR3_REG_SHARED;
+
+ /* Insert a copy to shared for uses other than this move instruction. */
+ struct ir3_instruction *shared_mov = NULL;
+ foreach_ssa_use (use, src) {
+ if (use == mov)
+ continue;
+
+ if (!shared_mov) {
+ shared_mov = ir3_MOV(src->block, src, mov->cat1.src_type);
+ shared_mov->dsts[0]->flags |= IR3_REG_SHARED;
+ if (src->opc == OPC_META_PHI)
+ ir3_instr_move_after_phis(shared_mov, src->block);
+ else
+ ir3_instr_move_after(shared_mov, src);
+ shared_mov->uses = _mesa_pointer_set_create(mem_ctx);
+ }
+
+ for (unsigned i = 0; i < use->srcs_count; i++) {
+ if (use->srcs[i]->def == src->dsts[0])
+ use->srcs[i]->def = shared_mov->dsts[0];
+ }
+ _mesa_set_add(shared_mov->uses, use);
+ }
+
+ return true;
+}
+
+bool
+ir3_shared_fold(struct ir3 *ir)
+{
+ void *mem_ctx = ralloc_context(NULL);
+ bool progress = false;
+
+ ir3_find_ssa_uses(ir, mem_ctx, false);
+
+ /* Folding a phi can push the mov up to its sources, so iterate blocks in
+ * reverse to try and convert an entire phi-web in one go.
+ */
+ foreach_block_rev (block, &ir->block_list) {
+ foreach_instr (instr, &block->instr_list) {
+ progress |= try_shared_folding(instr, mem_ctx);
+ }
+ }
+
+ ralloc_free(mem_ctx);
+
+ return progress;
+}
+
diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build
index adad6ee9a32..7351c236174 100644
--- a/src/freedreno/ir3/meson.build
+++ b/src/freedreno/ir3/meson.build
@@ -97,6 +97,7 @@ libfreedreno_ir3_files = files(
'ir3_legalize_relative.c',
'ir3_liveness.c',
'ir3_lower_parallelcopy.c',
+ 'ir3_lower_shared_phi.c',
'ir3_lower_spill.c',
'ir3_lower_subgroups.c',
'ir3_merge_regs.c',
@@ -127,6 +128,7 @@ libfreedreno_ir3_files = files(
'ir3_sched.c',
'ir3_shader.c',
'ir3_shader.h',
+ 'ir3_shared_folding.c',
'ir3_shared_ra.c',
'ir3_spill.c',
'ir3_validate.c',