summary refs log tree commit diff
diff options
context:
space:
mode:
author Kenneth Graunke <kenneth@whitecape.org> 2024-03-14 00:32:25 -0700
committer Kenneth Graunke <kenneth@whitecape.org> 2024-03-20 01:04:22 -0700
commit a075b4449319966397f69ebdc220ee06000665b6 (patch)
tree b5f6dedf2779e2fdb5c2a72b69c7be8e4a0e57a9
parent 5814534de57444475cc3e20c88c4110e7629d9de (diff)
intel/brw: Eliminate top-level FIND_LIVE_CHANNEL & BROADCAST once
brw_fs_opt_eliminate_find_live_channel eliminates FIND_LIVE_CHANNEL outside of control flow. None of our optimization passes generate additional cases of that instruction, so once it's gone, we shouldn't ever have to run the pass again. Moving it out of the loop should save a bit of CPU time. While we're at it, also clean up adjacent BROADCAST instructions that consume the result of our FIND_LIVE_CHANNEL. Without this, we have to perform copy propagation to get the MOV 0 immediate into the BROADCAST, then algebraic to turn it into a MOV, which enables more copy propagation... not to mention CSE gets involved. Since this FIND_LIVE_CHANNEL + BROADCAST pattern from emit_uniformize() is really common, and it's trivial to clean up, we can do that. This lets the initial copy prop in the loop see MOV instead of BROADCAST. Zero impact on fossil-db, but less work in the optimization loop. Together with the previous patches, this changes compile time in Borderlands 3 on Alchemist by -1.38539% +/- 0.1632% (n = 24). Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28286>
-rw-r--r-- src/intel/compiler/brw_fs_opt.cpp 23
1 files changed, 22 insertions, 1 deletions
diff --git a/src/intel/compiler/brw_fs_opt.cpp b/src/intel/compiler/brw_fs_opt.cpp
index 7835d060084..e9abeef4098 100644
--- a/src/intel/compiler/brw_fs_opt.cpp
+++ b/src/intel/compiler/brw_fs_opt.cpp
@@ -56,6 +56,8 @@ brw_fs_optimize(fs_visitor &s)
OPT(brw_fs_opt_remove_extra_rounding_modes);
+ OPT(brw_fs_opt_eliminate_find_live_channel);
+
do {
progress = false;
pass_num = 0;
@@ -71,7 +73,6 @@ brw_fs_optimize(fs_visitor &s)
OPT(brw_fs_opt_dead_control_flow_eliminate);
OPT(brw_fs_opt_saturate_propagation);
OPT(brw_fs_opt_register_coalesce);
- OPT(brw_fs_opt_eliminate_find_live_channel);
OPT(brw_fs_opt_compact_virtual_grfs);
} while (progress);
@@ -430,6 +431,26 @@ brw_fs_opt_eliminate_find_live_channel(fs_visitor &s)
inst->sources = 1;
inst->force_writemask_all = true;
progress = true;
+
+ /* emit_uniformize() frequently emits FIND_LIVE_CHANNEL paired
+ * with a BROADCAST. Save some work for opt_copy_propagation
+ * and opt_algebraic by trivially cleaning up both together.
+ */
+ assert(!inst->next->is_tail_sentinel());
+ fs_inst *bcast = (fs_inst *) inst->next;
+
+ /* Ignore stride when comparing */
+ if (bcast->opcode == SHADER_OPCODE_BROADCAST &&
+ inst->dst.file == VGRF &&
+ inst->dst.file == bcast->src[1].file &&
+ inst->dst.nr == bcast->src[1].nr &&
+ inst->dst.offset == bcast->src[1].offset) {
+ bcast->opcode = BRW_OPCODE_MOV;
+ if (!is_uniform(bcast->src[0]))
+ bcast->src[0] = component(bcast->src[0], 0);
+ bcast->sources = 1;
+ bcast->force_writemask_all = true;
+ }
}
break;