summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kenneth Graunke <kenneth@whitecape.org> 2024-03-18 22:52:35 -0700
committer: Kenneth Graunke <kenneth@whitecape.org> 2024-03-20 01:04:17 -0700
commit: ea423aba1b45f90526149f1e0c190ce113ffa7b7 (patch)
tree: 27bb753ff6e476ea751e18a80b368ecee88bab37
parent: 831703157e7b272fcd1d36de2b3bcc8d5ec7d500 (diff)
intel/brw: Split out 64-bit lowering from algebraic optimizations
We don't necessarily want to split up MOVs for 64-bit addresses into
2x 32-bit MOVs right away, as this makes things like copy propagating
the whole address around harder. We should do this late, once, while
still doing other algebraic optimizations earlier.

fossil-db results for Alchemist show tiny improvements:

Totals:
Instrs: 161310502 -> 161310436 (-0.00%); split: -0.00%, +0.00%
Cycles: 14370605606 -> 14370605159 (-0.00%); split: -0.00%, +0.00%

Totals from 33 (0.01% of 652298) affected shaders:
Instrs: 15053 -> 14987 (-0.44%); split: -0.64%, +0.20%
Cycles: 196947 -> 196500 (-0.23%); split: -0.25%, +0.02%

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28286>
-rw-r--r-- src/intel/compiler/brw_fs.h | 1
-rw-r--r-- src/intel/compiler/brw_fs_lower.cpp | 98
-rw-r--r-- src/intel/compiler/brw_fs_opt.cpp | 7
-rw-r--r-- src/intel/compiler/brw_fs_opt_algebraic.cpp | 67
4 files changed, 101 insertions, 72 deletions
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 895a449b1ac..a36ec900b71 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -593,6 +593,7 @@ void nir_to_brw(fs_visitor *s);
void brw_fs_optimize(fs_visitor &s);
bool brw_fs_lower_3src_null_dest(fs_visitor &s);
+bool brw_fs_lower_alu_restrictions(fs_visitor &s);
bool brw_fs_lower_barycentrics(fs_visitor &s);
bool brw_fs_lower_constant_loads(fs_visitor &s);
bool brw_fs_lower_derivatives(fs_visitor &s);
diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp
index ff038b3961a..c8ce381aa31 100644
--- a/src/intel/compiler/brw_fs_lower.cpp
+++ b/src/intel/compiler/brw_fs_lower.cpp
@@ -562,3 +562,101 @@ brw_fs_lower_3src_null_dest(fs_visitor &s)
return progress;
}
+/**
+ * Perform lowering to legalize the IR for various ALU restrictions.
+ *
+ * For example:
+ * - Splitting 64-bit MOV/SEL into 2x32-bit where needed
+ *   (returns true if at least one instruction was replaced)
+bool
+brw_fs_lower_alu_restrictions(fs_visitor &s)
+{
+ const intel_device_info *devinfo = s.devinfo;
+ bool progress = false; /* becomes true once any instruction is replaced; also the return value */
+
+ foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) { /* presumably the _safe variant tolerates the inst->remove() calls below -- confirm */
+ switch (inst->opcode) {
+ case BRW_OPCODE_MOV:
+ if (!devinfo->has_64bit_float && /* DF MOV on a device reporting no 64-bit float support */
+ inst->dst.type == BRW_REGISTER_TYPE_DF) {
+ assert(inst->dst.type == inst->src[0].type); /* only same-type copies are expected here (no conversion) */
+ assert(!inst->saturate);
+ assert(!inst->src[0].abs); /* source modifiers are not expected on the MOV being split */
+ assert(!inst->src[0].negate);
+ const brw::fs_builder ibld(&s, block, inst); /* builder created from the current block/instruction; the replacement MOVs are emitted through it */
+
+ if (!inst->is_partial_write()) /* undef for dst is emitted only when the MOV is not a partial write */
+ ibld.emit_undef_for_dst(inst);
+
+ ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1), /* F-typed element 1 of dst/src is copied first... */
+ subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1));
+ ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0), /* ...then F-typed element 0 */
+ subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0));
+
+ inst->remove(block); /* the original 64-bit MOV is replaced by the two MOVs above */
+ progress = true;
+ }
+
+ if (!devinfo->has_64bit_int && /* same split for Q/UQ MOVs when 64-bit int is unsupported, using UD elements */
+ (inst->dst.type == BRW_REGISTER_TYPE_UQ ||
+ inst->dst.type == BRW_REGISTER_TYPE_Q)) {
+ assert(inst->dst.type == inst->src[0].type); /* same preconditions as the DF case above */
+ assert(!inst->saturate);
+ assert(!inst->src[0].abs);
+ assert(!inst->src[0].negate);
+ const brw::fs_builder ibld(&s, block, inst);
+
+ if (!inst->is_partial_write())
+ ibld.emit_undef_for_dst(inst);
+
+ ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), /* UD element 1 first, then element 0 */
+ subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
+ ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
+ subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
+
+ inst->remove(block);
+ progress = true;
+ }
+ break;
+
+ case BRW_OPCODE_SEL:
+ if (!devinfo->has_64bit_float && /* NOTE(review): SEL is split only when BOTH 64-bit float and 64-bit int are unsupported, */
+ !devinfo->has_64bit_int && /* whereas MOV above checks each capability against the matching type -- confirm this is intended */
+ (inst->dst.type == BRW_REGISTER_TYPE_DF ||
+ inst->dst.type == BRW_REGISTER_TYPE_UQ ||
+ inst->dst.type == BRW_REGISTER_TYPE_Q)) {
+ assert(inst->dst.type == inst->src[0].type);
+ assert(!inst->saturate);
+ assert(!inst->src[0].abs && !inst->src[0].negate); /* neither SEL source may carry abs/negate */
+ assert(!inst->src[1].abs && !inst->src[1].negate);
+ const brw::fs_builder ibld(&s, block, inst);
+
+ if (!inst->is_partial_write())
+ ibld.emit_undef_for_dst(inst);
+
+ set_predicate(inst->predicate, /* each 32-bit SEL gets the original instruction's predicate */
+ ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), /* UD element 0 first here (opposite order to the MOV cases) */
+ subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
+ subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0)));
+ set_predicate(inst->predicate,
+ ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), /* then UD element 1; all three 64-bit types are handled as UD */
+ subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1),
+ subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1)));
+
+ inst->remove(block); /* the original 64-bit SEL is replaced by the two SELs above */
+ progress = true;
+ }
+ break;
+
+ default: /* every other opcode is left untouched */
+ break;
+ }
+ }
+
+ if (progress) { /* analyses are invalidated only if something was rewritten */
+ s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
+ DEPENDENCY_INSTRUCTION_DETAIL);
+ }
+
+ return progress;
+}
diff --git a/src/intel/compiler/brw_fs_opt.cpp b/src/intel/compiler/brw_fs_opt.cpp
index f7a1f2a618a..7835d060084 100644
--- a/src/intel/compiler/brw_fs_opt.cpp
+++ b/src/intel/compiler/brw_fs_opt.cpp
@@ -12,7 +12,6 @@ using namespace brw;
void
brw_fs_optimize(fs_visitor &s)
{
- const intel_device_info *devinfo = s.devinfo;
const nir_shader *nir = s.nir;
s.debug_optimizer(nir, "start", 0, 0);
@@ -123,15 +122,13 @@ brw_fs_optimize(fs_visitor &s)
if (OPT(brw_fs_lower_load_payload)) {
OPT(brw_fs_opt_split_virtual_grfs);
- /* Lower 64 bit MOVs generated by payload lowering. */
- if (!devinfo->has_64bit_float || !devinfo->has_64bit_int)
- OPT(brw_fs_opt_algebraic);
-
OPT(brw_fs_opt_register_coalesce);
OPT(brw_fs_lower_simd_width);
OPT(brw_fs_opt_dead_code_eliminate);
}
+ OPT(brw_fs_lower_alu_restrictions);
+
OPT(brw_fs_opt_combine_constants);
if (OPT(brw_fs_lower_integer_multiplication)) {
/* If lower_integer_multiplication made progress, it may have produced
diff --git a/src/intel/compiler/brw_fs_opt_algebraic.cpp b/src/intel/compiler/brw_fs_opt_algebraic.cpp
index aab17ae4bdd..e92bf4c5727 100644
--- a/src/intel/compiler/brw_fs_opt_algebraic.cpp
+++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp
@@ -73,47 +73,6 @@ brw_fs_opt_algebraic(fs_visitor &s)
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
switch (inst->opcode) {
case BRW_OPCODE_MOV:
- if (!devinfo->has_64bit_float &&
- inst->dst.type == BRW_REGISTER_TYPE_DF) {
- assert(inst->dst.type == inst->src[0].type);
- assert(!inst->saturate);
- assert(!inst->src[0].abs);
- assert(!inst->src[0].negate);
- const brw::fs_builder ibld(&s, block, inst);
-
- if (!inst->is_partial_write())
- ibld.emit_undef_for_dst(inst);
-
- ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1),
- subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1));
- ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0),
- subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0));
-
- inst->remove(block);
- progress = true;
- }
-
- if (!devinfo->has_64bit_int &&
- (inst->dst.type == BRW_REGISTER_TYPE_UQ ||
- inst->dst.type == BRW_REGISTER_TYPE_Q)) {
- assert(inst->dst.type == inst->src[0].type);
- assert(!inst->saturate);
- assert(!inst->src[0].abs);
- assert(!inst->src[0].negate);
- const brw::fs_builder ibld(&s, block, inst);
-
- if (!inst->is_partial_write())
- ibld.emit_undef_for_dst(inst);
-
- ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
- subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
- ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
- subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
-
- inst->remove(block);
- progress = true;
- }
-
if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
inst->dst.is_null() &&
@@ -299,32 +258,6 @@ brw_fs_opt_algebraic(fs_visitor &s)
}
break;
case BRW_OPCODE_SEL:
- if (!devinfo->has_64bit_float &&
- !devinfo->has_64bit_int &&
- (inst->dst.type == BRW_REGISTER_TYPE_DF ||
- inst->dst.type == BRW_REGISTER_TYPE_UQ ||
- inst->dst.type == BRW_REGISTER_TYPE_Q)) {
- assert(inst->dst.type == inst->src[0].type);
- assert(!inst->saturate);
- assert(!inst->src[0].abs && !inst->src[0].negate);
- assert(!inst->src[1].abs && !inst->src[1].negate);
- const brw::fs_builder ibld(&s, block, inst);
-
- if (!inst->is_partial_write())
- ibld.emit_undef_for_dst(inst);
-
- set_predicate(inst->predicate,
- ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
- subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0),
- subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0)));
- set_predicate(inst->predicate,
- ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
- subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1),
- subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1)));
-
- inst->remove(block);
- progress = true;
- }
if (inst->src[0].equals(inst->src[1])) {
inst->opcode = BRW_OPCODE_MOV;
inst->sources = 1;