summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ian Romanick <ian.d.romanick@intel.com> 2024-01-16 17:25:39 -0800
committer Marge Bot <emma+marge@anholt.net> 2024-03-12 21:31:30 +0000
commit e7480f94c1bda89d6f263180ac0e3da4ab1fe235 (patch)
tree 4a700dcc53938d38265dd0eb7b47d377ddd944e6
parent dd3bed1d92715a22a0abd37e4f559bbe6c5220c6 (diff)
intel/brw: Combine constants for src0 of integer multiply too
The majority of cases that would have been affected by this actually had both sources as integer constants. The earlier commit "intel/rt: Don't directly generate umul_32x16" allowed those to be constant folded.

v2: Move the a*-1 block to be near the existing a*-1 block.

No shader-db changes on any Intel platform.

fossil-db results:

All Intel platforms had similar results. (Ice Lake shown)
Totals:
Instrs: 165510246 -> 165510222 (-0.00%)
Cycles: 15125198238 -> 15125195835 (-0.00%); split: -0.00%, +0.00%

Totals from 46 (0.01% of 656118) affected shaders:
Instrs: 36010 -> 35986 (-0.07%)
Cycles: 2613658 -> 2611255 (-0.09%); split: -0.17%, +0.07%

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27552>
-rw-r--r-- src/intel/compiler/brw_fs_combine_constants.cpp 1
-rw-r--r-- src/intel/compiler/brw_fs_copy_propagation.cpp 16
-rw-r--r-- src/intel/compiler/brw_fs_opt_algebraic.cpp 21
3 files changed, 33 insertions, 5 deletions
diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp
index 3b6676449e9..329b1fb37e2 100644
--- a/src/intel/compiler/brw_fs_combine_constants.cpp
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp
@@ -1378,6 +1378,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)
case BRW_OPCODE_ASR:
case BRW_OPCODE_BFI1:
+ case BRW_OPCODE_MUL:
case BRW_OPCODE_ROL:
case BRW_OPCODE_ROR:
case BRW_OPCODE_SHL:
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index c923b7dd936..db62a269e82 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -1017,12 +1017,12 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
inst->src[arg] = val;
progress = true;
} else if (arg == 0 && inst->src[1].file != IMM) {
- /* Don't copy propagate the constant in situations like
+ /* We used to not copy propagate the constant in situations like
*
* mov(8) g8<1>D 0x7fffffffD
* mul(8) g16<1>D g8<8,8,1>D g15<16,8,2>W
*
- * On platforms that only have a 32x16 multiplier, this will
+ * On platforms that only have a 32x16 multiplier, this would
* result in lowering the multiply to
*
* mul(8) g15<1>D g14<8,8,1>D 0xffffUW
@@ -1030,7 +1030,7 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
* add(8) g15.1<2>UW g15.1<16,8,2>UW g16<16,8,2>UW
*
* On Gfx8 and Gfx9, which have the full 32x32 multiplier, it
- * results in
+ * would result in
*
* mul(8) g16<1>D g15<16,8,2>W 0x7fffffffD
*
@@ -1038,11 +1038,19 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
*
* When multiplying a DW and any lower precision integer, the
* DW operand must on src0.
+ *
+ * So it would have been invalid. However, brw_fs_combine_constants
+ * will now "fix" the constant.
*/
if (inst->opcode == BRW_OPCODE_MUL &&
type_sz(inst->src[1].type) < 4 &&
- type_sz(val.type) == 4)
+ (inst->src[0].type == BRW_REGISTER_TYPE_D ||
+ inst->src[0].type == BRW_REGISTER_TYPE_UD)) {
+ inst->src[0] = val;
+ inst->src[0].type = BRW_REGISTER_TYPE_D;
+ progress = true;
break;
+ }
/* Fit this constant in by commuting the operands.
* Exception: we can't do this for 32-bit integer MUL/MACH
diff --git a/src/intel/compiler/brw_fs_opt_algebraic.cpp b/src/intel/compiler/brw_fs_opt_algebraic.cpp
index f9bf88a841c..aab17ae4bdd 100644
--- a/src/intel/compiler/brw_fs_opt_algebraic.cpp
+++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp
@@ -148,7 +148,7 @@ brw_fs_opt_algebraic(fs_visitor &s)
break;
case BRW_OPCODE_MUL:
- if (inst->src[1].file != IMM)
+ if (inst->src[0].file != IMM && inst->src[1].file != IMM)
continue;
if (brw_reg_type_is_floating_point(inst->src[1].type))
@@ -177,6 +177,15 @@ brw_fs_opt_algebraic(fs_visitor &s)
inst->writes_accumulator_implicitly(devinfo)))
break;
+ if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->sources = 1;
+ inst->src[0] = brw_imm_d(0);
+ inst->src[1] = reg_undef;
+ progress = true;
+ break;
+ }
+
/* a * 1.0 = a */
if (inst->src[1].is_one()) {
inst->opcode = BRW_OPCODE_MOV;
@@ -187,6 +196,16 @@ brw_fs_opt_algebraic(fs_visitor &s)
}
/* a * -1.0 = -a */
+ if (inst->src[0].is_negative_one()) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->sources = 1;
+ inst->src[0] = inst->src[1];
+ inst->src[0].negate = !inst->src[0].negate;
+ inst->src[1] = reg_undef;
+ progress = true;
+ break;
+ }
+
if (inst->src[1].is_negative_one()) {
inst->opcode = BRW_OPCODE_MOV;
inst->sources = 1;