intel/brw: Combine constants for src0 of integer multiply too

The majority of cases that would have been affected by this actually had
both sources as integer constants. The earlier commit "intel/rt: Don't
directly generate umul_32x16" allowed those to be constant folded.

v2: Move the a*-1 block to be near the existing a*-1 block.

No shader-db changes on any Intel platform.

fossil-db results:

All Intel platforms had similar results. (Ice Lake shown)
Totals:
Instrs: 165510246 -> 165510222 (-0.00%)
Cycles: 15125198238 -> 15125195835 (-0.00%); split: -0.00%, +0.00%

Totals from 46 (0.01% of 656118) affected shaders:
Instrs: 36010 -> 35986 (-0.07%)
Cycles: 2613658 -> 2611255 (-0.09%); split: -0.17%, +0.07%

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27552>
author: Ian Romanick <ian.d.romanick@intel.com> 2024-01-16 17:25:39 -0800
committer: Marge Bot <emma+marge@anholt.net> 2024-03-12 21:31:30 +0000
commit: e7480f94c1bda89d6f263180ac0e3da4ab1fe235 (patch)
tree: 4a700dcc53938d38265dd0eb7b47d377ddd944e6
parent: dd3bed1d92715a22a0abd37e4f559bbe6c5220c6 (diff)
3 files changed, 33 insertions, 5 deletions
diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp
index 3b6676449e9..329b1fb37e2 100644
--- a/src/intel/compiler/brw_fs_combine_constants.cpp
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp
@@ -1378,6 +1378,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)
 
       case BRW_OPCODE_ASR:
       case BRW_OPCODE_BFI1:
+      case BRW_OPCODE_MUL:
       case BRW_OPCODE_ROL:
       case BRW_OPCODE_ROR:
       case BRW_OPCODE_SHL:
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index c923b7dd936..db62a269e82 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -1017,12 +1017,12 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
          inst->src[arg] = val;
          progress = true;
       } else if (arg == 0 && inst->src[1].file != IMM) {
-         /* Don't copy propagate the constant in situations like
+         /* We used to not copy propagate the constant in situations like
           *
           *    mov(8)          g8<1>D          0x7fffffffD
           *    mul(8)          g16<1>D         g8<8,8,1>D      g15<16,8,2>W
           *
-          * On platforms that only have a 32x16 multiplier, this will
+          * On platforms that only have a 32x16 multiplier, this would
           * result in lowering the multiply to
           *
           *    mul(8)          g15<1>D         g14<8,8,1>D     0xffffUW
@@ -1030,7 +1030,7 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
           *    add(8)          g15.1<2>UW      g15.1<16,8,2>UW g16<16,8,2>UW
           *
           * On Gfx8 and Gfx9, which have the full 32x32 multiplier, it
-          * results in
+          * would result in
           *
           *    mul(8)          g16<1>D         g15<16,8,2>W    0x7fffffffD
           *
@@ -1038,11 +1038,19 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
           *
           *    When multiplying a DW and any lower precision integer, the
           *    DW operand must on src0.
+          *
+          * So it would have been invalid. However, brw_fs_combine_constants
+          * will now "fix" the constant.
           */
          if (inst->opcode == BRW_OPCODE_MUL &&
              type_sz(inst->src[1].type) < 4 &&
-             type_sz(val.type) == 4)
+             (inst->src[0].type == BRW_REGISTER_TYPE_D ||
+              inst->src[0].type == BRW_REGISTER_TYPE_UD)) {
+            inst->src[0] = val;
+            inst->src[0].type = BRW_REGISTER_TYPE_D;
+            progress = true;
             break;
+         }
 
          /* Fit this constant in by commuting the operands.
           * Exception: we can't do this for 32-bit integer MUL/MACH
diff --git a/src/intel/compiler/brw_fs_opt_algebraic.cpp b/src/intel/compiler/brw_fs_opt_algebraic.cpp
index f9bf88a841c..aab17ae4bdd 100644
--- a/src/intel/compiler/brw_fs_opt_algebraic.cpp
+++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp
@@ -148,7 +148,7 @@ brw_fs_opt_algebraic(fs_visitor &s)
          break;
 
       case BRW_OPCODE_MUL:
-         if (inst->src[1].file != IMM)
+         if (inst->src[0].file != IMM && inst->src[1].file != IMM)
             continue;
 
          if (brw_reg_type_is_floating_point(inst->src[1].type))
@@ -177,6 +177,15 @@ brw_fs_opt_algebraic(fs_visitor &s)
               inst->writes_accumulator_implicitly(devinfo)))
             break;
 
+         if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->sources = 1;
+            inst->src[0] = brw_imm_d(0);
+            inst->src[1] = reg_undef;
+            progress = true;
+            break;
+         }
+
          /* a * 1.0 = a */
          if (inst->src[1].is_one()) {
             inst->opcode = BRW_OPCODE_MOV;
@@ -187,6 +196,16 @@ brw_fs_opt_algebraic(fs_visitor &s)
          }
 
          /* a * -1.0 = -a */
+         if (inst->src[0].is_negative_one()) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->sources = 1;
+            inst->src[0] = inst->src[1];
+            inst->src[0].negate = !inst->src[0].negate;
+            inst->src[1] = reg_undef;
+            progress = true;
+            break;
+         }
+
          if (inst->src[1].is_negative_one()) {
             inst->opcode = BRW_OPCODE_MOV;
             inst->sources = 1;
author	Ian Romanick <ian.d.romanick@intel.com>	2024-01-16 17:25:39 -0800
committer	Marge Bot <emma+marge@anholt.net>	2024-03-12 21:31:30 +0000
commit	e7480f94c1bda89d6f263180ac0e3da4ab1fe235 (patch)
tree	4a700dcc53938d38265dd0eb7b47d377ddd944e6
parent	dd3bed1d92715a22a0abd37e4f559bbe6c5220c6 (diff)