diff options
| field | value | date |
|---|---|---|
| author | Ian Romanick <ian.d.romanick@intel.com> | 2024-01-16 17:25:39 -0800 |
| committer | Marge Bot <emma+marge@anholt.net> | 2024-03-12 21:31:30 +0000 |
| commit | e7480f94c1bda89d6f263180ac0e3da4ab1fe235 (patch) | |
| tree | 4a700dcc53938d38265dd0eb7b47d377ddd944e6 | |
| parent | dd3bed1d92715a22a0abd37e4f559bbe6c5220c6 (diff) | |
intel/brw: Combine constants for src0 of integer multiply too
The majority of cases that would have been affected by this actually
had both sources as integer constants. The earlier commit "intel/rt:
Don't directly generate umul_32x16" allowed those to be constant
folded.
v2: Move the a*-1 block to be near the existing a*-1 block.
No shader-db changes on any Intel platform.
fossil-db results:
All Intel platforms had similar results. (Ice Lake shown)
Totals:
Instrs: 165510246 -> 165510222 (-0.00%)
Cycles: 15125198238 -> 15125195835 (-0.00%); split: -0.00%, +0.00%
Totals from 46 (0.01% of 656118) affected shaders:
Instrs: 36010 -> 35986 (-0.07%)
Cycles: 2613658 -> 2611255 (-0.09%); split: -0.17%, +0.07%
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27552>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/intel/compiler/brw_fs_combine_constants.cpp | 1 |
| -rw-r--r-- | src/intel/compiler/brw_fs_copy_propagation.cpp | 16 |
| -rw-r--r-- | src/intel/compiler/brw_fs_opt_algebraic.cpp | 21 |

3 files changed, 33 insertions, 5 deletions
diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp index 3b6676449e9..329b1fb37e2 100644 --- a/src/intel/compiler/brw_fs_combine_constants.cpp +++ b/src/intel/compiler/brw_fs_combine_constants.cpp @@ -1378,6 +1378,7 @@ brw_fs_opt_combine_constants(fs_visitor &s) case BRW_OPCODE_ASR: case BRW_OPCODE_BFI1: + case BRW_OPCODE_MUL: case BRW_OPCODE_ROL: case BRW_OPCODE_ROR: case BRW_OPCODE_SHL: diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index c923b7dd936..db62a269e82 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -1017,12 +1017,12 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, inst->src[arg] = val; progress = true; } else if (arg == 0 && inst->src[1].file != IMM) { - /* Don't copy propagate the constant in situations like + /* We used to not copy propagate the constant in situations like * * mov(8) g8<1>D 0x7fffffffD * mul(8) g16<1>D g8<8,8,1>D g15<16,8,2>W * - * On platforms that only have a 32x16 multiplier, this will + * On platforms that only have a 32x16 multiplier, this would * result in lowering the multiply to * * mul(8) g15<1>D g14<8,8,1>D 0xffffUW @@ -1030,7 +1030,7 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, * add(8) g15.1<2>UW g15.1<16,8,2>UW g16<16,8,2>UW * * On Gfx8 and Gfx9, which have the full 32x32 multiplier, it - * results in + * would results in * * mul(8) g16<1>D g15<16,8,2>W 0x7fffffffD * @@ -1038,11 +1038,19 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, * * When multiplying a DW and any lower precision integer, the * DW operand must on src0. + * + * So it would have been invalid. However, brw_fs_combine_constants + * will now "fix" the constant. 
*/ if (inst->opcode == BRW_OPCODE_MUL && type_sz(inst->src[1].type) < 4 && - type_sz(val.type) == 4) + (inst->src[0].type == BRW_REGISTER_TYPE_D || + inst->src[0].type == BRW_REGISTER_TYPE_UD)) { + inst->src[0] = val; + inst->src[0].type = BRW_REGISTER_TYPE_D; + progress = true; break; + } /* Fit this constant in by commuting the operands. * Exception: we can't do this for 32-bit integer MUL/MACH diff --git a/src/intel/compiler/brw_fs_opt_algebraic.cpp b/src/intel/compiler/brw_fs_opt_algebraic.cpp index f9bf88a841c..aab17ae4bdd 100644 --- a/src/intel/compiler/brw_fs_opt_algebraic.cpp +++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp @@ -148,7 +148,7 @@ brw_fs_opt_algebraic(fs_visitor &s) break; case BRW_OPCODE_MUL: - if (inst->src[1].file != IMM) + if (inst->src[0].file != IMM && inst->src[1].file != IMM) continue; if (brw_reg_type_is_floating_point(inst->src[1].type)) @@ -177,6 +177,15 @@ brw_fs_opt_algebraic(fs_visitor &s) inst->writes_accumulator_implicitly(devinfo))) break; + if (inst->src[0].is_zero() || inst->src[1].is_zero()) { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[0] = brw_imm_d(0); + inst->src[1] = reg_undef; + progress = true; + break; + } + /* a * 1.0 = a */ if (inst->src[1].is_one()) { inst->opcode = BRW_OPCODE_MOV; @@ -187,6 +196,16 @@ brw_fs_opt_algebraic(fs_visitor &s) } /* a * -1.0 = -a */ + if (inst->src[0].is_negative_one()) { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[0] = inst->src[1]; + inst->src[0].negate = !inst->src[0].negate; + inst->src[1] = reg_undef; + progress = true; + break; + } + if (inst->src[1].is_negative_one()) { inst->opcode = BRW_OPCODE_MOV; inst->sources = 1; |