diff options
author | Kenneth Graunke <kenneth@whitecape.org> | 2024-03-18 22:52:35 -0700 |
---|---|---|
committer | Kenneth Graunke <kenneth@whitecape.org> | 2024-03-20 01:04:17 -0700 |
commit | ea423aba1b45f90526149f1e0c190ce113ffa7b7 (patch) | |
tree | 27bb753ff6e476ea751e18a80b368ecee88bab37 | |
parent | 831703157e7b272fcd1d36de2b3bcc8d5ec7d500 (diff) |
intel/brw: Split out 64-bit lowering from algebraic optimizations
We don't necessarily want to split up MOVs for 64-bit addresses into
2x 32-bit MOVs right away, as this makes things like copy propagating
the whole address around harder. Instead, we should do this lowering once,
late in the pipeline, while still running the other algebraic optimizations earlier.
fossil-db results for Alchemist show tiny improvements:
Totals:
Instrs: 161310502 -> 161310436 (-0.00%); split: -0.00%, +0.00%
Cycles: 14370605606 -> 14370605159 (-0.00%); split: -0.00%, +0.00%
Totals from 33 (0.01% of 652298) affected shaders:
Instrs: 15053 -> 14987 (-0.44%); split: -0.64%, +0.20%
Cycles: 196947 -> 196500 (-0.23%); split: -0.25%, +0.02%
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28286>
-rw-r--r-- | src/intel/compiler/brw_fs.h | 1 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_lower.cpp | 98 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_opt.cpp | 7 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_opt_algebraic.cpp | 67 |
4 files changed, 101 insertions, 72 deletions
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 895a449b1ac..a36ec900b71 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -593,6 +593,7 @@ void nir_to_brw(fs_visitor *s); void brw_fs_optimize(fs_visitor &s); bool brw_fs_lower_3src_null_dest(fs_visitor &s); +bool brw_fs_lower_alu_restrictions(fs_visitor &s); bool brw_fs_lower_barycentrics(fs_visitor &s); bool brw_fs_lower_constant_loads(fs_visitor &s); bool brw_fs_lower_derivatives(fs_visitor &s); diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp index ff038b3961a..c8ce381aa31 100644 --- a/src/intel/compiler/brw_fs_lower.cpp +++ b/src/intel/compiler/brw_fs_lower.cpp @@ -562,3 +562,101 @@ brw_fs_lower_3src_null_dest(fs_visitor &s) return progress; } +/** + * Perform lowering to legalize the IR for various ALU restrictions. + * + * For example: + * - Splitting 64-bit MOV/SEL into 2x32-bit where needed + */ +bool +brw_fs_lower_alu_restrictions(fs_visitor &s) +{ + const intel_device_info *devinfo = s.devinfo; + bool progress = false; + + foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) { + switch (inst->opcode) { + case BRW_OPCODE_MOV: + if (!devinfo->has_64bit_float && + inst->dst.type == BRW_REGISTER_TYPE_DF) { + assert(inst->dst.type == inst->src[0].type); + assert(!inst->saturate); + assert(!inst->src[0].abs); + assert(!inst->src[0].negate); + const brw::fs_builder ibld(&s, block, inst); + + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); + + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1), + subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1)); + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0), + subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0)); + + inst->remove(block); + progress = true; + } + + if (!devinfo->has_64bit_int && + (inst->dst.type == BRW_REGISTER_TYPE_UQ || + inst->dst.type == BRW_REGISTER_TYPE_Q)) { + assert(inst->dst.type == inst->src[0].type); + assert(!inst->saturate); + 
assert(!inst->src[0].abs); + assert(!inst->src[0].negate); + const brw::fs_builder ibld(&s, block, inst); + + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); + + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1)); + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0)); + + inst->remove(block); + progress = true; + } + break; + + case BRW_OPCODE_SEL: + if (!devinfo->has_64bit_float && + !devinfo->has_64bit_int && + (inst->dst.type == BRW_REGISTER_TYPE_DF || + inst->dst.type == BRW_REGISTER_TYPE_UQ || + inst->dst.type == BRW_REGISTER_TYPE_Q)) { + assert(inst->dst.type == inst->src[0].type); + assert(!inst->saturate); + assert(!inst->src[0].abs && !inst->src[0].negate); + assert(!inst->src[1].abs && !inst->src[1].negate); + const brw::fs_builder ibld(&s, block, inst); + + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); + + set_predicate(inst->predicate, + ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0))); + set_predicate(inst->predicate, + ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1))); + + inst->remove(block); + progress = true; + } + break; + + default: + break; + } + } + + if (progress) { + s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW | + DEPENDENCY_INSTRUCTION_DETAIL); + } + + return progress; +} diff --git a/src/intel/compiler/brw_fs_opt.cpp b/src/intel/compiler/brw_fs_opt.cpp index f7a1f2a618a..7835d060084 100644 --- a/src/intel/compiler/brw_fs_opt.cpp +++ b/src/intel/compiler/brw_fs_opt.cpp @@ -12,7 +12,6 @@ using namespace brw; void brw_fs_optimize(fs_visitor &s) { - const intel_device_info *devinfo = s.devinfo; const nir_shader *nir = s.nir; s.debug_optimizer(nir, "start", 0, 0); @@ 
-123,15 +122,13 @@ brw_fs_optimize(fs_visitor &s) if (OPT(brw_fs_lower_load_payload)) { OPT(brw_fs_opt_split_virtual_grfs); - /* Lower 64 bit MOVs generated by payload lowering. */ - if (!devinfo->has_64bit_float || !devinfo->has_64bit_int) - OPT(brw_fs_opt_algebraic); - OPT(brw_fs_opt_register_coalesce); OPT(brw_fs_lower_simd_width); OPT(brw_fs_opt_dead_code_eliminate); } + OPT(brw_fs_lower_alu_restrictions); + OPT(brw_fs_opt_combine_constants); if (OPT(brw_fs_lower_integer_multiplication)) { /* If lower_integer_multiplication made progress, it may have produced diff --git a/src/intel/compiler/brw_fs_opt_algebraic.cpp b/src/intel/compiler/brw_fs_opt_algebraic.cpp index aab17ae4bdd..e92bf4c5727 100644 --- a/src/intel/compiler/brw_fs_opt_algebraic.cpp +++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp @@ -73,47 +73,6 @@ brw_fs_opt_algebraic(fs_visitor &s) foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) { switch (inst->opcode) { case BRW_OPCODE_MOV: - if (!devinfo->has_64bit_float && - inst->dst.type == BRW_REGISTER_TYPE_DF) { - assert(inst->dst.type == inst->src[0].type); - assert(!inst->saturate); - assert(!inst->src[0].abs); - assert(!inst->src[0].negate); - const brw::fs_builder ibld(&s, block, inst); - - if (!inst->is_partial_write()) - ibld.emit_undef_for_dst(inst); - - ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1), - subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1)); - ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0), - subscript(inst->src[0], BRW_REGISTER_TYPE_F, 0)); - - inst->remove(block); - progress = true; - } - - if (!devinfo->has_64bit_int && - (inst->dst.type == BRW_REGISTER_TYPE_UQ || - inst->dst.type == BRW_REGISTER_TYPE_Q)) { - assert(inst->dst.type == inst->src[0].type); - assert(!inst->saturate); - assert(!inst->src[0].abs); - assert(!inst->src[0].negate); - const brw::fs_builder ibld(&s, block, inst); - - if (!inst->is_partial_write()) - ibld.emit_undef_for_dst(inst); - - ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 
1), - subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1)); - ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), - subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0)); - - inst->remove(block); - progress = true; - } - if ((inst->conditional_mod == BRW_CONDITIONAL_Z || inst->conditional_mod == BRW_CONDITIONAL_NZ) && inst->dst.is_null() && @@ -299,32 +258,6 @@ brw_fs_opt_algebraic(fs_visitor &s) } break; case BRW_OPCODE_SEL: - if (!devinfo->has_64bit_float && - !devinfo->has_64bit_int && - (inst->dst.type == BRW_REGISTER_TYPE_DF || - inst->dst.type == BRW_REGISTER_TYPE_UQ || - inst->dst.type == BRW_REGISTER_TYPE_Q)) { - assert(inst->dst.type == inst->src[0].type); - assert(!inst->saturate); - assert(!inst->src[0].abs && !inst->src[0].negate); - assert(!inst->src[1].abs && !inst->src[1].negate); - const brw::fs_builder ibld(&s, block, inst); - - if (!inst->is_partial_write()) - ibld.emit_undef_for_dst(inst); - - set_predicate(inst->predicate, - ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), - subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0), - subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0))); - set_predicate(inst->predicate, - ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), - subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1), - subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1))); - - inst->remove(block); - progress = true; - } if (inst->src[0].equals(inst->src[1])) { inst->opcode = BRW_OPCODE_MOV; inst->sources = 1; |