diff options
author | Gert Wollny <gert.wollny@collabora.com> | 2022-12-07 18:16:57 +0100 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-12-09 08:26:31 +0000 |
commit | 18a8d148d862d4a0f5b83f4385f00cf8a4a71b6d (patch) | |
tree | fa4bb0ac102d0138ae82da1eb5e07ce74c893a78 | |
parent | 38da65c44559a89b91eeed76c5f594bbe9a99760 (diff) |
r600/sfn: Cleanup copy-prop into vec4 source values
Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20205>
-rw-r--r-- | src/gallium/drivers/r600/sfn/sfn_optimizer.cpp | 152 |
1 files changed, 96 insertions, 56 deletions
diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp index 6bf5aff84a1..31b43069877 100644 --- a/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp @@ -438,103 +438,143 @@ static bool register_chan_is_pinned(Pin pin) void -CopyPropFwdVisitor::propagate_to(RegisterVec4& src, Instr *instr) +CopyPropFwdVisitor::propagate_to(RegisterVec4& value, Instr *instr) { + /* Collect parent instructions - only ALU move without modifiers + * and without indirect access are allowed. */ AluInstr *parents[4] = {nullptr}; + bool have_candidates = false; for (int i = 0; i < 4; ++i) { - if (src[i]->chan() < 4 && src[i]->has_flag(Register::ssa)) { + if (value[i]->chan() < 4 && value[i]->has_flag(Register::ssa)) { /* We have a pre-define value, so we can't propagate a copy */ - if (src[i]->parents().empty()) + if (value[i]->parents().empty()) return; - assert(src[i]->parents().size() == 1); - parents[i] = (*src[i]->parents().begin())->as_alu(); + assert(value[i]->parents().size() == 1); + parents[i] = (*value[i]->parents().begin())->as_alu(); + + /* Parent op is not an ALU instruction, so we can't + copy-propagate */ + if (!parents[i]) + return; + + if ((parents[i]->opcode() != op1_mov) || + parents[i]->has_alu_flag(alu_src0_neg) || + parents[i]->has_alu_flag(alu_src0_abs) || + parents[i]->has_alu_flag(alu_dst_clamp) || + parents[i]->has_alu_flag(alu_src0_rel) || + std::get<0>(parents[i]->indirect_addr())) + return; + have_candidates = true; } } + + if (!have_candidates) + return; + + /* Collect the new source registers. We may have to move all registers + * to a new virtual sel index. */ + PRegister new_src[4] = {0}; + int new_chan[4] = {0,0,0,0}; - uint8_t mask = 0; + uint8_t used_chan_mask = 0; int new_sel = -1; bool all_sel_can_change = true; bool is_ssa = true; - int new_chan[4] = {0,0,0,0}; for (int i = 0; i < 4; ++i) { - unsigned allowed_mask = 0xf & ~mask; + + /* No parent means we either ignore the channel or insert 0 or 1.*/ if (!parents[i]) continue; - if ((parents[i]->opcode() != op1_mov) || parents[i]->has_alu_flag(alu_src0_neg) || - parents[i]->has_alu_flag(alu_src0_abs) || - parents[i]->has_alu_flag(alu_dst_clamp) || - parents[i]->has_alu_flag(alu_src0_rel) || - std::get<0>(parents[i]->indirect_addr())) { + + unsigned allowed_mask = 0xf & ~used_chan_mask; + + auto src = parents[i]->src(0).as_register(); + if (!src) return; - } else { - auto src = parents[i]->src(0).as_register(); - if (!src) - return; - if (src->pin() == pin_array) - return; - if (!src->has_flag(Register::ssa) && - !assigned_register_direct(src)) { - return; - } - if (register_chan_is_pinned(src->pin())) { - allowed_mask = 1 << src->chan(); - } - new_chan[i] = src->chan(); - for (auto p : src->parents()) { - auto alu = p->as_alu(); - if (alu) - allowed_mask &= alu->allowed_dest_chan_mask(); - } - if (!allowed_mask) { - return; - } + /* Don't accept an array element for now, we would need extra checking + * that the value is not overwritten by an indirect access */ + if (src->pin() == pin_array) + return; - if (new_sel < 0) { - new_sel = src->sel(); - is_ssa = src->has_flag(Register::ssa); - new_chan[i] = src->chan(); - } else if (new_sel != src->sel()) { - if (all_sel_can_change && - register_sel_can_change(src->pin()) && - (is_ssa == src->has_flag(Register::ssa))) { - new_chan[i] = u_bit_scan(&allowed_mask); - new_sel = value_factory.new_register_index(); - } else - return; - } + /* Is this check still needed ? */ + if (!src->has_flag(Register::ssa) && + !assigned_register_direct(src)) { + return; + } - new_src[i] = src; - mask |= 1 << new_chan[i]; - if (!register_sel_can_change(src->pin())) - all_sel_can_change = false; + /* If the channel chan't switch we have to update the channel mask + * TODO: assign channel pinned registers first might give more + * opportunities for this optimization */ + if (register_chan_is_pinned(src->pin())) + allowed_mask = 1 << src->chan(); + + /* Update the possible channel mask based on the sourcee's parent + * instruction(s) */ + for (auto p : src->parents()) { + auto alu = p->as_alu(); + if (alu) + allowed_mask &= alu->allowed_dest_chan_mask(); } + + if (!allowed_mask) + return; + + /* Prefer keeping the channel, but if that's not possible + * i.e. if the sel has to change, then pick the next free channel + * (see below) */ + new_chan[i] = src->chan(); + + if (new_sel < 0) { + new_sel = src->sel(); + is_ssa = src->has_flag(Register::ssa); + } else if (new_sel != src->sel()) { + /* If we have to assign a new register sel index do so only + * if all already assigned source can get a new register index, + * and all registers are either SSA or registers. + * TODO: check whether this last restriction is required */ + if (all_sel_can_change && + register_sel_can_change(src->pin()) && + (is_ssa == src->has_flag(Register::ssa))) { + new_sel = value_factory.new_register_index(); + new_chan[i] = u_bit_scan(&allowed_mask); + } else /* Sources can't be combined to a vec4 so bail out */ + return; + } + + new_src[i] = src; + used_chan_mask |= 1 << new_chan[i]; + if (!register_sel_can_change(src->pin())) + all_sel_can_change = false; } + /* Apply the changes to the vec4 source */ + value.del_use(instr); for (int i = 0; i < 4; ++i) { if (parents[i]) { - src.del_use(instr); new_src[i]->set_sel(new_sel); if (is_ssa) new_src[i]->set_flag(Register::ssa); new_src[i]->set_chan(new_chan[i]); - src.set_value(i, new_src[i]); + + value.set_value(i, new_src[i]); + if (new_src[i]->pin() != pin_fully) { if (new_src[i]->pin() == pin_chan) new_src[i]->set_pin(pin_chgr); else new_src[i]->set_pin(pin_group); } - src.add_use(instr); progress |= true; } } + value.add_use(instr); if (progress) - src.validate(); + value.validate(); } bool CopyPropFwdVisitor::assigned_register_direct(PRegister reg) |