summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGert Wollny <gert.wollny@collabora.com>2022-12-07 18:16:57 +0100
committerMarge Bot <emma+marge@anholt.net>2022-12-09 08:26:31 +0000
commit18a8d148d862d4a0f5b83f4385f00cf8a4a71b6d (patch)
treefa4bb0ac102d0138ae82da1eb5e07ce74c893a78
parent38da65c44559a89b91eeed76c5f594bbe9a99760 (diff)
r600/sfn: Cleanup copy-prop into vec4 source values
Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20205>
-rw-r--r--src/gallium/drivers/r600/sfn/sfn_optimizer.cpp152
1 files changed, 96 insertions, 56 deletions
diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp
index 6bf5aff84a1..31b43069877 100644
--- a/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp
@@ -438,103 +438,143 @@ static bool register_chan_is_pinned(Pin pin)
void
-CopyPropFwdVisitor::propagate_to(RegisterVec4& src, Instr *instr)
+CopyPropFwdVisitor::propagate_to(RegisterVec4& value, Instr *instr)
{
+ /* Collect parent instructions - only ALU move without modifiers
+ * and without indirect access are allowed. */
AluInstr *parents[4] = {nullptr};
+ bool have_candidates = false;
for (int i = 0; i < 4; ++i) {
- if (src[i]->chan() < 4 && src[i]->has_flag(Register::ssa)) {
+ if (value[i]->chan() < 4 && value[i]->has_flag(Register::ssa)) {
/* We have a pre-define value, so we can't propagate a copy */
- if (src[i]->parents().empty())
+ if (value[i]->parents().empty())
return;
- assert(src[i]->parents().size() == 1);
- parents[i] = (*src[i]->parents().begin())->as_alu();
+ assert(value[i]->parents().size() == 1);
+ parents[i] = (*value[i]->parents().begin())->as_alu();
+
+ /* Parent op is not an ALU instruction, so we can't
+ copy-propagate */
+ if (!parents[i])
+ return;
+
+ if ((parents[i]->opcode() != op1_mov) ||
+ parents[i]->has_alu_flag(alu_src0_neg) ||
+ parents[i]->has_alu_flag(alu_src0_abs) ||
+ parents[i]->has_alu_flag(alu_dst_clamp) ||
+ parents[i]->has_alu_flag(alu_src0_rel) ||
+ std::get<0>(parents[i]->indirect_addr()))
+ return;
+ have_candidates = true;
}
}
+
+ if (!have_candidates)
+ return;
+
+ /* Collect the new source registers. We may have to move all registers
+ * to a new virtual sel index. */
+
PRegister new_src[4] = {0};
+ int new_chan[4] = {0,0,0,0};
- uint8_t mask = 0;
+ uint8_t used_chan_mask = 0;
int new_sel = -1;
bool all_sel_can_change = true;
bool is_ssa = true;
- int new_chan[4] = {0,0,0,0};
for (int i = 0; i < 4; ++i) {
- unsigned allowed_mask = 0xf & ~mask;
+
+ /* No parent means we either ignore the channel or insert 0 or 1.*/
if (!parents[i])
continue;
- if ((parents[i]->opcode() != op1_mov) || parents[i]->has_alu_flag(alu_src0_neg) ||
- parents[i]->has_alu_flag(alu_src0_abs) ||
- parents[i]->has_alu_flag(alu_dst_clamp) ||
- parents[i]->has_alu_flag(alu_src0_rel) ||
- std::get<0>(parents[i]->indirect_addr())) {
+
+ unsigned allowed_mask = 0xf & ~used_chan_mask;
+
+ auto src = parents[i]->src(0).as_register();
+ if (!src)
return;
- } else {
- auto src = parents[i]->src(0).as_register();
- if (!src)
- return;
- if (src->pin() == pin_array)
- return;
- if (!src->has_flag(Register::ssa) &&
- !assigned_register_direct(src)) {
- return;
- }
- if (register_chan_is_pinned(src->pin())) {
- allowed_mask = 1 << src->chan();
- }
- new_chan[i] = src->chan();
- for (auto p : src->parents()) {
- auto alu = p->as_alu();
- if (alu)
- allowed_mask &= alu->allowed_dest_chan_mask();
- }
- if (!allowed_mask) {
- return;
- }
+ /* Don't accept an array element for now, we would need extra checking
+ * that the value is not overwritten by an indirect access */
+ if (src->pin() == pin_array)
+ return;
- if (new_sel < 0) {
- new_sel = src->sel();
- is_ssa = src->has_flag(Register::ssa);
- new_chan[i] = src->chan();
- } else if (new_sel != src->sel()) {
- if (all_sel_can_change &&
- register_sel_can_change(src->pin()) &&
- (is_ssa == src->has_flag(Register::ssa))) {
- new_chan[i] = u_bit_scan(&allowed_mask);
- new_sel = value_factory.new_register_index();
- } else
- return;
- }
+ /* Is this check still needed ? */
+ if (!src->has_flag(Register::ssa) &&
+ !assigned_register_direct(src)) {
+ return;
+ }
- new_src[i] = src;
- mask |= 1 << new_chan[i];
- if (!register_sel_can_change(src->pin()))
- all_sel_can_change = false;
+ /* If the channel chan't switch we have to update the channel mask
+ * TODO: assign channel pinned registers first might give more
+ * opportunities for this optimization */
+ if (register_chan_is_pinned(src->pin()))
+ allowed_mask = 1 << src->chan();
+
+ /* Update the possible channel mask based on the sourcee's parent
+ * instruction(s) */
+ for (auto p : src->parents()) {
+ auto alu = p->as_alu();
+ if (alu)
+ allowed_mask &= alu->allowed_dest_chan_mask();
}
+
+ if (!allowed_mask)
+ return;
+
+ /* Prefer keeping the channel, but if that's not possible
+ * i.e. if the sel has to change, then pick the next free channel
+ * (see below) */
+ new_chan[i] = src->chan();
+
+ if (new_sel < 0) {
+ new_sel = src->sel();
+ is_ssa = src->has_flag(Register::ssa);
+ } else if (new_sel != src->sel()) {
+ /* If we have to assign a new register sel index do so only
+ * if all already assigned source can get a new register index,
+ * and all registers are either SSA or registers.
+ * TODO: check whether this last restriction is required */
+ if (all_sel_can_change &&
+ register_sel_can_change(src->pin()) &&
+ (is_ssa == src->has_flag(Register::ssa))) {
+ new_sel = value_factory.new_register_index();
+ new_chan[i] = u_bit_scan(&allowed_mask);
+ } else /* Sources can't be combined to a vec4 so bail out */
+ return;
+ }
+
+ new_src[i] = src;
+ used_chan_mask |= 1 << new_chan[i];
+ if (!register_sel_can_change(src->pin()))
+ all_sel_can_change = false;
}
+ /* Apply the changes to the vec4 source */
+ value.del_use(instr);
for (int i = 0; i < 4; ++i) {
if (parents[i]) {
- src.del_use(instr);
new_src[i]->set_sel(new_sel);
if (is_ssa)
new_src[i]->set_flag(Register::ssa);
new_src[i]->set_chan(new_chan[i]);
- src.set_value(i, new_src[i]);
+
+ value.set_value(i, new_src[i]);
+
if (new_src[i]->pin() != pin_fully) {
if (new_src[i]->pin() == pin_chan)
new_src[i]->set_pin(pin_chgr);
else
new_src[i]->set_pin(pin_group);
}
- src.add_use(instr);
progress |= true;
}
}
+ value.add_use(instr);
if (progress)
- src.validate();
+ value.validate();
}
bool CopyPropFwdVisitor::assigned_register_direct(PRegister reg)