summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorConnor Abbott <cwabbott0@gmail.com>2023-02-24 14:11:49 +0100
committerMarge Bot <emma+marge@anholt.net>2024-04-26 12:55:13 +0000
commitfec5b9397f43ec350b40a1c3a6c013213559bb4a (patch)
treecacddb92f9561377ff8edbaad1a4a0fcdfcbe345
parent100096394f3db44bd41863e30dfec8e74962832b (diff)
ir3/lower_copies: Handle HW bug with shared half-floats
In the past we avoided emitting pure 16-bit subgroup macros because of this bug, but because we're going to start emitting the special moves they expand to directly, we also have to handle the bug directly. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
-rw-r--r--src/freedreno/ir3/ir3_lower_parallelcopy.c57
1 files changed, 57 insertions, 0 deletions
diff --git a/src/freedreno/ir3/ir3_lower_parallelcopy.c b/src/freedreno/ir3/ir3_lower_parallelcopy.c
index d380174b128..f84be2a5b57 100644
--- a/src/freedreno/ir3/ir3_lower_parallelcopy.c
+++ b/src/freedreno/ir3/ir3_lower_parallelcopy.c
@@ -572,6 +572,63 @@ ir3_lower_copies(struct ir3_shader_variant *v)
list_del(&instr->node);
} else if (instr->opc == OPC_META_PHI) {
list_del(&instr->node);
+ } else if (instr->opc == OPC_MOV) {
+ /* There seems to be a HW bug where moves where the source is 16-bit
+ * non-shared and the destination is 16-bit shared don't work when
+ * only fibers 64-127 are active. We work around it by instead
+ * generating a narrowing mov, which only works with even-numbered
+ * registers (i.e. .x and .z), but for odd numbers we can swap the
+ * components of the normal src and its even neighbor and then
+ * unswap afterwords to make it work for everything.
+ */
+ if ((instr->dsts[0]->flags & IR3_REG_SHARED) &&
+ (instr->dsts[0]->flags & IR3_REG_HALF) &&
+ !(instr->srcs[0]->flags & (IR3_REG_SHARED | IR3_REG_IMMED |
+ IR3_REG_CONST)) &&
+ (instr->srcs[0]->flags & IR3_REG_HALF)) {
+ unsigned src_num = instr->srcs[0]->num;
+ unsigned dst_num = instr->dsts[0]->num;
+
+ for (unsigned i = 0; i <= instr->repeat; i++,
+ src_num++, dst_num++) {
+ if (src_num & 1) {
+ for (unsigned i = 0; i < 2; i++) {
+ struct ir3_instruction *swz = ir3_instr_create(instr->block, OPC_SWZ, 2, 2);
+ ir3_dst_create(swz, src_num - 1, IR3_REG_HALF);
+ ir3_dst_create(swz, src_num, IR3_REG_HALF);
+ ir3_src_create(swz, src_num, IR3_REG_HALF);
+ ir3_src_create(swz, src_num - 1, IR3_REG_HALF);
+ swz->cat1.dst_type = TYPE_U16;
+ swz->cat1.src_type = TYPE_U16;
+ swz->repeat = 1;
+ if (i == 0)
+ ir3_instr_move_before(swz, instr);
+ else
+ ir3_instr_move_after(swz, instr);
+ }
+ }
+
+ struct ir3_instruction *mov =
+ ir3_instr_create(instr->block, OPC_MOV, 1, 1);
+
+ ir3_dst_create(mov, dst_num, instr->dsts[0]->flags);
+ ir3_src_create(mov, src_num / 2,
+ instr->srcs[0]->flags & ~IR3_REG_HALF);
+
+ /* Float conversions are banned in this case in
+ * ir3_valid_flags(), so we only have to worry about normal
+ * non-converting moves.
+ */
+ assert(instr->cat1.src_type == TYPE_U16 ||
+ instr->cat1.src_type == TYPE_S16);
+ mov->cat1.src_type = TYPE_U32;
+ mov->cat1.dst_type = TYPE_U16;
+
+ ir3_instr_move_before(mov, instr);
+ }
+
+ list_del(&instr->node);
+ }
}
}
}