summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorConnor Abbott <cwabbott0@gmail.com>2023-02-16 18:38:13 +0100
committerMarge Bot <emma+marge@anholt.net>2024-04-26 12:55:13 +0000
commit06cf178edeb66fa99c6f8aa1854cc64eb24daf72 (patch)
tree8806aa5ce14e421e9a00452a3e8ce764a88c08a2
parent876c5396a7893d2a55a2d6635085d70c03c216d4 (diff)
ir3: Implement source restrictions for shared ALU
cat1-cat4 instructions executed on the shared ALU can use shared registers in an unlimited capacity, as opposed to the vector ALU which apparently treats shared registers and consts the same. However they cannot use "normal" sources (which must be "uniformized" via a mov). Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
-rw-r--r--src/freedreno/ir3/ir3.c58
-rw-r--r--src/freedreno/ir3/ir3_opt_predicates.c18
2 files changed, 66 insertions, 10 deletions
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 713c7651854..81321ab2842 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -1066,9 +1066,6 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
struct ir3_compiler *compiler = instr->block->shader->compiler;
unsigned valid_flags;
- if ((flags & IR3_REG_SHARED) && opc_cat(instr->opc) > 3)
- return false;
-
flags = cp_flags(flags);
/* If destination is indirect, then source cannot be.. at least
@@ -1137,9 +1134,30 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
else
return flags == 0;
break;
- default:
+ default: {
valid_flags =
IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV | IR3_REG_SHARED;
+
+ /* floating-point conversions when moving from non-shared to shared
+ * seem not to work. We only use floating-point types in ir3 for
+ * conversions, so don't bother specially handling the case where the
+ * types are equal.
+ */
+ if ((instr->dsts[0]->flags & IR3_REG_SHARED) &&
+ !(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST)) &&
+ (full_type(instr->cat1.src_type) == TYPE_F32 ||
+ full_type(instr->cat1.dst_type) == TYPE_F32))
+ return false;
+
+ /* Conversions seem not to work in shared->shared copies before scalar
+ * ALU is supported.
+ */
+ if (!compiler->has_scalar_alu &&
+ (flags & IR3_REG_SHARED) &&
+ (instr->dsts[0]->flags & IR3_REG_SHARED) &&
+ instr->cat1.src_type != instr->cat1.dst_type)
+ return false;
+ }
}
if (flags & ~valid_flags)
return false;
@@ -1156,6 +1174,12 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
n == 1 && flags == IR3_REG_IMMED)
return true;
+ /* cat2/cat3 scalar ALU instructions must not have regular sources. */
+ if (instr->dsts[0]->flags & IR3_REG_SHARED) {
+ if (!(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST)))
+ return false;
+ }
+
if (flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_SHARED)) {
unsigned m = n ^ 1;
/* cannot deal w/ const or shared in both srcs:
@@ -1163,9 +1187,14 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
*/
if (m < instr->srcs_count) {
struct ir3_register *reg = instr->srcs[m];
- if ((flags & (IR3_REG_CONST | IR3_REG_SHARED)) &&
- (reg->flags & (IR3_REG_CONST | IR3_REG_SHARED)))
- return false;
+ if (instr->dsts[0]->flags & IR3_REG_SHARED) {
+ if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
+ return false;
+ } else {
+ if ((flags & (IR3_REG_CONST | IR3_REG_SHARED)) &&
+ (reg->flags & (IR3_REG_CONST | IR3_REG_SHARED)))
+ return false;
+ }
if ((flags & IR3_REG_IMMED) && reg->flags & (IR3_REG_IMMED))
return false;
}
@@ -1204,14 +1233,23 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
if (flags & ~valid_flags)
return false;
- if (flags & (IR3_REG_CONST | IR3_REG_SHARED | IR3_REG_RELATIV)) {
+ if (flags & (IR3_REG_CONST | IR3_REG_RELATIV) ||
+ (!(instr->dsts[0]->flags & IR3_REG_SHARED) &&
+ (flags & IR3_REG_SHARED))) {
/* cannot deal w/ const/shared/relativ in 2nd src: */
if (n == 1)
return false;
}
+ if (instr->dsts[0]->flags & IR3_REG_SHARED) {
+ if (!(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST)))
+ return false;
+ }
+
break;
case 4:
+ if ((instr->dsts[0]->flags & IR3_REG_SHARED) != (flags & IR3_REG_SHARED))
+ return false;
/* seems like blob compiler avoids const as src.. */
/* TODO double check if this is still the case on a4xx */
if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
@@ -1226,6 +1264,10 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
break;
case 6:
valid_flags = IR3_REG_IMMED;
+
+ if (instr->opc == OPC_STC && n == 1)
+ valid_flags |= IR3_REG_SHARED;
+
if (flags & ~valid_flags)
return false;
diff --git a/src/freedreno/ir3/ir3_opt_predicates.c b/src/freedreno/ir3/ir3_opt_predicates.c
index e68cf36e9ee..cf63ee4e98f 100644
--- a/src/freedreno/ir3/ir3_opt_predicates.c
+++ b/src/freedreno/ir3/ir3_opt_predicates.c
@@ -51,6 +51,19 @@ clone_with_predicate_dst(struct opt_predicates_ctx *ctx,
}
static bool
+is_shared_or_const(struct ir3_register *reg)
+{
+ return reg->flags & (IR3_REG_CONST | IR3_REG_SHARED);
+}
+
+static bool
+cat2_needs_scalar_alu(struct ir3_instruction *instr)
+{
+ return is_shared_or_const(instr->srcs[0]) &&
+ (instr->srcs_count == 1 || is_shared_or_const(instr->srcs[1]));
+}
+
+static bool
can_write_predicate(struct opt_predicates_ctx *ctx,
struct ir3_instruction *instr)
{
@@ -58,13 +71,14 @@ can_write_predicate(struct opt_predicates_ctx *ctx,
case OPC_CMPS_S:
case OPC_CMPS_U:
case OPC_CMPS_F:
- return true;
+ return !cat2_needs_scalar_alu(instr);
case OPC_AND_B:
case OPC_OR_B:
case OPC_NOT_B:
case OPC_XOR_B:
case OPC_GETBIT_B:
- return ctx->ir->compiler->bitops_can_write_predicates;
+ return ctx->ir->compiler->bitops_can_write_predicates &&
+ !cat2_needs_scalar_alu(instr);
default:
return false;
}