diff options
author | Connor Abbott <cwabbott0@gmail.com> | 2023-02-16 18:38:13 +0100 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2024-04-26 12:55:13 +0000 |
commit | 06cf178edeb66fa99c6f8aa1854cc64eb24daf72 (patch) | |
tree | 8806aa5ce14e421e9a00452a3e8ce764a88c08a2 | |
parent | 876c5396a7893d2a55a2d6635085d70c03c216d4 (diff) |
ir3: Implement source restrictions for shared ALU
cat1-cat4 instructions executed on the shared ALU can use shared
registers in an unlimited capacity, as opposed to the vector ALU, which
apparently treats shared registers and consts the same. However, they
cannot use "normal" sources (which must be "uniformized" via a mov).
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
-rw-r--r-- | src/freedreno/ir3/ir3.c | 58 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_opt_predicates.c | 18 |
2 files changed, 66 insertions, 10 deletions
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 713c7651854..81321ab2842 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -1066,9 +1066,6 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) struct ir3_compiler *compiler = instr->block->shader->compiler; unsigned valid_flags; - if ((flags & IR3_REG_SHARED) && opc_cat(instr->opc) > 3) - return false; - flags = cp_flags(flags); /* If destination is indirect, then source cannot be.. at least @@ -1137,9 +1134,30 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) else return flags == 0; break; - default: + default: { valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV | IR3_REG_SHARED; + + /* floating-point conversions when moving from non-shared to shared + * seem not to work. We only use floating-point types in ir3 for + * conversions, so don't bother specially handling the case where the + * types are equal. + */ + if ((instr->dsts[0]->flags & IR3_REG_SHARED) && + !(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST)) && + (full_type(instr->cat1.src_type) == TYPE_F32 || + full_type(instr->cat1.dst_type) == TYPE_F32)) + return false; + + /* Conversions seem not to work in shared->shared copies before scalar + * ALU is supported. + */ + if (!compiler->has_scalar_alu && + (flags & IR3_REG_SHARED) && + (instr->dsts[0]->flags & IR3_REG_SHARED) && + instr->cat1.src_type != instr->cat1.dst_type) + return false; + } } if (flags & ~valid_flags) return false; @@ -1156,6 +1174,12 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) n == 1 && flags == IR3_REG_IMMED) return true; + /* cat2/cat3 scalar ALU instructions must not have regular sources. 
*/ + if (instr->dsts[0]->flags & IR3_REG_SHARED) { + if (!(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST))) + return false; + } + if (flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_SHARED)) { unsigned m = n ^ 1; /* cannot deal w/ const or shared in both srcs: @@ -1163,9 +1187,14 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) */ if (m < instr->srcs_count) { struct ir3_register *reg = instr->srcs[m]; - if ((flags & (IR3_REG_CONST | IR3_REG_SHARED)) && - (reg->flags & (IR3_REG_CONST | IR3_REG_SHARED))) - return false; + if (instr->dsts[0]->flags & IR3_REG_SHARED) { + if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST)) + return false; + } else { + if ((flags & (IR3_REG_CONST | IR3_REG_SHARED)) && + (reg->flags & (IR3_REG_CONST | IR3_REG_SHARED))) + return false; + } if ((flags & IR3_REG_IMMED) && reg->flags & (IR3_REG_IMMED)) return false; } @@ -1204,14 +1233,23 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) if (flags & ~valid_flags) return false; - if (flags & (IR3_REG_CONST | IR3_REG_SHARED | IR3_REG_RELATIV)) { + if (flags & (IR3_REG_CONST | IR3_REG_RELATIV) || + (!(instr->dsts[0]->flags & IR3_REG_SHARED) && + (flags & IR3_REG_SHARED))) { /* cannot deal w/ const/shared/relativ in 2nd src: */ if (n == 1) return false; } + if (instr->dsts[0]->flags & IR3_REG_SHARED) { + if (!(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST))) + return false; + } + break; case 4: + if ((instr->dsts[0]->flags & IR3_REG_SHARED) != (flags & IR3_REG_SHARED)) + return false; /* seems like blob compiler avoids const as src.. 
*/ /* TODO double check if this is still the case on a4xx */ if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) @@ -1226,6 +1264,10 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) break; case 6: valid_flags = IR3_REG_IMMED; + + if (instr->opc == OPC_STC && n == 1) + valid_flags |= IR3_REG_SHARED; + if (flags & ~valid_flags) return false; diff --git a/src/freedreno/ir3/ir3_opt_predicates.c b/src/freedreno/ir3/ir3_opt_predicates.c index e68cf36e9ee..cf63ee4e98f 100644 --- a/src/freedreno/ir3/ir3_opt_predicates.c +++ b/src/freedreno/ir3/ir3_opt_predicates.c @@ -51,6 +51,19 @@ clone_with_predicate_dst(struct opt_predicates_ctx *ctx, } static bool +is_shared_or_const(struct ir3_register *reg) +{ + return reg->flags & (IR3_REG_CONST | IR3_REG_SHARED); +} + +static bool +cat2_needs_scalar_alu(struct ir3_instruction *instr) +{ + return is_shared_or_const(instr->srcs[0]) && + (instr->srcs_count == 1 || is_shared_or_const(instr->srcs[1])); +} + +static bool can_write_predicate(struct opt_predicates_ctx *ctx, struct ir3_instruction *instr) { @@ -58,13 +71,14 @@ can_write_predicate(struct opt_predicates_ctx *ctx, case OPC_CMPS_S: case OPC_CMPS_U: case OPC_CMPS_F: - return true; + return !cat2_needs_scalar_alu(instr); case OPC_AND_B: case OPC_OR_B: case OPC_NOT_B: case OPC_XOR_B: case OPC_GETBIT_B: - return ctx->ir->compiler->bitops_can_write_predicates; + return ctx->ir->compiler->bitops_can_write_predicates && + !cat2_needs_scalar_alu(instr); default: return false; } |