diff options
author | Connor Abbott <cwabbott0@gmail.com> | 2023-03-02 15:09:39 +0100 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2024-04-26 12:55:14 +0000 |
commit | 3bec9e684d02642fdebdebd6f5c509f7eecea2b4 (patch) | |
tree | 13a0775a5e4d510ab7c94a73c370d93eec0a20b5 | |
parent | 4828942d0c3353ced285c9aebed1109bd696db77 (diff) |
ir3: Rewrite shared reg handling when translating from NIR
In the future we will have many ALU instructions passing shared
registers to each other, and surrounding each of them with moves to/from
shared registers will severely bloat the IR size coming out of NIR and
make more pointless work for copy propagation. Instead, do something
more like the ACO approach and allow values stored in the hash table to
be shared, and move the burden of emitting a mov to non-shared to
ir3_get_src(). We will then use ir3_get_src_shared() in cases where we
can handle shared registers, and ir3_get_src_maybe_shared() in cases
where we can handle both shared and non-shared registers.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
-rw-r--r-- | src/freedreno/ir3/ir3_compiler_nir.c | 21 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_context.c | 56 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_context.h | 13 |
3 files changed, 72 insertions, 18 deletions
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index b27253afd05..7478ec14d00 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2644,6 +2644,14 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) src, 0); dst[0]->dsts[0]->flags |= IR3_REG_SHARED; dst[0]->srcs[0]->flags |= IR3_REG_PREDICATE; + /* Work around a bug with half-register shared -> non-shared moves by + * adding an extra mov here so that the original destination stays full. + */ + if (src->dsts[0]->flags & IR3_REG_HALF) { + dst[0] = ir3_MOV(b, dst[0], TYPE_U32); + if (!ctx->compiler->has_scalar_alu) + dst[0]->dsts[0]->flags &= ~IR3_REG_SHARED; + } break; } @@ -2651,6 +2659,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0]; dst[0] = ir3_READ_FIRST_MACRO(ctx->block, src, 0); dst[0]->dsts[0]->flags |= IR3_REG_SHARED; + /* See above. */ + if (src->dsts[0]->flags & IR3_REG_HALF) { + dst[0] = ir3_MOV(b, dst[0], TYPE_U32); + if (!ctx->compiler->has_scalar_alu) + dst[0]->dsts[0]->flags &= ~IR3_REG_SHARED; + } break; } @@ -3586,7 +3600,12 @@ read_phi_src(struct ir3_context *ctx, struct ir3_block *blk, /* Create an ir3 undef */ return NULL; } else { - return ir3_get_src(ctx, &nsrc->src)[0]; + /* We need to insert the move at the end of the block */ + struct ir3_block *old_block = ctx->block; + ctx->block = blk; + struct ir3_instruction *src = ir3_get_src(ctx, &nsrc->src)[0]; + ctx->block = old_block; + return src; } } } diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 423bb1fbaa0..65db920e429 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -227,7 +227,7 @@ ir3_get_def(struct ir3_context *ctx, nir_def *def, unsigned n) } struct ir3_instruction *const * -ir3_get_src(struct ir3_context *ctx, nir_src *src) +ir3_get_src_maybe_shared(struct ir3_context 
*ctx, nir_src *src) { struct hash_entry *entry; entry = _mesa_hash_table_search(ctx->def_ht, src->ssa); @@ -235,24 +235,49 @@ ir3_get_src(struct ir3_context *ctx, nir_src *src) return entry->data; } -void -ir3_put_def(struct ir3_context *ctx, nir_def *def) +static struct ir3_instruction * +get_shared(struct ir3_block *block, struct ir3_instruction *src, bool shared) { - unsigned bit_size = ir3_bitsize(ctx, def->bit_size); + if (!!(src->dsts[0]->flags & IR3_REG_SHARED) != shared) { + struct ir3_instruction *mov = + ir3_MOV(block, src, (src->dsts[0]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32); + mov->dsts[0]->flags &= ~IR3_REG_SHARED; + mov->dsts[0]->flags |= COND(shared, IR3_REG_SHARED); + return mov; + } - /* add extra mov if dst value is shared reg.. in some cases not all - * instructions can read from shared regs, in cases where they can - * ir3_cp will clean up the extra mov: - */ - for (unsigned i = 0; i < ctx->last_dst_n; i++) { - if (!ctx->last_dst[i]) - continue; - if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) { - ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32); - ctx->last_dst[i]->dsts[0]->flags &= ~IR3_REG_SHARED; + return src; +} + +struct ir3_instruction *const * +ir3_get_src_shared(struct ir3_context *ctx, nir_src *src, bool shared) +{ + unsigned num_components = nir_src_num_components(*src); + struct ir3_instruction *const *value = ir3_get_src_maybe_shared(ctx, src); + bool mismatch = false; + for (unsigned i = 0; i < nir_src_num_components(*src); i++) { + if (!!(value[i]->dsts[0]->flags & IR3_REG_SHARED) != shared) { + mismatch = true; + break; } } + if (!mismatch) + return value; + + struct ir3_instruction **new_value = + ralloc_array(ctx, struct ir3_instruction *, num_components); + for (unsigned i = 0; i < num_components; i++) + new_value[i] = get_shared(ctx->block, value[i], shared); + + return new_value; +} + +void +ir3_put_def(struct ir3_context *ctx, nir_def *def) +{ + unsigned bit_size = ir3_bitsize(ctx, 
def->bit_size); + if (bit_size <= 16) { for (unsigned i = 0; i < ctx->last_dst_n; i++) { struct ir3_instruction *dst = ctx->last_dst[i]; @@ -627,7 +652,8 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n, dst->array.id = arr->id; dst->array.offset = n; dst->array.base = INVALID_REG; - ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | flags)->def = src->dsts[0]; + ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | flags | + (src->dsts[0]->flags & IR3_REG_SHARED))->def = src->dsts[0]; if (arr->last_write && arr->last_write->instr->block == block) ir3_reg_set_last_array(mov, dst, arr->last_write); diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h index c0274deeb92..fbecb2c95e5 100644 --- a/src/freedreno/ir3/ir3_context.h +++ b/src/freedreno/ir3/ir3_context.h @@ -201,8 +201,17 @@ struct ir3_instruction **ir3_get_dst_ssa(struct ir3_context *ctx, nir_def *dst, unsigned n); struct ir3_instruction **ir3_get_def(struct ir3_context *ctx, nir_def *def, unsigned n); -struct ir3_instruction *const *ir3_get_src(struct ir3_context *ctx, - nir_src *src); +struct ir3_instruction *const *ir3_get_src_maybe_shared(struct ir3_context *ctx, + nir_src *src); +struct ir3_instruction *const *ir3_get_src_shared(struct ir3_context *ctx, + nir_src *src, bool shared); + +static inline struct ir3_instruction *const * +ir3_get_src(struct ir3_context *ctx, nir_src *src) +{ + return ir3_get_src_shared(ctx, src, false); +} + void ir3_put_def(struct ir3_context *ctx, nir_def *def); struct ir3_instruction *ir3_create_collect(struct ir3_block *block, struct ir3_instruction *const *arr, |