summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Connor Abbott <cwabbott0@gmail.com> 2023-03-02 15:09:39 +0100
committer: Marge Bot <emma+marge@anholt.net> 2024-04-26 12:55:14 +0000
commit: 3bec9e684d02642fdebdebd6f5c509f7eecea2b4 (patch)
tree: 13a0775a5e4d510ab7c94a73c370d93eec0a20b5
parent: 4828942d0c3353ced285c9aebed1109bd696db77 (diff)
ir3: Rewrite shared reg handling when translating from NIR
In the future we will have many ALU instructions passing shared registers to each other, and surrounding each of them with moves to/from shared registers will severely bloat the IR size coming out of NIR and create more pointless work for copy propagation.

Instead, do something more like the ACO approach: allow values stored in the hash table to be shared, and move the burden of emitting a mov to non-shared to ir3_get_src(). We will then use ir3_get_src_shared() or ir3_get_src_maybe_shared(), respectively, in cases where we can handle shared registers or where we can handle both.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
-rw-r--r-- src/freedreno/ir3/ir3_compiler_nir.c 21
-rw-r--r-- src/freedreno/ir3/ir3_context.c 56
-rw-r--r-- src/freedreno/ir3/ir3_context.h 13
3 files changed, 72 insertions, 18 deletions
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index b27253afd05..7478ec14d00 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2644,6 +2644,14 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
src, 0);
dst[0]->dsts[0]->flags |= IR3_REG_SHARED;
dst[0]->srcs[0]->flags |= IR3_REG_PREDICATE;
+ /* Work around a bug with half-register shared -> non-shared moves by
+ * adding an extra mov here so that the original destination stays full.
+ */
+ if (src->dsts[0]->flags & IR3_REG_HALF) {
+ dst[0] = ir3_MOV(b, dst[0], TYPE_U32);
+ if (!ctx->compiler->has_scalar_alu)
+ dst[0]->dsts[0]->flags &= ~IR3_REG_SHARED;
+ }
break;
}
@@ -2651,6 +2659,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0];
dst[0] = ir3_READ_FIRST_MACRO(ctx->block, src, 0);
dst[0]->dsts[0]->flags |= IR3_REG_SHARED;
+ /* See above. */
+ if (src->dsts[0]->flags & IR3_REG_HALF) {
+ dst[0] = ir3_MOV(b, dst[0], TYPE_U32);
+ if (!ctx->compiler->has_scalar_alu)
+ dst[0]->dsts[0]->flags &= ~IR3_REG_SHARED;
+ }
break;
}
@@ -3586,7 +3600,12 @@ read_phi_src(struct ir3_context *ctx, struct ir3_block *blk,
/* Create an ir3 undef */
return NULL;
} else {
- return ir3_get_src(ctx, &nsrc->src)[0];
+ /* We need to insert the move at the end of the block */
+ struct ir3_block *old_block = ctx->block;
+ ctx->block = blk;
+ struct ir3_instruction *src = ir3_get_src(ctx, &nsrc->src)[0];
+ ctx->block = old_block;
+ return src;
}
}
}
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index 423bb1fbaa0..65db920e429 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -227,7 +227,7 @@ ir3_get_def(struct ir3_context *ctx, nir_def *def, unsigned n)
}
struct ir3_instruction *const *
-ir3_get_src(struct ir3_context *ctx, nir_src *src)
+ir3_get_src_maybe_shared(struct ir3_context *ctx, nir_src *src)
{
struct hash_entry *entry;
entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
@@ -235,24 +235,49 @@ ir3_get_src(struct ir3_context *ctx, nir_src *src)
return entry->data;
}
-void
-ir3_put_def(struct ir3_context *ctx, nir_def *def)
+static struct ir3_instruction *
+get_shared(struct ir3_block *block, struct ir3_instruction *src, bool shared)
{
- unsigned bit_size = ir3_bitsize(ctx, def->bit_size);
+ if (!!(src->dsts[0]->flags & IR3_REG_SHARED) != shared) {
+ struct ir3_instruction *mov =
+ ir3_MOV(block, src, (src->dsts[0]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32);
+ mov->dsts[0]->flags &= ~IR3_REG_SHARED;
+ mov->dsts[0]->flags |= COND(shared, IR3_REG_SHARED);
+ return mov;
+ }
- /* add extra mov if dst value is shared reg.. in some cases not all
- * instructions can read from shared regs, in cases where they can
- * ir3_cp will clean up the extra mov:
- */
- for (unsigned i = 0; i < ctx->last_dst_n; i++) {
- if (!ctx->last_dst[i])
- continue;
- if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
- ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
- ctx->last_dst[i]->dsts[0]->flags &= ~IR3_REG_SHARED;
+ return src;
+}
+
+struct ir3_instruction *const *
+ir3_get_src_shared(struct ir3_context *ctx, nir_src *src, bool shared)
+{
+ unsigned num_components = nir_src_num_components(*src);
+ struct ir3_instruction *const *value = ir3_get_src_maybe_shared(ctx, src);
+ bool mismatch = false;
+ for (unsigned i = 0; i < nir_src_num_components(*src); i++) {
+ if (!!(value[i]->dsts[0]->flags & IR3_REG_SHARED) != shared) {
+ mismatch = true;
+ break;
}
}
+ if (!mismatch)
+ return value;
+
+ struct ir3_instruction **new_value =
+ ralloc_array(ctx, struct ir3_instruction *, num_components);
+ for (unsigned i = 0; i < num_components; i++)
+ new_value[i] = get_shared(ctx->block, value[i], shared);
+
+ return new_value;
+}
+
+void
+ir3_put_def(struct ir3_context *ctx, nir_def *def)
+{
+ unsigned bit_size = ir3_bitsize(ctx, def->bit_size);
+
if (bit_size <= 16) {
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
struct ir3_instruction *dst = ctx->last_dst[i];
@@ -627,7 +652,8 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
dst->array.id = arr->id;
dst->array.offset = n;
dst->array.base = INVALID_REG;
- ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | flags)->def = src->dsts[0];
+ ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | flags |
+ (src->dsts[0]->flags & IR3_REG_SHARED))->def = src->dsts[0];
if (arr->last_write && arr->last_write->instr->block == block)
ir3_reg_set_last_array(mov, dst, arr->last_write);
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index c0274deeb92..fbecb2c95e5 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -201,8 +201,17 @@ struct ir3_instruction **ir3_get_dst_ssa(struct ir3_context *ctx,
nir_def *dst, unsigned n);
struct ir3_instruction **ir3_get_def(struct ir3_context *ctx, nir_def *def,
unsigned n);
-struct ir3_instruction *const *ir3_get_src(struct ir3_context *ctx,
- nir_src *src);
+struct ir3_instruction *const *ir3_get_src_maybe_shared(struct ir3_context *ctx,
+ nir_src *src);
+struct ir3_instruction *const *ir3_get_src_shared(struct ir3_context *ctx,
+ nir_src *src, bool shared);
+
+static inline struct ir3_instruction *const *
+ir3_get_src(struct ir3_context *ctx, nir_src *src)
+{
+ return ir3_get_src_shared(ctx, src, false);
+}
+
void ir3_put_def(struct ir3_context *ctx, nir_def *def);
struct ir3_instruction *ir3_create_collect(struct ir3_block *block,
struct ir3_instruction *const *arr,