summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorConnor Abbott <cwabbott0@gmail.com>2023-03-02 13:17:02 +0100
committerMarge Bot <emma+marge@anholt.net>2024-04-26 12:55:13 +0000
commit4c4234501fb6281ff0a4c235b0ba7cb0fda4325c (patch)
tree1f3184b0122881fbc19cf5ec1182db85cc5d69b1
parent823e034db28392362868a441cfc4833275676d89 (diff)
ir3: Support scalar ALU in the builder
Propagate shared-ness to the the source, and when creating an ALU instruction with all scalar sources and the instruction is supported on the scalar ALU, automatically make the destination scalar. This includes MOV/COV. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
-rw-r--r--src/freedreno/ir3/ir3.h170
-rw-r--r--src/freedreno/ir3/ir3_context.c1
2 files changed, 91 insertions, 80 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 2879a8f4e7e..f5a303e3773 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -2130,8 +2130,7 @@ __ssa_src(struct ir3_instruction *instr, struct ir3_instruction *src,
unsigned flags)
{
struct ir3_register *reg;
- if (src->dsts[0]->flags & IR3_REG_HALF)
- flags |= IR3_REG_HALF;
+ flags |= src->dsts[0]->flags & (IR3_REG_HALF | IR3_REG_SHARED);
reg = ir3_src_create(instr, INVALID_REG, IR3_REG_SSA | flags);
reg->def = src->dsts[0];
reg->wrmask = src->dsts[0]->wrmask;
@@ -2229,14 +2228,14 @@ static inline struct ir3_instruction *
ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
{
struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1);
- ir3_register_flags flags = type_flags(type);
+ ir3_register_flags flags = type_flags(type) | (src->dsts[0]->flags & IR3_REG_SHARED);
__ssa_dst(instr)->flags |= flags;
if (src->dsts[0]->flags & IR3_REG_ARRAY) {
struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY);
src_reg->array = src->dsts[0]->array;
} else {
- __ssa_src(instr, src, src->dsts[0]->flags & IR3_REG_SHARED);
+ __ssa_src(instr, src, 0);
}
assert(!(src->dsts[0]->flags & IR3_REG_RELATIV));
instr->cat1.src_type = type;
@@ -2249,7 +2248,7 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src, type_t src_type,
type_t dst_type)
{
struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1);
- ir3_register_flags dst_flags = type_flags(dst_type);
+ ir3_register_flags dst_flags = type_flags(dst_type) | (src->dsts[0]->flags & IR3_REG_SHARED);
ASSERTED ir3_register_flags src_flags = type_flags(src_type);
assert((src->dsts[0]->flags & IR3_REG_HALF) == src_flags);
@@ -2309,44 +2308,51 @@ static inline struct ir3_instruction *ir3_##name(struct ir3_block *block) \
#define INSTR0(name) __INSTR0((ir3_instruction_flags)0, name, OPC_##name)
/* clang-format off */
-#define __INSTR1(flag, dst_count, name, opc) \
+#define __INSTR1(flag, dst_count, name, opc, scalar_alu) \
static inline struct ir3_instruction *ir3_##name( \
struct ir3_block *block, struct ir3_instruction *a, unsigned aflags) \
{ \
struct ir3_instruction *instr = \
ir3_instr_create(block, opc, dst_count, 1); \
+ unsigned dst_flag = scalar_alu ? (a->dsts[0]->flags & IR3_REG_SHARED) : 0; \
for (unsigned i = 0; i < dst_count; i++) \
- __ssa_dst(instr); \
+ __ssa_dst(instr)->flags |= dst_flag; \
__ssa_src(instr, a, aflags); \
instr->flags |= flag; \
return instr; \
}
/* clang-format on */
-#define INSTR1F(f, name) __INSTR1(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
-#define INSTR1(name) __INSTR1((ir3_instruction_flags)0, 1, name, OPC_##name)
-#define INSTR1NODST(name) __INSTR1((ir3_instruction_flags)0, 0, name, OPC_##name)
+#define INSTR1F(f, name) __INSTR1(IR3_INSTR_##f, 1, name##_##f, OPC_##name, \
+ false)
+#define INSTR1(name) __INSTR1((ir3_instruction_flags)0, 1, name, OPC_##name, false)
+#define INSTR1S(name) __INSTR1((ir3_instruction_flags)0, 1, name, OPC_##name, true)
+#define INSTR1NODST(name) __INSTR1((ir3_instruction_flags)0, 0, name, OPC_##name, false)
/* clang-format off */
-#define __INSTR2(flag, dst_count, name, opc) \
+#define __INSTR2(flag, dst_count, name, opc, scalar_alu) \
static inline struct ir3_instruction *ir3_##name( \
struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \
struct ir3_instruction *b, unsigned bflags) \
{ \
struct ir3_instruction *instr = ir3_instr_create(block, opc, dst_count, 2); \
+ unsigned dst_flag = scalar_alu ? (a->dsts[0]->flags & b->dsts[0]->flags & \
+ IR3_REG_SHARED) : 0; \
for (unsigned i = 0; i < dst_count; i++) \
- __ssa_dst(instr); \
+ __ssa_dst(instr)->flags |= dst_flag; \
__ssa_src(instr, a, aflags); \
__ssa_src(instr, b, bflags); \
instr->flags |= flag; \
return instr; \
}
/* clang-format on */
-#define INSTR2F(f, name) __INSTR2(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
-#define INSTR2(name) __INSTR2((ir3_instruction_flags)0, 1, name, OPC_##name)
-#define INSTR2NODST(name) __INSTR2((ir3_instruction_flags)0, 0, name, OPC_##name)
+#define INSTR2F(f, name) __INSTR2(IR3_INSTR_##f, 1, name##_##f, OPC_##name, \
+ false)
+#define INSTR2(name) __INSTR2((ir3_instruction_flags)0, 1, name, OPC_##name, false)
+#define INSTR2S(name) __INSTR2((ir3_instruction_flags)0, 1, name, OPC_##name, true)
+#define INSTR2NODST(name) __INSTR2((ir3_instruction_flags)0, 0, name, OPC_##name, false)
/* clang-format off */
-#define __INSTR3(flag, dst_count, name, opc) \
+#define __INSTR3(flag, dst_count, name, opc, scalar_alu) \
static inline struct ir3_instruction *ir3_##name( \
struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \
struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \
@@ -2354,8 +2360,10 @@ static inline struct ir3_instruction *ir3_##name( \
{ \
struct ir3_instruction *instr = \
ir3_instr_create(block, opc, dst_count, 3); \
+ unsigned dst_flag = scalar_alu ? (a->dsts[0]->flags & b->dsts[0]->flags & \
+ c->dsts[0]->flags & IR3_REG_SHARED) : 0; \
for (unsigned i = 0; i < dst_count; i++) \
- __ssa_dst(instr); \
+ __ssa_dst(instr)->flags |= dst_flag; \
__ssa_src(instr, a, aflags); \
__ssa_src(instr, b, bflags); \
__ssa_src(instr, c, cflags); \
@@ -2363,9 +2371,11 @@ static inline struct ir3_instruction *ir3_##name( \
return instr; \
}
/* clang-format on */
-#define INSTR3F(f, name) __INSTR3(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
-#define INSTR3(name) __INSTR3((ir3_instruction_flags)0, 1, name, OPC_##name)
-#define INSTR3NODST(name) __INSTR3((ir3_instruction_flags)0, 0, name, OPC_##name)
+#define INSTR3F(f, name) __INSTR3(IR3_INSTR_##f, 1, name##_##f, OPC_##name, \
+ false)
+#define INSTR3(name) __INSTR3((ir3_instruction_flags)0, 1, name, OPC_##name, false)
+#define INSTR3S(name) __INSTR3((ir3_instruction_flags)0, 1, name, OPC_##name, true)
+#define INSTR3NODST(name) __INSTR3((ir3_instruction_flags)0, 0, name, OPC_##name, false)
/* clang-format off */
#define __INSTR4(flag, dst_count, name, opc) \
@@ -2483,53 +2493,53 @@ ir3_SHPS_MACRO(struct ir3_block *block)
}
/* cat2 instructions, most 2 src but some 1 src: */
-INSTR2(ADD_F)
-INSTR2(MIN_F)
-INSTR2(MAX_F)
-INSTR2(MUL_F)
-INSTR1(SIGN_F)
-INSTR2(CMPS_F)
-INSTR1(ABSNEG_F)
-INSTR2(CMPV_F)
-INSTR1(FLOOR_F)
-INSTR1(CEIL_F)
-INSTR1(RNDNE_F)
-INSTR1(RNDAZ_F)
-INSTR1(TRUNC_F)
-INSTR2(ADD_U)
-INSTR2(ADD_S)
-INSTR2(SUB_U)
-INSTR2(SUB_S)
-INSTR2(CMPS_U)
-INSTR2(CMPS_S)
-INSTR2(MIN_U)
-INSTR2(MIN_S)
-INSTR2(MAX_U)
-INSTR2(MAX_S)
-INSTR1(ABSNEG_S)
-INSTR2(AND_B)
-INSTR2(OR_B)
-INSTR1(NOT_B)
-INSTR2(XOR_B)
-INSTR2(CMPV_U)
-INSTR2(CMPV_S)
-INSTR2(MUL_U24)
-INSTR2(MUL_S24)
-INSTR2(MULL_U)
-INSTR1(BFREV_B)
-INSTR1(CLZ_S)
-INSTR1(CLZ_B)
-INSTR2(SHL_B)
-INSTR2(SHR_B)
-INSTR2(ASHR_B)
+INSTR2S(ADD_F)
+INSTR2S(MIN_F)
+INSTR2S(MAX_F)
+INSTR2S(MUL_F)
+INSTR1S(SIGN_F)
+INSTR2S(CMPS_F)
+INSTR1S(ABSNEG_F)
+INSTR2S(CMPV_F)
+INSTR1S(FLOOR_F)
+INSTR1S(CEIL_F)
+INSTR1S(RNDNE_F)
+INSTR1S(RNDAZ_F)
+INSTR1S(TRUNC_F)
+INSTR2S(ADD_U)
+INSTR2S(ADD_S)
+INSTR2S(SUB_U)
+INSTR2S(SUB_S)
+INSTR2S(CMPS_U)
+INSTR2S(CMPS_S)
+INSTR2S(MIN_U)
+INSTR2S(MIN_S)
+INSTR2S(MAX_U)
+INSTR2S(MAX_S)
+INSTR1S(ABSNEG_S)
+INSTR2S(AND_B)
+INSTR2S(OR_B)
+INSTR1S(NOT_B)
+INSTR2S(XOR_B)
+INSTR2S(CMPV_U)
+INSTR2S(CMPV_S)
+INSTR2S(MUL_U24)
+INSTR2S(MUL_S24)
+INSTR2S(MULL_U)
+INSTR1S(BFREV_B)
+INSTR1S(CLZ_S)
+INSTR1S(CLZ_B)
+INSTR2S(SHL_B)
+INSTR2S(SHR_B)
+INSTR2S(ASHR_B)
INSTR2(BARY_F)
INSTR2(FLAT_B)
-INSTR2(MGEN_B)
-INSTR2(GETBIT_B)
+INSTR2S(MGEN_B)
+INSTR2S(GETBIT_B)
INSTR1(SETRM)
-INSTR1(CBITS_B)
-INSTR2(SHB)
-INSTR2(MSAD)
+INSTR1S(CBITS_B)
+INSTR2S(SHB)
+INSTR2S(MSAD)
/* cat3 instructions: */
INSTR3(MAD_U16)
@@ -2543,26 +2553,26 @@ INSTR3(MAD_F32)
INSTR3(DP2ACC)
INSTR3(DP4ACC)
/* NOTE: SEL_B32 checks for zero vs nonzero */
-INSTR3(SEL_B16)
-INSTR3(SEL_B32)
-INSTR3(SEL_S16)
-INSTR3(SEL_S32)
-INSTR3(SEL_F16)
-INSTR3(SEL_F32)
+INSTR3S(SEL_B16)
+INSTR3S(SEL_B32)
+INSTR3S(SEL_S16)
+INSTR3S(SEL_S32)
+INSTR3S(SEL_F16)
+INSTR3S(SEL_F32)
INSTR3(SAD_S16)
INSTR3(SAD_S32)
/* cat4 instructions: */
-INSTR1(RCP)
-INSTR1(RSQ)
-INSTR1(HRSQ)
-INSTR1(LOG2)
-INSTR1(HLOG2)
-INSTR1(EXP2)
-INSTR1(HEXP2)
-INSTR1(SIN)
-INSTR1(COS)
-INSTR1(SQRT)
+INSTR1S(RCP)
+INSTR1S(RSQ)
+INSTR1S(HRSQ)
+INSTR1S(LOG2)
+INSTR1S(HLOG2)
+INSTR1S(EXP2)
+INSTR1S(HEXP2)
+INSTR1S(SIN)
+INSTR1S(COS)
+INSTR1S(SQRT)
/* cat5 instructions: */
INSTR1(DSX)
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index 35a1453780c..423bb1fbaa0 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -249,6 +249,7 @@ ir3_put_def(struct ir3_context *ctx, nir_def *def)
continue;
if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
+ ctx->last_dst[i]->dsts[0]->flags &= ~IR3_REG_SHARED;
}
}