summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRhys Perry <pendingchaos02@gmail.com>2021-03-24 17:17:38 +0000
committerMarge Bot <emma+marge@anholt.net>2021-12-13 11:22:33 +0000
commit165ca5088b79a5f0b2ffcbfbf15c0f90b924bcd3 (patch)
tree5b68b7a111a6ba1e7ac8f98b698b9627c7364272
parentc5f02a1cd3b110bafff0fc55064938604bf539ee (diff)
radv,aco: implement nir_op_ffma
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9805>
-rw-r--r--src/amd/compiler/aco_instruction_selection.cpp29
-rw-r--r--src/amd/compiler/aco_instruction_selection_setup.cpp1
-rw-r--r--src/amd/compiler/aco_optimizer.cpp4
-rw-r--r--src/amd/vulkan/radv_pipeline.c1
4 files changed, 32 insertions, 3 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 7b786c85667..7366cbb4fef 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2092,6 +2092,35 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
break;
}
+ case nir_op_ffma: {
+ if (dst.regClass() == v2b) {
+ emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f16, dst, false, 3);
+ } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
+ assert(instr->dest.dest.ssa.num_components == 2);
+
+ Temp src0 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[0]));
+ Temp src1 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[1]));
+ Temp src2 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[2]));
+
+ /* swizzle to opsel: all swizzles are either 0 (x) or 1 (y) */
+ unsigned opsel_lo = 0, opsel_hi = 0;
+ for (unsigned i = 0; i < 3; i++) {
+ opsel_lo |= (instr->src[i].swizzle[0] & 1) << i;
+ opsel_hi |= (instr->src[i].swizzle[1] & 1) << i;
+ }
+
+ bld.vop3p(aco_opcode::v_pk_fma_f16, Definition(dst), src0, src1, src2, opsel_lo, opsel_hi);
+ emit_split_vector(ctx, dst, 2);
+ } else if (dst.regClass() == v1) {
+ emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f32, dst,
+ ctx->block->fp_mode.must_flush_denorms32, 3);
+ } else if (dst.regClass() == v2) {
+ emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f64, dst, false, 3);
+ } else {
+ isel_err(&instr->instr, "Unimplemented NIR instr bit size");
+ }
+ break;
+ }
case nir_op_fmax: {
if (dst.regClass() == v2b) {
// TODO: check fp_mode.must_flush_denorms16_64
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 8ad3a515b29..ed72d3009ba 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -467,6 +467,7 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_fmul:
case nir_op_fadd:
case nir_op_fsub:
+ case nir_op_ffma:
case nir_op_fmax:
case nir_op_fmin:
case nir_op_fneg:
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index b2569a07f6d..ed90c63bf30 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -3112,9 +3112,7 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
/* check for fneg modifiers */
if (instr_info.can_use_input_modifiers[(int)instr->opcode]) {
- /* at this point, we only have 2-operand instructions */
- assert(instr->operands.size() == 2);
- for (unsigned i = 0; i < 2; i++) {
+ for (unsigned i = 0; i < instr->operands.size(); i++) {
Operand& op = instr->operands[i];
if (!op.isTemp())
continue;
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 9c6e5a1adcb..fd8cf558c72 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3396,6 +3396,7 @@ opt_vectorize_callback(const nir_instr *instr, void *_)
case nir_op_fadd:
case nir_op_fsub:
case nir_op_fmul:
+ case nir_op_ffma:
case nir_op_fneg:
case nir_op_fsat:
case nir_op_fmin: