diff options
20 files changed, 62 insertions, 12 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h index 296236006b..c5c51c18a0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -129,6 +129,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; } /* if we get here, we missed a shader cap above (and should have seen diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 609c81b1d4..0e59b88489 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -459,6 +459,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: return 1; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; } /* if we get here, we missed a shader cap above (and should have seen diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 4d838fdd52..11947097e9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -56,7 +56,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, - { 0, 0, 0, 0, 0, 0, NONE, "", 19 }, /* removed */ + { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, { 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */ @@ -155,7 +155,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, { 0, 1, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF }, { 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END }, - { 0, 0, 0, 0, 0, 0, NONE, "", 118 }, /* removed */ + { 1, 3, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA }, { 1, 1, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I }, { 1, 2, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV }, { 1, 2, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index d572ff03d7..e5b8427a03 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -193,6 +193,7 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_MAD: case TGSI_OPCODE_SUB: case TGSI_OPCODE_LRP: + case TGSI_OPCODE_FMA: case TGSI_OPCODE_FRC: case TGSI_OPCODE_CEIL: case TGSI_OPCODE_CLAMP: diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index e0fd1a2dba..26cc9ffc6f 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -336,6 +336,8 @@ to be 0. is supported. If it is, DTRUNC/DCEIL/DFLR/DROUND opcodes may be used. * ``PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED``: Whether DFRACEXP and DLDEXP are supported. +* ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether FMA and DFMA (doubles only) + are supported. .. _pipe_compute_cap: diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index b0a975aa70..7771136f16 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -272,6 +272,21 @@ This instruction replicates its result. dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w +.. opcode:: FMA - Fused Multiply-Add + +Perform a * b + c with no intermediate rounding step. + +.. math:: + + dst.x = src0.x \times src1.x + src2.x + + dst.y = src0.y \times src1.y + src2.y + + dst.z = src0.z \times src1.z + src2.z + + dst.w = src0.w \times src1.w + src2.w + + .. opcode:: DP2A - 2-component Dot Product And Add .. math:: @@ -1962,6 +1977,17 @@ source is an integer. dst.zw = src0.zw \times src1.zw + src2.zw +.. opcode:: DFMA - Fused Multiply-Add + +Perform a * b + c with no intermediate rounding step. + +.. math:: + + dst.xy = src0.xy \times src1.xy + src2.xy + + dst.zw = src0.zw \times src1.zw + src2.zw + + .. opcode:: DRCP - Reciprocal .. math:: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index a4699e4b69..1d73513704 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -363,6 +363,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index dc76464f1f..50847e2b42 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -158,6 +158,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; default: debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap); diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 0fca9e06af..eeb714864e 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -250,6 +250,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; default: debug_printf("unknown vertex shader param %d\n", param); @@ -289,6 +290,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; default: debug_printf("unknown fragment shader param %d\n", param); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index ed07ba442d..829dfbc13f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -289,6 +289,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 686d892d0e..04c34f537e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -295,6 +295,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: return 1; case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: return 16; /* would be 32 in linked (OpenGL-style) mode */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index fca8001a03..752d7e59fd 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -287,6 +287,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; @@ -341,6 +342,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 24d901ead8..21e5d42adc 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -493,6 +493,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 0; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; } return 0; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 2ee59c8bac..54540c3840 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -7295,7 +7295,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, - {19, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FMA, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, {22, 0, ALU_OP0_NOP, tgsi_unsupported}, @@ -7494,7 +7494,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, - {19, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FMA, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, {22, 0, ALU_OP0_NOP, tgsi_unsupported}, @@ -7693,7 +7693,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, - {19, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FMA, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, {22, 0, ALU_OP0_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index f1a53883f2..0aacab12db 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -425,6 +425,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; } return 0; diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index bac0dbcdff..7b01d35a93 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -375,6 +375,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; } /* If we get here, we failed to handle a cap above */ @@ -431,6 +432,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; } /* If we get here, we failed to handle a cap above */ diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 7c628470ea..0be8ec2c98 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -319,6 +319,7 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index a8ffe9cfff..67f48e4293 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -644,6 +644,7 @@ enum pipe_shader_cap PIPE_SHADER_CAP_DOUBLES, PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */ PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED, + PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED, }; /** diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 95ac5900f3..c14bcbca33 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -306,7 +306,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_MAD 16 #define TGSI_OPCODE_SUB 17 #define TGSI_OPCODE_LRP 18 - /* gap */ +#define TGSI_OPCODE_FMA 19 #define TGSI_OPCODE_SQRT 20 #define TGSI_OPCODE_DP2A 21 /* gap */ @@ -404,7 +404,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_BREAKC 115 #define TGSI_OPCODE_KILL_IF 116 /* conditional kill */ #define TGSI_OPCODE_END 117 /* aka HALT */ - /* gap */ +#define TGSI_OPCODE_DFMA 118 #define TGSI_OPCODE_F2I 119 #define TGSI_OPCODE_IDIV 120 #define TGSI_OPCODE_IMAX 121 @@ -510,7 +510,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_DSNE 206 /* SM5 */ #define TGSI_OPCODE_DRCP 207 /* eg, cayman */ #define TGSI_OPCODE_DSQRT 208 /* eg, cayman also has DRSQ */ -#define TGSI_OPCODE_DMAD 209 /* DFMA? */ +#define TGSI_OPCODE_DMAD 209 #define TGSI_OPCODE_DFRAC 210 /* eg, cayman */ #define TGSI_OPCODE_DLDEXP 211 /* eg, cayman */ #define TGSI_OPCODE_DFRACEXP 212 /* eg, cayman */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index bd191d864f..efee4b258e 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -332,6 +332,7 @@ public: int glsl_version; bool native_integers; bool have_sqrt; + bool have_fma; variable_storage *find_variable_storage(ir_variable *var); @@ -836,6 +837,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, case3fid(ADD, UADD, DADD); case3fid(MUL, UMUL, DMUL); case3fid(MAD, UMAD, DMAD); + case3fid(FMA, UMAD, DFMA); case3(DIV, IDIV, UDIV); case4d(MAX, IMAX, UMAX, DMAX); case4d(MIN, IMIN, UMIN, DMIN); @@ -2222,10 +2224,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]); break; case ir_triop_fma: - /* NOTE: Perhaps there should be a special opcode that enforces fused - * mul-add. Just use MAD for now. - */ - emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); + /* In theory, MAD is incorrect here. */ + if (have_fma) + emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]); + else + emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); break; case ir_unop_interpolate_at_centroid: emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); @@ -5564,6 +5567,8 @@ get_mesa_program(struct gl_context *ctx, v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); + v->have_fma = pscreen->get_shader_param(pscreen, ptarget, + PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED); _mesa_copy_linked_program_data(shader->Stage, shader_program, prog); _mesa_generate_parameters_list_for_uniforms(shader_program, shader, |