diff options
author | Marek Olšák <marek.olsak@amd.com> | 2017-08-19 21:41:57 +0200 |
---|---|---|
committer | Marek Olšák <marek.olsak@amd.com> | 2017-08-22 13:29:47 +0200 |
commit | 86e6f7a73bdbced24e10fb80fdcba591e0568120 (patch) | |
tree | 8cb2a014a944585bf49e6af66075500c927cca49 | |
parent | 0bb367830a613da30991ca60b4dccdda61302038 (diff) |
gallium: remove TGSI opcode DP2A
use DP3 instead.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 29 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/nir/tgsi_to_nir.c | 14 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 29 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_info.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_lowering.c | 30 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_lowering.h | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_util.c | 4 | ||||
-rw-r--r-- | src/gallium/docs/source/tgsi.rst | 13 | ||||
-rw-r--r-- | src/gallium/drivers/etnaviv/etnaviv_compiler.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_tgsi_to_rc.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_tgsi_insn.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 51 | ||||
-rw-r--r-- | src/gallium/include/pipe/p_shader_tokens.h | 2 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_atifs_to_tgsi.c | 1 |
17 files changed, 6 insertions, 183 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index dc6568a2d4..0319b8885f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -159,34 +159,6 @@ static struct lp_build_tgsi_action dp2_action = { dp2_emit /* emit */ }; -/* TGSI_OPCODE_DP2A */ -static void -dp2a_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - dp_fetch_args(bld_base, emit_data, 2); - emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst, - 2, TGSI_CHAN_X); -} - -static void -dp2a_emit( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - LLVMValueRef tmp; - tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data); - emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, - emit_data->args[5], tmp); -} - -static struct lp_build_tgsi_action dp2a_action = { - dp2a_fetch_args, /* fetch_args */ - dp2a_emit /* emit */ -}; - /* TGSI_OPCODE_DP3 */ static void dp3_fetch_args( @@ -1286,7 +1258,6 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base) bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action; bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action; bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action; - bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action; bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action; bld_base->op_actions[TGSI_OPCODE_DST] = dst_action; bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index 92ecb43a2b..3cc50796bc 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -554,9 +554,6 @@ lp_emit_instruction_aos( dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); break; - case TGSI_OPCODE_DP2A: - return FALSE; - case TGSI_OPCODE_FRC: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = lp_build_floor(&bld->bld_base.base, src0); diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 46238a1025..3daa896062 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1001,15 +1001,6 @@ ttn_xpd(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) } static void -ttn_dp2a(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) -{ - ttn_move_dest(b, dest, - ttn_channel(b, nir_fadd(b, nir_fdot2(b, src[0], src[1]), - src[2]), - X)); -} - -static void ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) { ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1])); @@ -1536,7 +1527,6 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_MAD] = nir_op_ffma, [TGSI_OPCODE_LRP] = 0, [TGSI_OPCODE_SQRT] = nir_op_fsqrt, - [TGSI_OPCODE_DP2A] = 0, [TGSI_OPCODE_FRC] = nir_op_ffract, [TGSI_OPCODE_FLR] = nir_op_ffloor, [TGSI_OPCODE_ROUND] = nir_op_fround_even, @@ -1773,10 +1763,6 @@ ttn_emit_instruction(struct ttn_compile *c) ttn_dp4(b, op_trans[tgsi_op], dest, src); break; - case TGSI_OPCODE_DP2A: - ttn_dp2a(b, op_trans[tgsi_op], dest, src); - break; - case TGSI_OPCODE_DPH: ttn_dph(b, op_trans[tgsi_op], dest, src); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index cc3e232d38..34a4af6f55 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3185,31 +3185,6 @@ exec_dp4(struct tgsi_exec_machine *mach, } static void -exec_dp2a(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - unsigned int chan; - union tgsi_exec_channel arg[3]; - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(&arg[2], &arg[0], &arg[1]); - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); - - fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_add(&arg[0], &arg[0], &arg[1]); - - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } - } -} - -static void exec_dph(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { @@ -5183,10 +5158,6 @@ exec_instruction( exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_DP2A: - exec_dp2a(mach, inst); - break; - case TGSI_OPCODE_FRC: exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 8450cd7a07..c31705a0ee 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -58,7 +58,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, - { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, + { 1, 3, 0, 0, 0, 0, 0, REPL, "", 21 }, /* removed */ { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c index c26c13b5e5..3013a41a8e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ -920,14 +920,11 @@ transform_log(struct tgsi_transform_context *tctx, * DP2 - 2-component Dot Product * dst = src0.x \times src1.x + src0.y \times src1.y * - * DP2A - 2-component Dot Product And Add - * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x - * * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar * operations, which is what you'd prefer for a ISA that is natively * scalar. Probably a native vector ISA would at least already have * DP4/DP3 instructions, but perhaps there is room for an alternative - * translation for DPH/DP2/DP2A using vector instructions. + * translation for DPH/DP2 using vector instructions. * * ; needs: 1 tmp * MUL tmpA.x, src0.x, src1.x @@ -939,8 +936,6 @@ transform_log(struct tgsi_transform_context *tctx, * } else if (DP4) { * MAD tmpA.x, src0.w, src1.w, tmpA.x * } - * } else if (DP2A) { - * ADD tmpA.x, src2.x, tmpA.x * } * ; fixup last instruction to replicate into dst */ @@ -948,7 +943,6 @@ transform_log(struct tgsi_transform_context *tctx, #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2)) #define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2)) #define DP2_GROW (NINST(2) + NINST(3) - OINST(2)) -#define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3)) #define DOTP_TMP 1 static void transform_dotp(struct tgsi_transform_context *tctx, @@ -958,7 +952,6 @@ transform_dotp(struct tgsi_transform_context *tctx, struct tgsi_full_dst_register *dst = &inst->Dst[0]; struct tgsi_full_src_register *src0 = &inst->Src[0]; struct tgsi_full_src_register *src1 = &inst->Src[1]; - struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */ struct tgsi_full_instruction new_inst; unsigned opcode = inst->Instruction.Opcode; @@ -1026,17 +1019,6 @@ transform_dotp(struct tgsi_transform_context *tctx, reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W)); reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); } - } else if (opcode == TGSI_OPCODE_DP2A) { - tctx->emit_instruction(tctx, &new_inst); - - /* ADD tmpA.x, src2.x, tmpA.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X)); - reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X)); } /* fixup last instruction to write to dst: */ @@ -1562,11 +1544,6 @@ transform_instr(struct tgsi_transform_context *tctx, goto skip; transform_dotp(tctx, inst); break; - case TGSI_OPCODE_DP2A: - if (!ctx->config->lower_DP2A) - goto skip; - transform_dotp(tctx, inst); - break; case TGSI_OPCODE_FLR: if (!ctx->config->lower_FLR) goto skip; @@ -1657,7 +1634,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, OPCS(DP3) || OPCS(DPH) || OPCS(DP2) || - OPCS(DP2A) || OPCS(FLR) || OPCS(CEIL) || OPCS(TRUNC) || @@ -1725,10 +1701,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, newlen += DP2_GROW * OPCS(DP2); numtmp = MAX2(numtmp, DOTP_TMP); } - if (OPCS(DP2A)) { - newlen += DP2A_GROW * OPCS(DP2A); - numtmp = MAX2(numtmp, DOTP_TMP); - } if (OPCS(FLR)) { newlen += FLR_GROW * OPCS(FLR); numtmp = MAX2(numtmp, FLR_TMP); diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h index 20e4f843a9..85e4b8e983 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h @@ -67,7 +67,6 @@ struct tgsi_lowering_config unsigned lower_DP3:1; unsigned lower_DPH:1; unsigned lower_DP2:1; - unsigned lower_DP2A:1; unsigned lower_FLR:1; unsigned lower_CEIL:1; unsigned lower_TRUNC:1; diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index f244db6102..e4cbdae271 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -56,7 +56,6 @@ OP13(MAD) OP12_TEX(TEX_LZ) OP13(LRP) OP11(SQRT) -OP13(DP2A) OP11(FRC) OP12_TEX(TXF_LZ) OP11(FLR) diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 932545cb89..fc61351f6e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -262,10 +262,6 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0; break; - case TGSI_OPCODE_DP2A: - read_mask = src_idx == 2 ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_XY; - break; - case TGSI_OPCODE_DP2: read_mask = TGSI_WRITEMASK_XY; break; diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 20749a12a4..660216219c 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -285,19 +285,6 @@ Perform a * b + c with no intermediate rounding step. dst.w = src0.w \times src1.w + src2.w -.. opcode:: DP2A - 2-component Dot Product And Add - -.. math:: - - dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x - - dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x - - dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x - - dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x - - .. opcode:: FRC - Fraction .. math:: diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c index f65a168672..88f204caca 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c @@ -2344,7 +2344,6 @@ etna_compile_shader(struct etna_shader_variant *v) .lower_EXP = true, .lower_LOG = true, .lower_DP2 = true, - .lower_DP2A = true, .lower_TRUNC = true, .lower_XPD = true }; diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index a4583302ac..cc0ac4810e 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -51,7 +51,6 @@ static unsigned translate_opcode(unsigned opcode) case TGSI_OPCODE_SGE: return RC_OPCODE_SGE; case TGSI_OPCODE_MAD: return RC_OPCODE_MAD; case TGSI_OPCODE_LRP: return RC_OPCODE_LRP; - /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index b49ecbab7a..d3728fb61d 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -9089,7 +9089,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, - [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, + [21] = { ALU_OP0_NOP, tgsi_unsupported}, [22] = { ALU_OP0_NOP, tgsi_unsupported}, [23] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, @@ -9287,7 +9287,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, - [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, + [21] = { ALU_OP0_NOP, tgsi_unsupported}, [22] = { ALU_OP0_NOP, tgsi_unsupported}, [23] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, @@ -9510,7 +9510,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, - [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, + [21] = { ALU_OP0_NOP, tgsi_unsupported}, [22] = { ALU_OP0_NOP, tgsi_unsupported}, [23] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index fc3ec5eed7..dd29f74a2a 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -44,7 +44,6 @@ translate_opcode(uint opcode) { switch (opcode) { case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; - case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index d7ec48eb8a..56afd497d7 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -3578,55 +3578,6 @@ emit_cmp(struct svga_shader_emitter_v10 *emit, /** - * Emit code for TGSI_OPCODE_DP2A instruction. - */ -static boolean -emit_dp2a(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) -{ - /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x - * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x - * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x - * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x - * Translate into - * MAD tmp.x, s0.y, s1.y, s2.x - * MAD tmp.x, s0.x, s1.x, tmp.x - * MOV dst.xyzw, tmp.xxxx - */ - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - - struct tgsi_full_src_register tmp_src_xxxx = - scalar_src(&tmp_src, TGSI_SWIZZLE_X); - struct tgsi_full_dst_register tmp_dst_x = - writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); - - struct tgsi_full_src_register src0_xxxx = - scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); - struct tgsi_full_src_register src0_yyyy = - scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); - struct tgsi_full_src_register src1_xxxx = - scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); - struct tgsi_full_src_register src1_yyyy = - scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); - struct tgsi_full_src_register src2_xxxx = - scalar_src(&inst->Src[2], TGSI_SWIZZLE_X); - - emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy, - &src1_yyyy, &src2_xxxx, FALSE); - emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx, - &src1_xxxx, &tmp_src_xxxx, FALSE); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], - &tmp_src_xxxx, inst->Instruction.Saturate); - - free_temp_indexes(emit); - - return TRUE; -} - - -/** * Emit code for TGSI_OPCODE_DPH instruction. */ static boolean @@ -5761,8 +5712,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, return emit_cmp(emit, inst); case TGSI_OPCODE_COS: return emit_sincos(emit, inst); - case TGSI_OPCODE_DP2A: - return emit_dp2a(emit, inst); case TGSI_OPCODE_DPH: return emit_dph(emit, inst); case TGSI_OPCODE_DST: diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 9fd8419853..5c4af89b25 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -358,7 +358,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_LRP 18 #define TGSI_OPCODE_FMA 19 #define TGSI_OPCODE_SQRT 20 -#define TGSI_OPCODE_DP2A 21 +/* gap */ #define TGSI_OPCODE_F2U64 22 #define TGSI_OPCODE_F2I64 23 #define TGSI_OPCODE_FRC 24 diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.c b/src/mesa/state_tracker/st_atifs_to_tgsi.c index 13e013cebc..2a171b5a84 100644 --- a/src/mesa/state_tracker/st_atifs_to_tgsi.c +++ b/src/mesa/state_tracker/st_atifs_to_tgsi.c @@ -228,7 +228,6 @@ emit_special_inst(struct st_translate *t, const struct instruction_desc *desc, src[2] = args[0]; ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3, 0); } else if (!strcmp(desc->name, "DOT2_ADD")) { - /* note: DP2A is not implemented in most pipe drivers */ tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */ src[0] = args[0]; src[1] = args[1]; |