gallium: remove TGSI opcode DPH

use DP4 or DP3 + ADD. Reviewed-by: Roland Scheidegger <sroland@vmware.com>
author: Marek Olšák <marek.olsak@amd.com> 2017-08-19 22:00:02 +0200
committer: Marek Olšák <marek.olsak@amd.com> 2017-08-22 13:29:47 +0200
commit: 3e2ff8fade879cedfdff0e180a6996df1223a823 (patch)
tree: c6601d72e5f5ba13264f7355fef41558ed65cbf3
parent: 86e6f7a73bdbced24e10fb80fdcba591e0568120 (diff)
22 files changed, 22 insertions, 257 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 0319b8885f..f6baca0ec4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -234,22 +234,6 @@ static struct lp_build_tgsi_action dp4_action = {
    dp4_emit	 /* emit */
 };
 
-/* TGSI_OPCODE_DPH */
-static void
-dph_fetch_args(
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   dp_fetch_args(bld_base, emit_data, 4);
-   /* src0.w */
-   emit_data->args[3] = bld_base->base.one;
-}
-
-const struct lp_build_tgsi_action dph_action = {
-   dph_fetch_args,	 /* fetch_args */
-   dp4_emit	 /* emit */
-};
-
 /* TGSI_OPCODE_DST */
 static void
 dst_fetch_args(
@@ -1258,7 +1242,6 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
    bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
    bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
    bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
-   bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
    bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
    bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
    bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 3cc50796bc..46abda0cde 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -593,9 +593,6 @@ lp_emit_instruction_aos(
    case TGSI_OPCODE_XPD:
       return FALSE;
 
-   case TGSI_OPCODE_DPH:
-      return FALSE;
-
    case TGSI_OPCODE_COS:
       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 3daa896062..ea1f064b83 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1019,13 +1019,6 @@ ttn_dp4(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 }
 
 static void
-ttn_dph(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
-{
-   ttn_move_dest(b, dest, nir_fadd(b, nir_fdot3(b, src[0], src[1]),
-                                   ttn_channel(b, src[1], W)));
-}
-
-static void
 ttn_umad(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
    ttn_move_dest(b, dest, nir_iadd(b, nir_imul(b, src[0], src[1]), src[2]));
@@ -1534,7 +1527,6 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
    [TGSI_OPCODE_LG2] = nir_op_flog2,
    [TGSI_OPCODE_POW] = nir_op_fpow,
    [TGSI_OPCODE_XPD] = 0,
-   [TGSI_OPCODE_DPH] = 0,
    [TGSI_OPCODE_COS] = nir_op_fcos,
    [TGSI_OPCODE_DDX] = nir_op_fddx,
    [TGSI_OPCODE_DDY] = nir_op_fddy,
@@ -1763,10 +1755,6 @@ ttn_emit_instruction(struct ttn_compile *c)
       ttn_dp4(b, op_trans[tgsi_op], dest, src);
       break;
 
-   case TGSI_OPCODE_DPH:
-      ttn_dph(b, op_trans[tgsi_op], dest, src);
-      break;
-
    case TGSI_OPCODE_UMAD:
       ttn_umad(b, op_trans[tgsi_op], dest, src);
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 34a4af6f55..c9265fbfda 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3185,35 +3185,6 @@ exec_dp4(struct tgsi_exec_machine *mach,
 }
 
 static void
-exec_dph(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   unsigned int chan;
-   union tgsi_exec_channel arg[3];
-
-   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-   micro_mul(&arg[2], &arg[0], &arg[1]);
-
-   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-   micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
-
-   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
-   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
-   micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
-
-   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
-   micro_add(&arg[0], &arg[0], &arg[1]);
-
-   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
-         store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
-      }
-   }
-}
-
-static void
 exec_dp2(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
 {
@@ -5186,10 +5157,6 @@ exec_instruction(
       exec_xpd(mach, inst);
       break;
 
-   case TGSI_OPCODE_DPH:
-      exec_dph(mach, inst);
-      break;
-
    case TGSI_OPCODE_COS:
       exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index c31705a0ee..b2477940b9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -72,7 +72,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
    { 1, 0, 0, 0, 0, 0, 0, OTHR, "CLOCK", TGSI_OPCODE_CLOCK },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
-   { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
+   { 1, 2, 0, 0, 0, 0, 0, REPL, "", 35 }, /* removed */
    { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
index 3013a41a8e..f3b5ade226 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
@@ -914,9 +914,6 @@ transform_log(struct tgsi_transform_context *tctx,
  * DP3 - 3-component Dot Product
  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
  *
- * DPH - Homogeneous Dot Product
- *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
- *
  * DP2 - 2-component Dot Product
  *   dst = src0.x \times src1.x + src0.y \times src1.y
  *
@@ -924,16 +921,14 @@ transform_log(struct tgsi_transform_context *tctx,
  * operations, which is what you'd prefer for a ISA that is natively
  * scalar.  Probably a native vector ISA would at least already have
  * DP4/DP3 instructions, but perhaps there is room for an alternative
- * translation for DPH/DP2 using vector instructions.
+ * translation for DP2 using vector instructions.
  *
  * ; needs: 1 tmp
  * MUL tmpA.x, src0.x, src1.x
  * MAD tmpA.x, src0.y, src1.y, tmpA.x
- * if (DPH || DP3 || DP4) {
+ * if (DP3 || DP4) {
  *   MAD tmpA.x, src0.z, src1.z, tmpA.x
- *   if (DPH) {
- *     ADD tmpA.x, src1.w, tmpA.x
- *   } else if (DP4) {
+ *   if (DP4) {
  *     MAD tmpA.x, src0.w, src1.w, tmpA.x
  *   }
  * }
@@ -941,7 +936,6 @@ transform_log(struct tgsi_transform_context *tctx,
  */
 #define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
 #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
-#define DPH_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
 #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
 #define DOTP_TMP  1
 static void
@@ -980,8 +974,7 @@ transform_dotp(struct tgsi_transform_context *tctx,
       reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
 
-      if ((opcode == TGSI_OPCODE_DPH) ||
-          (opcode == TGSI_OPCODE_DP3) ||
+      if ((opcode == TGSI_OPCODE_DP3) ||
           (opcode == TGSI_OPCODE_DP4)) {
          tctx->emit_instruction(tctx, &new_inst);
 
@@ -995,18 +988,7 @@ transform_dotp(struct tgsi_transform_context *tctx,
          reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
          reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
 
-         if (opcode == TGSI_OPCODE_DPH) {
-            tctx->emit_instruction(tctx, &new_inst);
-
-            /* ADD tmpA.x, src1.w, tmpA.x */
-            new_inst = tgsi_default_full_instruction();
-            new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
-            new_inst.Instruction.NumDstRegs = 1;
-            reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
-            new_inst.Instruction.NumSrcRegs = 2;
-            reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W));
-            reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
-         } else if (opcode == TGSI_OPCODE_DP4) {
+         if (opcode == TGSI_OPCODE_DP4) {
             tctx->emit_instruction(tctx, &new_inst);
 
             /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
@@ -1534,11 +1516,6 @@ transform_instr(struct tgsi_transform_context *tctx,
          goto skip;
       transform_dotp(tctx, inst);
       break;
-   case TGSI_OPCODE_DPH:
-      if (!ctx->config->lower_DPH)
-         goto skip;
-      transform_dotp(tctx, inst);
-      break;
    case TGSI_OPCODE_DP2:
       if (!ctx->config->lower_DP2)
          goto skip;
@@ -1632,7 +1609,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
          OPCS(LOG) ||
          OPCS(DP4) ||
          OPCS(DP3) ||
-         OPCS(DPH) ||
          OPCS(DP2) ||
          OPCS(FLR) ||
          OPCS(CEIL) ||
@@ -1693,10 +1669,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
       newlen += DP3_GROW * OPCS(DP3);
       numtmp = MAX2(numtmp, DOTP_TMP);
    }
-   if (OPCS(DPH)) {
-      newlen += DPH_GROW * OPCS(DPH);
-      numtmp = MAX2(numtmp, DOTP_TMP);
-   }
    if (OPCS(DP2)) {
       newlen += DP2_GROW * OPCS(DP2);
       numtmp = MAX2(numtmp, DOTP_TMP);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
index 85e4b8e983..f65d915c65 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
@@ -65,7 +65,6 @@ struct tgsi_lowering_config
    unsigned lower_LOG:1;
    unsigned lower_DP4:1;
    unsigned lower_DP3:1;
-   unsigned lower_DPH:1;
    unsigned lower_DP2:1;
    unsigned lower_FLR:1;
    unsigned lower_CEIL:1;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index e4cbdae271..68b4af81b7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -64,7 +64,6 @@ OP11(EX2)
 OP11(LG2)
 OP12(POW)
 OP12(XPD)
-OP12(DPH)
 OP11(COS)
 OP11(DDX)
 OP11(DDY)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index fc61351f6e..6756bca23d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -274,10 +274,6 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
       read_mask = TGSI_WRITEMASK_XYZW;
       break;
 
-   case TGSI_OPCODE_DPH:
-      read_mask = src_idx == 0 ? TGSI_WRITEMASK_XYZ : TGSI_WRITEMASK_XYZW;
-      break;
-
    case TGSI_OPCODE_TEX:
    case TGSI_OPCODE_TXD:
    case TGSI_OPCODE_TXB:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 660216219c..54767a7189 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -363,15 +363,6 @@ This instruction replicates its result.
   dst.w = 1
 
 
-.. opcode:: DPH - Homogeneous Dot Product
-
-This instruction replicates its result.
-
-.. math::
-
-  dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
-
-
 .. opcode:: COS - Cosine
 
 This instruction replicates its result.
@@ -3672,7 +3663,7 @@ of the operands are equal to 0. That means that 0 * Inf = 0. This
 should be set the same way for an entire pipeline. Note that this
 applies not only to the literal MUL TGSI opcode, but all FP32
 multiplications implied by other operations, such as MAD, FMA, DP2,
-DP3, DP4, DPH, DST, LOG, LRP, XPD, and possibly others. If there is a
+DP3, DP4, DST, LOG, LRP, XPD, and possibly others. If there is a
 mismatch between shaders, then it is unspecified whether this behavior
 will be enabled.
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
index 88f204caca..3ccb737381 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
@@ -1664,33 +1664,6 @@ trans_lg2(const struct instr_translater *t, struct etna_compile *c,
 }
 
 static void
-trans_dph(const struct instr_translater *t, struct etna_compile *c,
-          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
-   /*
-   DP3 tmp.xyzw, src0.xyzw, src1,xyzw, void
-   ADD dst.xyzw, tmp.xyzw, void, src1.wwww
-   */
-   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
-   struct etna_inst ins[2] = { };
-
-   ins[0].opcode = INST_OPCODE_DP3;
-   ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
-                                         INST_COMPS_Z | INST_COMPS_W);
-   ins[0].src[0] = src[0];
-   ins[0].src[1] = src[1];
-
-   ins[1].opcode = INST_OPCODE_ADD;
-   ins[1].sat = inst->Instruction.Saturate;
-   ins[1].dst = convert_dst(c, &inst->Dst[0]);
-   ins[1].src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
-   ins[1].src[2] = swizzle(src[1], SWIZZLE(W, W, W, W));
-
-   emit_inst(c, &ins[0]);
-   emit_inst(c, &ins[1]);
-}
-
-static void
 trans_sampler(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src)
@@ -1833,7 +1806,6 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
    INSTR(LRP, trans_lrp),
    INSTR(LIT, trans_lit),
    INSTR(SSG, trans_ssg),
-   INSTR(DPH, trans_dph),
 
    INSTR(SIN, trans_trig),
    INSTR(COS, trans_trig),
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c
index 5f2a876047..da06e16bf0 100644
--- a/src/gallium/drivers/i915/i915_fpc_optimize.c
+++ b/src/gallium/drivers/i915/i915_fpc_optimize.c
@@ -85,7 +85,6 @@ static const struct {
    [ TGSI_OPCODE_DP2     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
    [ TGSI_OPCODE_DP3     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
    [ TGSI_OPCODE_DP4     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
-   [ TGSI_OPCODE_DPH     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_DST     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_END     ] = { false,  false,                  0,  0,  0 },
    [ TGSI_OPCODE_EX2     ] = { false,  false,                  0,  1,  1 },
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 241c92dd28..22a42eeeab 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -604,17 +604,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
       emit_simple_arith(p, inst, A0_DP4, 2, fs);
       break;
 
-   case TGSI_OPCODE_DPH:
-      src0 = src_vector(p, &inst->Src[0], fs);
-      src1 = src_vector(p, &inst->Src[1], fs);
-
-      i915_emit_arith(p,
-                      A0_DP4,
-                      get_result_vector(p, &inst->Dst[0]),
-                      get_result_flags(inst), 0,
-                      swizzle(src0, X, Y, Z, ONE), src1, 0);
-      break;
-
    case TGSI_OPCODE_DST:
       src0 = src_vector(p, &inst->Src[0], fs);
       src1 = src_vector(p, &inst->Src[1], fs);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 23c7d106e3..c06e74e2f8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -277,7 +277,6 @@ unsigned int Instruction::srcMask(unsigned int s) const
    case TGSI_OPCODE_DP3:
       return 0x7;
    case TGSI_OPCODE_DP4:
-   case TGSI_OPCODE_DPH:
    case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
       return 0xf;
    case TGSI_OPCODE_DST:
@@ -3321,13 +3320,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
          mkMov(dst0[c], val0);
       break;
-   case TGSI_OPCODE_DPH:
-      val0 = buildDot(3);
-      src1 = fetchSrc(1, 3);
-      mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
-      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
-         mkMov(dst0[c], val0);
-      break;
    case TGSI_OPCODE_DST:
       if (dst0[0])
          loadImm(dst0[0], 1.0f);
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
index 278a8a4a43..a3ed5c6526 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
@@ -591,11 +591,6 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
    case TGSI_OPCODE_DP4:
       nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
       break;
-   case TGSI_OPCODE_DPH:
-      tmp = nvfx_src(temp(fpc));
-      nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none));
-      nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none));
-      break;
    case TGSI_OPCODE_DST:
       nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
       break;
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
index bec9975de9..8ba3d5a050 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
@@ -588,9 +588,6 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
    case TGSI_OPCODE_DP4:
       nvfx_vp_emit(vpc, arith(sat, VEC, DP4, dst, mask, src[0], src[1], none));
       break;
-   case TGSI_OPCODE_DPH:
-      nvfx_vp_emit(vpc, arith(sat, VEC, DPH, dst, mask, src[0], src[1], none));
-      break;
    case TGSI_OPCODE_DST:
       nvfx_vp_emit(vpc, arith(sat, VEC, DST, dst, mask, src[0], src[1], none));
       break;
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index cc0ac4810e..78af1242b4 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -58,7 +58,6 @@ static unsigned translate_opcode(unsigned opcode)
         case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
         case TGSI_OPCODE_POW: return RC_OPCODE_POW;
         case TGSI_OPCODE_XPD: return RC_OPCODE_XPD;
-        case TGSI_OPCODE_DPH: return RC_OPCODE_DPH;
         case TGSI_OPCODE_COS: return RC_OPCODE_COS;
         case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
         case TGSI_OPCODE_DDY: return RC_OPCODE_DDY;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index d3728fb61d..fd76c93de8 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6617,13 +6617,6 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
 				alu.src[0].chan = alu.src[1].chan = 0;
 			}
 			break;
-		case TGSI_OPCODE_DPH:
-			if (i == 3) {
-				alu.src[0].sel = V_SQ_ALU_SRC_1;
-				alu.src[0].chan = 0;
-				alu.src[0].neg = 0;
-			}
-			break;
 		default:
 			break;
 		}
@@ -9103,7 +9096,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
 	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[33]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
-	[TGSI_OPCODE_DPH]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
+	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, tgsi_trig},
 	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
 	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
@@ -9301,7 +9294,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[33]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
-	[TGSI_OPCODE_DPH]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
+	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, tgsi_trig},
 	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
 	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
@@ -9524,7 +9517,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[33]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
-	[TGSI_OPCODE_DPH]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
+	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, cayman_trig},
 	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
 	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index dd29f74a2a..928330ce14 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -1237,39 +1237,6 @@ emit_dp2(struct svga_shader_emitter *emit,
 
 
 /**
- * Translate the following TGSI DPH instruction.
- *    DPH  DST, SRC1, SRC2
- * To the following SVGA3D instruction sequence.
- *    DP3  TMP, SRC1, SRC2
- *    ADD  DST, TMP, SRC2.wwww
- */
-static boolean
-emit_dph(struct svga_shader_emitter *emit,
-         const struct tgsi_full_instruction *insn )
-{
-   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   const struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
-   struct src_register src1 =
-      translate_src_register(emit, &insn->Src[1]);
-   SVGA3dShaderDestToken temp = get_temp( emit );
-
-   /* DP3  TMP, SRC1, SRC2 */
-   if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
-      return FALSE;
-
-   src1 = scalar(src1, TGSI_SWIZZLE_W);
-
-   /* ADD  DST, TMP, SRC2.wwww */
-   if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
-                    src( temp ), src1 ))
-      return FALSE;
-
-   return TRUE;
-}
-
-
-/**
  * Sine / Cosine helper function.
  */
 static boolean
@@ -2924,9 +2891,6 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
    case TGSI_OPCODE_DP2:
       return emit_dp2( emit, insn );
 
-   case TGSI_OPCODE_DPH:
-      return emit_dph( emit, insn );
-
    case TGSI_OPCODE_COS:
       return emit_cos( emit, insn );
 
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index 56afd497d7..e57e78d11a 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -3578,40 +3578,6 @@ emit_cmp(struct svga_shader_emitter_v10 *emit,
 
 
 /**
- * Emit code for TGSI_OPCODE_DPH instruction.
- */
-static boolean
-emit_dph(struct svga_shader_emitter_v10 *emit,
-         const struct tgsi_full_instruction *inst)
-{
-   /*
-    * DP3 tmp, s0, s1
-    * ADD dst, tmp, s1.wwww
-    */
-
-   struct tgsi_full_src_register s1_wwww =
-      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
-                  TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
-
-   unsigned tmp = get_temp_index(emit);
-   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
-   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
-
-   /* DP3 tmp, s0, s1 */
-   emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
-                        &inst->Src[1], FALSE);
-
-   /* ADD dst, tmp, s1.wwww */
-   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
-                        &s1_wwww, inst->Instruction.Saturate);
-
-   free_temp_indexes(emit);
-
-   return TRUE;
-}
-
-
-/**
  * Emit code for TGSI_OPCODE_DST instruction.
  */
 static boolean
@@ -5712,8 +5678,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
       return emit_cmp(emit, inst);
    case TGSI_OPCODE_COS:
       return emit_sincos(emit, inst);
-   case TGSI_OPCODE_DPH:
-      return emit_dph(emit, inst);
    case TGSI_OPCODE_DST:
       return emit_dst(emit, inst);
    case TGSI_OPCODE_EX2:
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 5c4af89b25..af0d0252c6 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -372,7 +372,7 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_U2I64               32
 #define TGSI_OPCODE_CLOCK               33
 #define TGSI_OPCODE_I2I64               34
-#define TGSI_OPCODE_DPH                 35
+/* gap */
 #define TGSI_OPCODE_COS                 36
 #define TGSI_OPCODE_DDX                 37
 #define TGSI_OPCODE_DDY                 38
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index f6eb5effc6..5f78026dda 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -457,8 +457,6 @@ translate_opcode( unsigned op )
       return TGSI_OPCODE_DP3;
    case OPCODE_DP4:
       return TGSI_OPCODE_DP4;
-   case OPCODE_DPH:
-      return TGSI_OPCODE_DPH;
    case OPCODE_DST:
       return TGSI_OPCODE_DST;
    case OPCODE_EX2:
@@ -589,6 +587,17 @@ compile_instruction(
       ureg_ADD(ureg, dst[0], src[0], ureg_negate(src[1]));
       break;
 
+   case OPCODE_DPH: {
+      struct ureg_dst temp = ureg_DECL_temporary(ureg);
+
+      /* DPH = DP4(src0, src1) where src0.w = 1. */
+      ureg_MOV(ureg, ureg_writemask(temp, TGSI_WRITEMASK_XYZ), src[0]);
+      ureg_MOV(ureg, ureg_writemask(temp, TGSI_WRITEMASK_W),
+               ureg_imm1f(ureg, 1));
+      ureg_DP4(ureg, dst[0], ureg_src(temp), src[1]);
+      break;
+   }
+
    default:
       ureg_insn( ureg, 
                  translate_opcode( inst->Opcode ),
author	Marek Olšák <marek.olsak@amd.com>	2017-08-19 22:00:02 +0200
committer	Marek Olšák <marek.olsak@amd.com>	2017-08-22 13:29:47 +0200
commit	3e2ff8fade879cedfdff0e180a6996df1223a823 (patch)
tree	c6601d72e5f5ba13264f7355fef41558ed65cbf3
parent	86e6f7a73bdbced24e10fb80fdcba591e0568120 (diff)