diff options
Diffstat (limited to 'lib/Target/R600/R600Instructions.td')
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 51 |
1 file changed, 50 insertions, 1 deletion
Summary of this patch (free text ahead of the "diff --git" header is skipped by patch tools):
  * Declares a DOT4 SDNode bound to "AMDGPUISD::DOT4"; its SDTypeProfile<1, 8, ...>
    constrains index 0 to floating point and indices 1-8 to f32.
  * Adds the pseudo, codegen-only class R600_VEC2OP in namespace "AMDGPU"; its input
    list repeats the same operand group for slots X, Y, Z and W, followed by two
    LITERAL operands.
  * Defines DOT_4 as an R600_VEC2OP whose pattern matches DOT4 on the eight
    per-slot src0/src1 registers.
  * Replaces the int_AMDGPU_dp4 selection pattern on DOT4_Common's _pseudo def
    with an empty pattern list.
NOTE(review): the line breaks below were rebuilt from a whitespace-collapsed copy.
Record counts agree with the hunk headers, but the exact leading indentation inside
each line is a conventional reconstruction - apply with whitespace-insensitive
matching or confirm against the original commit.

diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index cbe2a87bf6..de7d1ae2e1 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -618,6 +618,13 @@ def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
   [SDNPVariadic]
 >;
 
+def DOT4 : SDNode<"AMDGPUISD::DOT4",
+  SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>,
+      SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>,
+      SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>,
+  []
+>;
+
 //===----------------------------------------------------------------------===//
 // Interpolation Instructions
 //===----------------------------------------------------------------------===//
@@ -1254,12 +1261,54 @@ class CNDGE_Common <bits<5> inst> : R600_3OP <
   [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
 >;
 
+
+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins
+// Slot X
+   UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,
+   OMOD:$omod_X, REL:$dst_rel_X, CLAMP:$clamp_X,
+   R600_TReg32_X:$src0_X, NEG:$src0_neg_X, REL:$src0_rel_X, ABS:$src0_abs_X, SEL:$src0_sel_X,
+   R600_TReg32_X:$src1_X, NEG:$src1_neg_X, REL:$src1_rel_X, ABS:$src1_abs_X, SEL:$src1_sel_X,
+   R600_Pred:$pred_sel_X,
+// Slot Y
+   UEM:$update_exec_mask_Y, UP:$update_pred_Y, WRITE:$write_Y,
+   OMOD:$omod_Y, REL:$dst_rel_Y, CLAMP:$clamp_Y,
+   R600_TReg32_Y:$src0_Y, NEG:$src0_neg_Y, REL:$src0_rel_Y, ABS:$src0_abs_Y, SEL:$src0_sel_Y,
+   R600_TReg32_Y:$src1_Y, NEG:$src1_neg_Y, REL:$src1_rel_Y, ABS:$src1_abs_Y, SEL:$src1_sel_Y,
+   R600_Pred:$pred_sel_Y,
+// Slot Z
+   UEM:$update_exec_mask_Z, UP:$update_pred_Z, WRITE:$write_Z,
+   OMOD:$omod_Z, REL:$dst_rel_Z, CLAMP:$clamp_Z,
+   R600_TReg32_Z:$src0_Z, NEG:$src0_neg_Z, REL:$src0_rel_Z, ABS:$src0_abs_Z, SEL:$src0_sel_Z,
+   R600_TReg32_Z:$src1_Z, NEG:$src1_neg_Z, REL:$src1_rel_Z, ABS:$src1_abs_Z, SEL:$src1_sel_Z,
+   R600_Pred:$pred_sel_Z,
+// Slot W
+   UEM:$update_exec_mask_W, UP:$update_pred_W, WRITE:$write_W,
+   OMOD:$omod_W, REL:$dst_rel_W, CLAMP:$clamp_W,
+   R600_TReg32_W:$src0_W, NEG:$src0_neg_W, REL:$src0_rel_W, ABS:$src0_abs_W, SEL:$src0_sel_W,
+   R600_TReg32_W:$src1_W, NEG:$src1_neg_W, REL:$src1_rel_W, ABS:$src1_abs_W, SEL:$src1_sel_W,
+   R600_Pred:$pred_sel_W,
+   LITERAL:$literal0, LITERAL:$literal1),
+  "",
+  pattern,
+  AnyALU> {}
+}
+
+def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst, (DOT4
+  R600_TReg32_X:$src0_X, R600_TReg32_X:$src1_X,
+  R600_TReg32_Y:$src0_Y, R600_TReg32_Y:$src1_Y,
+  R600_TReg32_Z:$src0_Z, R600_TReg32_Z:$src1_Z,
+  R600_TReg32_W:$src0_W, R600_TReg32_W:$src1_W))]>;
+
+
+
+
 multiclass DOT4_Common <bits<11> inst> {
 
   def _pseudo : R600_REDUCTION <inst,
     (ins R600_Reg128:$src0, R600_Reg128:$src1),
     "DOT4 $dst $src0, $src1",
-    [(set f32:$dst, (int_AMDGPU_dp4 v4f32:$src0, v4f32:$src1))]
+    []
   >;
 
   def _real : R600_2OP <inst, "DOT4", []>;