diff options
author | Vincent Lejeune <vljn@ovi.com> | 2013-02-24 16:31:32 +0100 |
---|---|---|
committer | Vincent Lejeune <vljn@ovi.com> | 2013-02-26 13:37:20 +0100 |
commit | 69e4fc24c7753f37cdc97128dea40eba0a1c9e46 (patch) | |
tree | 69a557c2b05660e318c14a7ebdd3d8549412b682 | |
parent | 3f42b0d479ff1902879a8590994fe68b94a7a9a3 (diff) |
bundling dot4scheduling2
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.h | 1 | ||||
-rw-r--r-- | lib/Target/R600/R600ExpandSpecialInstrs.cpp | 17 | ||||
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 13 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 30 |
4 files changed, 60 insertions, 1 deletion
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 404f6201a5..2435fc8091 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -124,6 +124,7 @@ enum { SMIN, UMIN, URECIP, + DOT4, EXPORT, CONST_ADDRESS, REGISTER_LOAD, diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp index f8c900f727..634dee03a8 100644 --- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp @@ -182,6 +182,23 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { MI.eraseFromParent(); continue; } + case AMDGPU::DOT_4: { + const R600RegisterInfo &TRI = TII->getRegisterInfo(); + for (unsigned Chan = 0; Chan < 4; ++Chan) { + unsigned DstBase = TRI.getEncodingValue(MI.getOperand(0).getReg()) & HW_REG_MASK; + unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); + MachineInstr *BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::DOT4_eg_real, + DstReg, MI.getOperand(1 + 2 * Chan).getReg(), MI.getOperand(2 + 2 * Chan).getReg() ); + if (Chan > 0) { + TII->addFlag(BMI, 0, MO_FLAG_MASK); + BMI->bundleWithPred(); + } + if (Chan != 3) + TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + } + MI.eraseFromParent(); + continue; + } } bool IsReduction = TII->isReductionOp(MI.getOpcode()); diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index f25ced134f..fcc046b6ed 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -397,6 +397,19 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const return SDValue(interp, slot % 2); } + case AMDGPUIntrinsic::AMDGPU_dp4: { + SDValue Args[8] = { + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), DAG.getConstant(0, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(0, MVT::i32)), + 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), DAG.getConstant(1, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(1, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), DAG.getConstant(2, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(2, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), DAG.getConstant(3, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(3, MVT::i32)) + }; + return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8); + } case r600_read_ngroups_x: return LowerImplicitParameter(DAG, VT, DL, 0); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 10bcdcf3f5..edafea4776 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -516,6 +516,13 @@ def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", [SDNPVariadic] >; +def DOT4 : SDNode<"AMDGPUISD::DOT4", + SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>, + SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>, + SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>, + [] +>; + //===----------------------------------------------------------------------===// // Interpolation Instructions //===----------------------------------------------------------------------===// @@ -982,12 +989,33 @@ class CNDGE_Common <bits<5> inst> : R600_3OP < COND_GE))] >; +let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { + def DOT_4 : Instruction { + let OutOperandList = (outs R600_TReg32_X:$dst); + let InOperandList = + (ins R600_TReg32_X:$src0_x, R600_TReg32_X:$src1_x, + R600_TReg32_Y:$src0_y, R600_TReg32_Y:$src1_y, + R600_TReg32_Z:$src0_z, R600_TReg32_Z:$src1_z, + R600_TReg32_W:$src0_w, R600_TReg32_W:$src1_w); + let AsmString = "DOT4"; + let neverHasSideEffects = 1; + let 
isAsCheapAsAMove = 1; + let Itinerary = NullALU; +} +} + +def : Pat<(DOT4 R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, R600_Reg32:$src3, +R600_Reg32:$src4, R600_Reg32:$src5, R600_Reg32:$src6, R600_Reg32:$src7), +(DOT_4 R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, R600_Reg32:$src3, +R600_Reg32:$src4, R600_Reg32:$src5, R600_Reg32:$src6, R600_Reg32:$src7)>; + + multiclass DOT4_Common <bits<11> inst> { def _pseudo : R600_REDUCTION <inst, (ins R600_Reg128:$src0, R600_Reg128:$src1), "DOT4 $dst $src0, $src1", - [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] + [] >; def _real : R600_2OP <inst, "DOT4", []>; |