summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vincent Lejeune <vljn@ovi.com> 2013-02-24 16:31:32 +0100
committer Vincent Lejeune <vljn@ovi.com> 2013-02-26 13:37:20 +0100
commit 69e4fc24c7753f37cdc97128dea40eba0a1c9e46 (patch)
tree 69a557c2b05660e318c14a7ebdd3d8549412b682
parent 3f42b0d479ff1902879a8590994fe68b94a7a9a3 (diff)
bundling dot4scheduling2
-rw-r--r-- lib/Target/R600/AMDGPUISelLowering.h 1
-rw-r--r-- lib/Target/R600/R600ExpandSpecialInstrs.cpp 17
-rw-r--r-- lib/Target/R600/R600ISelLowering.cpp 13
-rw-r--r-- lib/Target/R600/R600Instructions.td 30
4 files changed, 60 insertions, 1 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 404f6201a5..2435fc8091 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -124,6 +124,7 @@ enum {
SMIN,
UMIN,
URECIP,
+ DOT4,
EXPORT,
CONST_ADDRESS,
REGISTER_LOAD,
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
index f8c900f727..634dee03a8 100644
--- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -182,6 +182,23 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
MI.eraseFromParent();
continue;
}
+ case AMDGPU::DOT_4: { // Expand the DOT_4 pseudo into four DOT4_eg_real instructions, one per channel.
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ unsigned DstBase = TRI.getEncodingValue(MI.getOperand(0).getReg()) & HW_REG_MASK; // Loop-invariant: depends only on operand 0 of the pseudo, so it is computed once.
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); // Register at index DstBase * 4 + Chan of R600_TReg32.
+ MachineInstr *BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::DOT4_eg_real,
+ DstReg, MI.getOperand(1 + 2 * Chan).getReg(), MI.getOperand(2 + 2 * Chan).getReg() ); // Operands 1+2*Chan and 2+2*Chan are this channel's src0/src1 pair.
+ if (Chan > 0) { // Every instruction after the first gets MO_FLAG_MASK on operand 0 and is bundled with its predecessor.
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ BMI->bundleWithPred();
+ }
+ if (Chan != 3) // All but the final channel carry MO_FLAG_NOT_LAST.
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+ MI.eraseFromParent(); // The pseudo is replaced by the four instructions built above.
+ continue;
+ }
}
bool IsReduction = TII->isReductionOp(MI.getOpcode());
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index f25ced134f..fcc046b6ed 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -397,6 +397,19 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return SDValue(interp, slot % 2);
}
+ case AMDGPUIntrinsic::AMDGPU_dp4: { // Lower the dp4 intrinsic to an AMDGPUISD::DOT4 node that takes eight scalar f32 operands.
+ SDValue Args[8] = { // Interleaved as (operand 1 element i, operand 2 element i) for i = 0..3.
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), DAG.getConstant(0, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(0, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), DAG.getConstant(1, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(1, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), DAG.getConstant(2, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(2, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), DAG.getConstant(3, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(3, MVT::i32))
+ };
+ return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8); // The node produces a single f32 result.
+ }
case r600_read_ngroups_x:
return LowerImplicitParameter(DAG, VT, DL, 0);
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 10bcdcf3f5..edafea4776 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -516,6 +516,13 @@ def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
[SDNPVariadic]
>;
+def DOT4 : SDNode<"AMDGPUISD::DOT4", // Selection DAG node record for the AMDGPUISD::DOT4 opcode.
+ SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>, // One floating-point result (index 0) and eight operands.
+ SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>,
+ SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>, // Each operand (indices 1-8) is constrained to f32.
+ [] // Empty list: no node properties are set.
+>;
+
//===----------------------------------------------------------------------===//
// Interpolation Instructions
//===----------------------------------------------------------------------===//
@@ -982,12 +989,33 @@ class CNDGE_Common <bits<5> inst> : R600_3OP <
COND_GE))]
>;
+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { // DOT_4 is marked as a codegen-only pseudo in the AMDGPU namespace.
+ def DOT_4 : Instruction {
+ let OutOperandList = (outs R600_TReg32_X:$dst); // Single result, constrained to the R600_TReg32_X class.
+ let InOperandList = // Eight inputs: one src0/src1 pair from each of the _X, _Y, _Z and _W register classes.
+ (ins R600_TReg32_X:$src0_x, R600_TReg32_X:$src1_x,
+ R600_TReg32_Y:$src0_y, R600_TReg32_Y:$src1_y,
+ R600_TReg32_Z:$src0_z, R600_TReg32_Z:$src1_z,
+ R600_TReg32_W:$src0_w, R600_TReg32_W:$src1_w);
+ let AsmString = "DOT4"; // Printed without operands.
+ let neverHasSideEffects = 1;
+ let isAsCheapAsAMove = 1;
+ let Itinerary = NullALU;
+}
+}
+
+def : Pat<(DOT4 R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, R600_Reg32:$src3, // Select a DOT4 node with eight R600_Reg32 operands...
+R600_Reg32:$src4, R600_Reg32:$src5, R600_Reg32:$src6, R600_Reg32:$src7),
+(DOT_4 R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, R600_Reg32:$src3, // ...into the DOT_4 pseudo, passing the operands through in the same order.
+R600_Reg32:$src4, R600_Reg32:$src5, R600_Reg32:$src6, R600_Reg32:$src7)>;
+
+
multiclass DOT4_Common <bits<11> inst> {
def _pseudo : R600_REDUCTION <inst,
(ins R600_Reg128:$src0, R600_Reg128:$src1),
"DOT4 $dst $src0, $src1",
- [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
+ []
>;
def _real : R600_2OP <inst, "DOT4", []>;