summary refs log tree commit diff
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2013-02-01 20:15:43 +0000
committer Tom Stellard <thomas.stellard@amd.com> 2013-02-04 17:11:33 +0000
commit 7e1a91c49d3a8139a7c4245e07e24a9b01e5507b (patch)
tree 970a69f34e872a90a1838e32810d01cca0610ff2
parent 587e40d942e165366cb4c2581b135fd8ce1a470e (diff)
XXX: WIP on loops
-rw-r--r-- lib/Target/R600/AMDGPUISelLowering.cpp | 1
-rw-r--r-- lib/Target/R600/AMDGPUISelLowering.h | 1
-rw-r--r-- lib/Target/R600/R600ISelLowering.cpp | 89
-rw-r--r-- lib/Target/R600/R600InstrInfo.cpp | 1
-rw-r--r-- lib/Target/R600/R600Instructions.td | 51
-rw-r--r-- lib/Target/R600/R600LowerControlFlow.cpp | 60
-rw-r--r-- lib/Target/R600/R600RegisterInfo.td | 2
7 files changed, 150 insertions, 55 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 6b546bf58d5..3997f712e4f 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -406,6 +406,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FMAX)
NODE_NAME_CASE(ELSE)
NODE_NAME_CASE(ELSEBREAK)
+ NODE_NAME_CASE(ENDIF)
NODE_NAME_CASE(IF)
NODE_NAME_CASE(IFBREAK)
NODE_NAME_CASE(LOOP)
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 4c25b34aec6..bf119288e21 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -116,6 +116,7 @@ enum {
IFBREAK,
ELSE,
ELSEBREAK,
+ ENDIF,
FRACT,
FMAX,
LOOP,
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index b4d9474ab05..86ba8db4f7f 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -189,11 +189,31 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
// R600_ENDIF Pred0
//
- MachineBasicBlock::iterator InsertPoint;
- for (InsertPoint = I; !InsertPoint->isBranch(); ++InsertPoint);
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned NumUses = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(DstReg),
+ UE = MachineRegisterInfo::use_end();
+ UI != UE; ++UI, ++NumUses) {
+ MachineInstr *Use = &(*UI);
+ MachineBasicBlock::iterator InsertPoint = Use;
+ MachineInstr *PredSet;
+
+ if (NumUses < 1) {
+ MI->removeFromParent();
+ PredSet = MI;
+ } else {
+ // PRED_SET can only have one use, so if there is more than one use, we
+ // need to clone the instruction.
+ PredSet = MF->CloneMachineInstr(MI);
+ const TargetRegisterClass *DstRegClass =
+ MRI.getRegClass(MI->getOperand(0).getReg());
+ unsigned NewDst = MRI.createVirtualRegister(DstRegClass);
+ PredSet->getOperand(0).setReg(NewDst);
+ UI.getOperand().setReg(NewDst);
+ }
+ BB->insert(InsertPoint, PredSet);
+ }
- MI->removeFromParent();
- BB->insert(InsertPoint, MI);
return BB;
}
@@ -432,6 +452,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
+ case AMDGPUIntrinsic::R600_endcf: {
+ return DAG.getNode(AMDGPUISD::ENDIF, Op.getDebugLoc(), MVT::Other, Chain);
+ }
+
// default for switch(IntrinsicID)
default: break;
}
@@ -441,6 +465,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::INTRINSIC_W_CHAIN: {
SDValue Chain = Op.getOperand(0);
+ DebugLoc DL = Op.getDebugLoc();
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
@@ -448,10 +473,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case AMDGPUIntrinsic::R600_ifbreak: {
SDValue SetCC = Op.getOperand(2);
if (SetCC.getOpcode() == ISD::SETCC) {
- DebugLoc DL = Op.getDebugLoc();
bool IsInteger = SetCC.getOperand(0).getValueType().isInteger();
unsigned CondCode = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
- SDValue PredSet = DAG.getNode(AMDGPUISD::PRED_SET, DL, MVT::i32,
+ SDValue PredSet = DAG.getNode(AMDGPUISD::PRED_SET, DL, MVT::i1,
DAG.getTargetConstant(CondCode, MVT::i32),
DAG.getTargetConstant(IsInteger, MVT::i1),
SetCC.getOperand(0),
@@ -464,7 +488,13 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
break;
}
+ case AMDGPUIntrinsic::R600_elsebreak: {
+ SDValue Out[2];
+ Out[0] = Op.getOperand(2);
+ Out[1] = DAG.getNode(AMDGPUISD::ELSEBREAK, DL, MVT::Other, Chain);
+ return DAG.getMergeValues(Out, 2, DL);
}
+ }
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
@@ -698,14 +728,10 @@ SDValue R600TargetLowering::LowerBR_COND(SDValue Op, SelectionDAG &DAG) const {
}
case AMDGPUIntrinsic::R600_loop: {
- Ret = DAG.getNode(AMDGPUISD::LOOP, DL, MVT::Other, Chain, Cond, BB);
+ Ret = DAG.getNode(AMDGPUISD::LOOP, DL, MVT::Other, Chain, BB);
break;
}
- case AMDGPUIntrinsic::R600_elsebreak: {
- Ret = DAG.getNode(AMDGPUISD::ELSEBREAK, DL, MVT::Other, Chain, Cond, BB);
- break;
- }
}
DAG.ReplaceAllUsesOfValueWith(SDValue(CFIntrinsic.getNode(), 0), Cond);
@@ -753,6 +779,17 @@ SDValue R600TargetLowering::LowerCopyToReg(SDValue Op,
break;
}
+ case AMDGPUISD::PRED_SET: {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value.getOperand(0));
+ ISD::CondCode CC = (ISD::CondCode)C->getZExtValue();
+ SDValue SetCC = DAG.getSetCC(DL, MVT::i1, Value.getOperand(2),
+ Value.getOperand(3),
+ CC);
+ NewValue = LowerSETCC(SetCC, DAG);
+ break;
+ }
+
+
}
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
MRI.setRegClass(cast<RegisterSDNode>(Op.getOperand(1))->getReg(),
@@ -1427,6 +1464,36 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
CCOpcode);
}
+
+ case AMDGPUISD::PRED_SET: {
+ // (pred_set cc0 (selectcc lhs, rhs, -1, 0, cc1), 0) -> pred_set cc2, lhs, rhs
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N->getOperand(3));
+ SDValue SelectCC = N->getOperand(2);
+ if (!C1 || SelectCC.getOpcode() != ISD::SELECT_CC ||
+ !isHWTrueValue(SelectCC.getOperand(2)) ||
+ !isHWFalseValue(SelectCC.getOperand(3))) {
+ return SDValue();
+ }
+
+ ISD::CondCode OldCondCode = (ISD::CondCode)
+ (dyn_cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
+ ISD::CondCode NewCondCode =
+ cast<CondCodeSDNode>(SelectCC->getOperand(4))->get();
+
+ if (OldCondCode == ISD::SETEQ) {
+ NewCondCode = ISD::getSetCCInverse(NewCondCode,
+ SelectCC.getValueType().isInteger());
+ }
+
+ return DAG.getNode(AMDGPUISD::PRED_SET, N->getDebugLoc(), N->getValueType(0),
+ DAG.getTargetConstant(NewCondCode, MVT::i32),
+ DAG.getTargetConstant(
+ SelectCC.getValueType().isInteger(), MVT::i1),
+ SelectCC.getOperand(0),
+ SelectCC.getOperand(1));
+
+ }
+
}
return SDValue();
}
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 71a3e914d3f..83509d695a1 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -191,6 +191,7 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
+ return true;
// Most of the following comes from the ARM implementation of AnalyzeBranch
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 03914fe7715..ab932f7fc2c 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -680,6 +680,10 @@ def SDTIf : SDTypeProfile<0, 2, [
SDTCisVT<1, OtherVT>
]>;
+def SDTLoop : SDTypeProfile<0, 1, [
+ SDTCisVT<0, OtherVT>
+]>;
+
def SDTIfBreak : SDTypeProfile<1, 1, [
SDTCisInt<0>, SDTCisInt<1>
]>;
@@ -695,9 +699,12 @@ def SDTPredSet : SDTypeProfile<1, 4, [ // setcc
def R600if : SDNode<"AMDGPUISD::IF", SDTIf, [SDNPHasChain]>;
def R600ifbreak : SDNode<"AMDGPUISD::IFBREAK", SDTIfBreak, [SDNPHasChain]>;
def R600else : SDNode<"AMDGPUISD::ELSE", SDTElse, [SDNPHasChain]>;
-def R600loop : SDNode<"AMDGPUISD::LOOP", SDTIf, [SDNPHasChain]>;
-def R600elsebreak : SDNode<"AMDGPUISD::ELSEBREAK", SDTIf, [SDNPHasChain]>;
+def R600loop : SDNode<"AMDGPUISD::LOOP", SDTLoop, [SDNPHasChain]>;
+def R600elsebreak : SDNode<"AMDGPUISD::ELSEBREAK",
+ SDTypeProfile<0, 0, []>, [SDNPHasChain]>;
def R600predset : SDNode<"AMDGPUISD::PRED_SET", SDTPredSet>;
+def R600endif : SDNode<"AMDGPUISD::ENDIF",
+ SDTypeProfile<0, 0, []>, [SDNPHasChain]>;
let Predicates = [isR600toCayman] in {
@@ -1795,9 +1802,9 @@ def R600_ELSE : AMDGPUInst <
def R600_LOOP : AMDGPUInst <
(outs),
- (ins brtarget:$true, R600_PredReg:$cond, PRED_SENSE:$pred_sense),
- "R600_LOOP $cond $true",
- [(R600loop R600_PredReg:$cond, bb:$true)]
+ (ins brtarget:$true, PRED_SENSE:$pred_sense),
+ "R600_LOOP $true",
+ [(R600loop bb:$true)]
>;
@@ -1825,13 +1832,6 @@ def R600_IFBREAK : AMDGPUInst <
[]
>;
-def R600_ELSEBREAK : AMDGPUInst <
- (outs R600_PredReg:$dst),
- (ins R600_PredReg:$cond),
- "R600_ELSEBREAK $dst, $cond",
- [(set R600_PredReg:$dst, (int_R600_elsebreak R600_PredReg:$cond))]
->;
-
def R600_BREAK : AMDGPUInst <
(outs R600_PredReg:$dst),
(ins R600_PredReg:$cond),
@@ -1842,19 +1842,29 @@ def R600_BREAK : AMDGPUInst <
def R600_PREDICATED_BREAK : AMDGPUInst <
(outs R600_PredReg:$dst),
- (ins R600_TReg32:$cond),
+ (ins R600_PredReg:$cond),
"R600_PREDICATED_BREAK $dst, $cond",
[]
>;
+let hasSideEffects = 1 in {
+
+def R600_ELSEBREAK : AMDGPUInst <
+ (outs),
+ (ins),
+ "R600_ELSEBREAK",
+ [(R600elsebreak)]
+>;
def R600_ENDIF : AMDGPUInst <
(outs),
- (ins R600_PredReg:$cond),
- "R600_ENDIF $cond",
- [(int_R600_endcf R600_PredReg:$cond)]
+ (ins),
+ "R600_ENDIF",
+ [(R600endif)]
>;
+} // End hasSideEffects = 1
+
} // End isCodeGenOnly = 1, isPseudo = 1
let isPseudo = 1, usesCustomInserter = 1 in {
@@ -1965,14 +1975,11 @@ def : Pat <
>;
// R600ifbreak patterns:
-class IfBreakPat <Instruction PredInst, int CondCode> : Pat <
- (i1 (R600ifbreak (i32 (R600predset (i32 CondCode /*SETEQ*/), (i1 timm), (i32 R600_Reg32:$src0), (i32 R600_Reg32:$src1))))),
- (R600_PREDICATED_BREAK (i32 (PredInst R600_Reg32:$src0, R600_Reg32:$src1)))
+def : Pat <
+ (i1 (R600ifbreak (i1 (R600predset (i32 timm:$cc), (i1 timm:$isint), (i32 R600_Reg32:$src0), (i32 R600_Reg32:$src1))))),
+ (R600_PREDICATED_BREAK (i1 (PRED_SET imm:$cc, imm:$isint, R600_Reg32:$src0, R600_Reg32:$src1)))
>;
-def : IfBreakPat <PRED_SETE_INT, 17 /* SETEQ */>;
-def : IfBreakPat <PRED_SETNE_INT, 22 /* SETNE */>;
-
// SGT Reverse args
def : Pat <
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
diff --git a/lib/Target/R600/R600LowerControlFlow.cpp b/lib/Target/R600/R600LowerControlFlow.cpp
index 73d4530599d..c0ec81a046c 100644
--- a/lib/Target/R600/R600LowerControlFlow.cpp
+++ b/lib/Target/R600/R600LowerControlFlow.cpp
@@ -308,20 +308,23 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
// BB:#1:
// ...
//
+ case AMDGPU::R600_PREDICATED_BREAK:
case AMDGPU::R600_IF: {
bool InversePred = false;
- unsigned BranchOnTrue = MI.getOperand(2).getImm();
- if (BranchOnTrue) {
- MachineBasicBlock *BranchBlock = MI.getOperand(0).getMBB();
- MachineBasicBlock *FallThroughBlock;
- MachineBasicBlock::succ_iterator SI = MBB.succ_begin();
- if ((*SI)->getNumber() != BranchBlock->getNumber()) {
- FallThroughBlock = *SI;
- } else {
- FallThroughBlock = *(++SI);
+ if (MI.getOpcode() == AMDGPU::R600_IF) {
+ unsigned BranchOnTrue = MI.getOperand(2).getImm();
+ if (BranchOnTrue) {
+ MachineBasicBlock *BranchBlock = MI.getOperand(0).getMBB();
+ MachineBasicBlock *FallThroughBlock;
+ MachineBasicBlock::succ_iterator SI = MBB.succ_begin();
+ if ((*SI)->getNumber() != BranchBlock->getNumber()) {
+ FallThroughBlock = *SI;
+ } else {
+ FallThroughBlock = *(++SI);
+ }
+ InversePred = true;
+ MI.getOperand(2).setImm(0);
}
- InversePred = true;
- MI.getOperand(2).setImm(0);
}
// Lower the PRED_SET instruction to the correct hardware instruction.
MachineInstr &PredSet = *(++I);
@@ -357,11 +360,9 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
}
TII->setImmOperand(HWPredSet, R600Operands::WRITE, 0);
// if (Flags & MO_FLAG_PUSH) {
- TII->setImmOperand(HWPredSet, R600Operands::UPDATE_EXEC_MASK, 1);
-// } else {
+ TII->setImmOperand(HWPredSet, R600Operands::UPDATE_EXEC_MASK, 1);
+ // } else {
// TII->setImmOperand(HWPredSet, R600Operands::UPDATE_PREDICATE, 1);
-// }
-
break;
}
@@ -419,18 +420,29 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
FallThroughBlock = *(++SI);
}
- if (FallThroughBlock->front().getOpcode() == AMDGPU::R600_ENDIF) {
- FallThroughBlock->front().eraseFromParent();
+ if (MI.getOperand(1).getImm() == 1) {
+ std::swap(FallThroughBlock, BranchToBlock);
+ }
+
+ MachineBasicBlock::iterator EndIfI = FallThroughBlock->begin();
+ while (EndIfI != FallThroughBlock->end()) {
+ if ((*EndIfI).getOpcode() == AMDGPU::R600_ENDIF) {
+ break;
+ }
+ EndIfI++;
}
+ MachineInstr &EndIf = *EndIfI;
+ assert(EndIf.getOpcode() == AMDGPU::R600_ENDIF);
BuildMI(MBB, MBB.end(), MBB.findDebugLoc(MBB.end()),
TII->get(AMDGPU::R600_ENDLOOP))
- .addOperand(MI.getOperand(1));
+ .addReg(AMDGPU::PRED_STACK);
BuildMI(*BranchToBlock, BranchToBlock->begin(),
BranchToBlock->findDebugLoc(BranchToBlock->begin()),
TII->get(AMDGPU::R600_BEGINLOOP))
- .addOperand(MI.getOperand(1));
+ .addReg(AMDGPU::PRED_STACK);
MI.eraseFromParent();
+ EndIf.eraseFromParent();
Next = MBB.rbegin();
break;
}
@@ -438,22 +450,24 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
DebugLoc DL = MBB.findDebugLoc((MachineBasicBlock::iterator)&MI);
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::R600_ELSE))
.addMBB(*MBB.succ_begin())
- .addOperand(MI.getOperand(1))
+ .addReg(AMDGPU::PRED_STACK)
.addImm(0);
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::R600_BREAK))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(0));
+ .addReg(AMDGPU::PRED_STACK)
+ .addReg(AMDGPU::PRED_STACK);
MI.eraseFromParent();
Next = MBB.rbegin();
break;
}
+#if 0
case AMDGPU::R600_PREDICATED_BREAK: {
MachineInstr &PredSet = *(++I);
TII->setImmOperand(&PredSet, R600Operands::WRITE, 0);
TII->setImmOperand(&PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
break;
}
+#endif
}
}
}
@@ -472,6 +486,8 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
MBB.eraseFromParent();
}
+ return false;
+
// Fourth-Pass: Predicate simple branches
for (MachineBasicBlock::iterator I = EntryBlock.begin(), Next = llvm::next(I);
I != EntryBlock.end(); I = Next) {
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index 2bc2ca051f8..7cfe9f72cb6 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -149,6 +149,8 @@ foreach Index = 0-16 in {
def Pred#Index : R600Reg<"", Index>;
}
+def PRED_STACK : R600Reg<"PRED_STACK", 0>;
+
def TRUE : R600Reg<"True", 0>;
def FALSE: R600Reg<"False", 0>;