summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2013-01-29 19:20:20 +0000
committer Tom Stellard <thomas.stellard@amd.com> 2013-02-01 16:27:43 +0000
commit 834c4c7ae425f5696f66174ad2d0823aed643aa8 (patch)
tree 12737d4f3265cccf315ab5b71e1f6f1e89eca57d
parent 8723d7e085b88abdb56cef77838a2956fc724030 (diff)
XXX: Initial support for predicates
-rw-r--r-- lib/Target/R600/AMDGPUTargetMachine.cpp 1
-rw-r--r-- lib/Target/R600/R600InstrInfo.cpp 29
-rw-r--r-- lib/Target/R600/R600LowerControlFlow.cpp 90
-rw-r--r-- test/CodeGen/R600/predicates.ll 8
4 files changed, 117 insertions, 11 deletions
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index a003a253fde..043a8ac814f 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -78,7 +78,6 @@ public:
disablePass(&TailDuplicateID);
disablePass(&BranchFolderPassID);
disablePass(&MachineSinkingID);
- disablePass(&MachineBlockPlacementID);
}
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 8e14615ca4e..71a3e914d3f 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -150,6 +150,14 @@ isPredicateSetter(unsigned Opcode) {
switch (Opcode) {
case AMDGPU::PRED_X:
case AMDGPU::PRED_SET:
+ case AMDGPU::PRED_SETE:
+ case AMDGPU::PRED_SETGT:
+ case AMDGPU::PRED_SETGE:
+ case AMDGPU::PRED_SETNE:
+ case AMDGPU::PRED_SETE_INT:
+ case AMDGPU::PRED_SETGT_INT:
+ case AMDGPU::PRED_SETGE_INT:
+ case AMDGPU::PRED_SETNE_INT:
return true;
default:
return false;
@@ -484,18 +492,31 @@ R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
}
+///
+/// \p Pred Index 0 should contain a predicate register, which is the result
+/// of a PRED_SET instruction. Index 1 should contain the "predicate sense",
+/// which tells how to interpret the result of a predicate register (i.e.
+/// if the value of the predicate register matches the "predicate sense" then
+/// the instruction should be executed; otherwise it should be skipped).
bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
+ assert(Pred.size() == 2);
+
int PIdx = MI->findFirstPredOperandIdx();
+ unsigned PredReg = Pred[0].getReg();
+ int64_t PredSense = Pred[1].getImm();
if (PIdx != -1) {
MachineOperand &PMO = MI->getOperand(PIdx);
- PMO.setReg(Pred[2].getReg());
- MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
- return true;
+ if (PredSense == 0) {
+ PMO.setReg(AMDGPU::PRED_SEL_ZERO);
+ } else {
+ PMO.setReg(AMDGPU::PRED_SEL_ONE);
+ }
+ MachineInstrBuilder(MI).addReg(PredReg, RegState::Implicit);
+ return true;
}
-
return false;
}
diff --git a/lib/Target/R600/R600LowerControlFlow.cpp b/lib/Target/R600/R600LowerControlFlow.cpp
index 2499924dcde..bed8e33ea40 100644
--- a/lib/Target/R600/R600LowerControlFlow.cpp
+++ b/lib/Target/R600/R600LowerControlFlow.cpp
@@ -33,6 +33,7 @@ private:
void mergeSuccessorBlocks(MachineBasicBlock &MBB,
MachineInstr &BranchInstr) const;
MachineBasicBlock *getFallThroughBlock(MachineBasicBlock *MBB) const;
+ bool canPredicateBlock(MachineBasicBlock::iterator I) const;
public:
R600LowerControlFlowPass(TargetMachine &tm) :
@@ -123,6 +124,25 @@ MachineBasicBlock *R600LowerControlFlowPass::getFallThroughBlock(
return FallThroughBlock;
}
+static bool
+isPredicateSetter(unsigned Opcode) {
+ switch (Opcode) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::PRED_SET:
+ case AMDGPU::PRED_SETE:
+ case AMDGPU::PRED_SETGT:
+ case AMDGPU::PRED_SETGE:
+ case AMDGPU::PRED_SETNE:
+ case AMDGPU::PRED_SETE_INT:
+ case AMDGPU::PRED_SETGT_INT:
+ case AMDGPU::PRED_SETGE_INT:
+ case AMDGPU::PRED_SETNE_INT:
+ return true;
+ default:
+ return false;
+ }
+}
+
void R600LowerControlFlowPass::mergeSuccessorBlocks(MachineBasicBlock &MBB,
MachineInstr &BranchInstr) const {
unsigned BranchOnTrue = BranchInstr.getOperand(2).getImm();
@@ -361,8 +381,6 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Next = MBB.rbegin();
} else {
MachineInstr &PrevInst = *(++I);
- PrevInst.dump();
- BranchBlock->front().dump();
// if (PrevInst.getOpcode() == AMDGPU::R600_IF && (
// BranchBlock->front().getOpcode() == AMDGPU::R600_ENDIF ||
// BranchBlock->front().getOpcode() == AMDGPU::R600_ELSE ||
@@ -454,5 +472,73 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
MBB.eraseFromParent();
}
+ // Fourth-Pass: Predicate simple branches
+ for (MachineBasicBlock::iterator I = EntryBlock.begin(), Next = llvm::next(I);
+ I != EntryBlock.end(); I = Next) {
+
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+
+ if (!isPredicateSetter(MI.getOpcode())) {
+ continue;
+ }
+
+ MachineInstr &PredInst = MI;
+ MachineInstr &IfInst = *(++I);
+
+ if (!canPredicateBlock(I)) {
+ continue;
+ }
+
+ SmallVector<MachineOperand, 2> Cond;
+ Cond.push_back(PredInst.getOperand(TII->getOperandIdx(PredInst.getOpcode(),
+ R600Operands::DST)));
+ Cond.push_back(IfInst.getOperand(2));
+
+ MachineBasicBlock::iterator PI;
+ MachineInstr *Else = NULL;
+ for (PI = ++I; (*PI).getOpcode() != AMDGPU::R600_ENDIF; ++PI) {
+ MachineInstr &InstrToPredicate = *PI;
+ if (InstrToPredicate.getOpcode() == AMDGPU::R600_ELSE) {
+ Else = &InstrToPredicate;
+ Cond[1].setImm(!Cond[1].getImm());
+ continue;
+ }
+ TII->PredicateInstruction(&InstrToPredicate, Cond);
+ }
+
+ MachineInstr &EndIf = *PI++;
+ IfInst.eraseFromParent();
+ if (Else) {
+ Else->eraseFromParent();
+ }
+ EndIf.eraseFromParent();
+
+ // Update the flags on the Predicate Instruction
+ TII->setImmOperand(&PredInst, R600Operands::UPDATE_EXEC_MASK, 0);
+ TII->setImmOperand(&PredInst, R600Operands::UPDATE_PREDICATE, 1);
+
+ Next = PI;
+ }
return false;
}
+
+bool R600LowerControlFlowPass::canPredicateBlock(
+ MachineBasicBlock::iterator IfIterator) const {
+ for (MachineBasicBlock::iterator I = llvm::next(IfIterator);
+ (*I).getOpcode() != AMDGPU::R600_ENDIF;
+ ++I) {
+ MachineInstr &MI = *I;
+ if (MI.getOpcode() == AMDGPU::R600_BEGINLOOP ||
+ MI.getOpcode() == AMDGPU::R600_IF) {
+ // Nested control flow
+ return false;
+ }
+
+ if (MI.getOpcode() != AMDGPU::R600_ELSE &&
+ !TII->R600InstrInfo::isPredicable(&MI)) {
+ return false;
+ }
+ }
+ return true;
+}
diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll
index 18895a423e5..6915289935f 100644
--- a/test/CodeGen/R600/predicates.ll
+++ b/test/CodeGen/R600/predicates.ll
@@ -45,10 +45,10 @@ ENDIF:
}
; CHECK: @nested_if
-; CHECK: IF_PREDICATE_SET
+; CHECK: R600_IF
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
-; CHECK: ENDIF
+; CHECK: R600_ENDIF
define void @nested_if(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = icmp sgt i32 %in, 0
@@ -70,11 +70,11 @@ ENDIF:
}
; CHECK: @nested_if_else
-; CHECK: IF_PREDICATE_SET
+; CHECK: R600_IF
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
-; CHECK: ENDIF
+; CHECK: R600_ENDIF
define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = icmp sgt i32 %in, 0