diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-01-29 19:20:20 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-02-01 16:27:43 +0000 |
commit | 834c4c7ae425f5696f66174ad2d0823aed643aa8 (patch) | |
tree | 12737d4f3265cccf315ab5b71e1f6f1e89eca57d | |
parent | 8723d7e085b88abdb56cef77838a2956fc724030 (diff) |
XXX: Initial support for predicates
-rw-r--r-- | lib/Target/R600/AMDGPUTargetMachine.cpp | 1 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.cpp | 29 | ||||
-rw-r--r-- | lib/Target/R600/R600LowerControlFlow.cpp | 90 | ||||
-rw-r--r-- | test/CodeGen/R600/predicates.ll | 8 |
4 files changed, 117 insertions, 11 deletions
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index a003a253fde..043a8ac814f 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -78,7 +78,6 @@ public: disablePass(&TailDuplicateID); disablePass(&BranchFolderPassID); disablePass(&MachineSinkingID); - disablePass(&MachineBlockPlacementID); } AMDGPUTargetMachine &getAMDGPUTargetMachine() const { diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 8e14615ca4e..71a3e914d3f 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -150,6 +150,14 @@ isPredicateSetter(unsigned Opcode) { switch (Opcode) { case AMDGPU::PRED_X: case AMDGPU::PRED_SET: + case AMDGPU::PRED_SETE: + case AMDGPU::PRED_SETGT: + case AMDGPU::PRED_SETGE: + case AMDGPU::PRED_SETNE: + case AMDGPU::PRED_SETE_INT: + case AMDGPU::PRED_SETGT_INT: + case AMDGPU::PRED_SETGE_INT: + case AMDGPU::PRED_SETNE_INT: return true; default: return false; @@ -484,18 +492,31 @@ R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, } +/// +/// \p Pred Index 0 should contain a predicate register, which is the result +/// of a PRED_SET instruction. Index 1 should contain the "predicate sense", +/// which tells how to interpret the result of a predicate register. (e.g. +/// If the value of the predicate register matches the "predicate sense" then +/// the instruction should be executed otherwise it should be skipped). 
bool R600InstrInfo::PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const { + assert(Pred.size() == 2); + int PIdx = MI->findFirstPredOperandIdx(); + unsigned PredReg = Pred[0].getReg(); + int64_t PredSense = Pred[1].getImm(); if (PIdx != -1) { MachineOperand &PMO = MI->getOperand(PIdx); - PMO.setReg(Pred[2].getReg()); - MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); - return true; + if (PredSense == 0) { + PMO.setReg(AMDGPU::PRED_SEL_ZERO); + } else { + PMO.setReg(AMDGPU::PRED_SEL_ONE); + } + MachineInstrBuilder(MI).addReg(PredReg, RegState::Implicit); + return true; } - return false; } diff --git a/lib/Target/R600/R600LowerControlFlow.cpp b/lib/Target/R600/R600LowerControlFlow.cpp index 2499924dcde..bed8e33ea40 100644 --- a/lib/Target/R600/R600LowerControlFlow.cpp +++ b/lib/Target/R600/R600LowerControlFlow.cpp @@ -33,6 +33,7 @@ private: void mergeSuccessorBlocks(MachineBasicBlock &MBB, MachineInstr &BranchInstr) const; MachineBasicBlock *getFallThroughBlock(MachineBasicBlock *MBB) const; + bool canPredicateBlock(MachineBasicBlock::iterator I) const; public: R600LowerControlFlowPass(TargetMachine &tm) : @@ -123,6 +124,25 @@ MachineBasicBlock *R600LowerControlFlowPass::getFallThroughBlock( return FallThroughBlock; } +static bool +isPredicateSetter(unsigned Opcode) { + switch (Opcode) { + case AMDGPU::PRED_X: + case AMDGPU::PRED_SET: + case AMDGPU::PRED_SETE: + case AMDGPU::PRED_SETGT: + case AMDGPU::PRED_SETGE: + case AMDGPU::PRED_SETNE: + case AMDGPU::PRED_SETE_INT: + case AMDGPU::PRED_SETGT_INT: + case AMDGPU::PRED_SETGE_INT: + case AMDGPU::PRED_SETNE_INT: + return true; + default: + return false; + } +} + void R600LowerControlFlowPass::mergeSuccessorBlocks(MachineBasicBlock &MBB, MachineInstr &BranchInstr) const { unsigned BranchOnTrue = BranchInstr.getOperand(2).getImm(); @@ -361,8 +381,6 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { Next = MBB.rbegin(); } else { 
MachineInstr &PrevInst = *(++I); - PrevInst.dump(); - BranchBlock->front().dump(); // if (PrevInst.getOpcode() == AMDGPU::R600_IF && ( // BranchBlock->front().getOpcode() == AMDGPU::R600_ENDIF || // BranchBlock->front().getOpcode() == AMDGPU::R600_ELSE || @@ -454,5 +472,73 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { MBB.eraseFromParent(); } + // Fourth-Pass: Predicate simple branches + for (MachineBasicBlock::iterator I = EntryBlock.begin(), Next = llvm::next(I); + I != EntryBlock.end(); I = Next) { + + Next = llvm::next(I); + MachineInstr &MI = *I; + + if (!isPredicateSetter(MI.getOpcode())) { + continue; + } + + MachineInstr &PredInst = MI; + MachineInstr &IfInst = *(++I); + + if (!canPredicateBlock(I)) { + continue; + } + + SmallVector<MachineOperand, 2> Cond; + Cond.push_back(PredInst.getOperand(TII->getOperandIdx(PredInst.getOpcode(), + R600Operands::DST))); + Cond.push_back(IfInst.getOperand(2)); + + MachineBasicBlock::iterator PI; + MachineInstr *Else = NULL; + for (PI = ++I; (*PI).getOpcode() != AMDGPU::R600_ENDIF; ++PI) { + MachineInstr &InstrToPredicate = *PI; + if (InstrToPredicate.getOpcode() == AMDGPU::R600_ELSE) { + Else = &InstrToPredicate; + Cond[1].setImm(!Cond[1].getImm()); + continue; + } + TII->PredicateInstruction(&InstrToPredicate, Cond); + } + + MachineInstr &EndIf = *PI++; + IfInst.eraseFromParent(); + if (Else) { + Else->eraseFromParent(); + } + EndIf.eraseFromParent(); + + // Update the flags on the Predicate Instruction + TII->setImmOperand(&PredInst, R600Operands::UPDATE_EXEC_MASK, 0); + TII->setImmOperand(&PredInst, R600Operands::UPDATE_PREDICATE, 1); + + Next = PI; + } return false; } + +bool R600LowerControlFlowPass::canPredicateBlock( + MachineBasicBlock::iterator IfIterator) const { + for (MachineBasicBlock::iterator I = llvm::next(IfIterator); + (*I).getOpcode() != AMDGPU::R600_ENDIF; + ++I) { + MachineInstr &MI = *I; + if (MI.getOpcode() == AMDGPU::R600_BEGINLOOP || + MI.getOpcode() == 
AMDGPU::R600_IF) { + // Nested control flow + return false; + } + + if (MI.getOpcode() != AMDGPU::R600_ELSE && + !TII->R600InstrInfo::isPredicable(&MI)) { + return false; + } + } + return true; +} diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll index 18895a423e5..6915289935f 100644 --- a/test/CodeGen/R600/predicates.ll +++ b/test/CodeGen/R600/predicates.ll @@ -45,10 +45,10 @@ ENDIF: } ; CHECK: @nested_if -; CHECK: IF_PREDICATE_SET +; CHECK: R600_IF ; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, ; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel -; CHECK: ENDIF +; CHECK: R600_ENDIF define void @nested_if(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 @@ -70,11 +70,11 @@ ENDIF: } ; CHECK: @nested_if_else -; CHECK: IF_PREDICATE_SET +; CHECK: R600_IF ; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, ; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel ; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel -; CHECK: ENDIF +; CHECK: R600_ENDIF define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 |