diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-02-01 20:15:43 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-02-04 17:11:33 +0000 |
commit | 7e1a91c49d3a8139a7c4245e07e24a9b01e5507b (patch) | |
tree | 970a69f34e872a90a1838e32810d01cca0610ff2 | |
parent | 587e40d942e165366cb4c2581b135fd8ce1a470e (diff) |
XXX: WIP on loops
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 1 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.h | 1 | ||||
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 89 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.cpp | 1 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 51 | ||||
-rw-r--r-- | lib/Target/R600/R600LowerControlFlow.cpp | 60 | ||||
-rw-r--r-- | lib/Target/R600/R600RegisterInfo.td | 2 |
7 files changed, 150 insertions, 55 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 6b546bf58d5..3997f712e4f 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -406,6 +406,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FMAX) NODE_NAME_CASE(ELSE) NODE_NAME_CASE(ELSEBREAK) + NODE_NAME_CASE(ENDIF) NODE_NAME_CASE(IF) NODE_NAME_CASE(IFBREAK) NODE_NAME_CASE(LOOP) diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 4c25b34aec6..bf119288e21 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -116,6 +116,7 @@ enum { IFBREAK, ELSE, ELSEBREAK, + ENDIF, FRACT, FMAX, LOOP, diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index b4d9474ab05..86ba8db4f7f 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -189,11 +189,31 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( // R600_ENDIF Pred0 // - MachineBasicBlock::iterator InsertPoint; - for (InsertPoint = I; !InsertPoint->isBranch(); ++InsertPoint); + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned NumUses = 0; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(DstReg), + UE = MachineRegisterInfo::use_end(); + UI != UE; ++UI, ++NumUses) { + MachineInstr *Use = &(*UI); + MachineBasicBlock::iterator InsertPoint = Use; + MachineInstr *PredSet; + + if (NumUses < 1) { + MI->removeFromParent(); + PredSet = MI; + } else { + // PRED_SET can only have one use, so if there is more than one use, we + // need to clone the instruction. + PredSet = MF->CloneMachineInstr(MI); + const TargetRegisterClass *DstRegClass = + MRI.getRegClass(MI->getOperand(0).getReg()); + unsigned NewDst = MRI.createVirtualRegister(DstRegClass); + PredSet->getOperand(0).setReg(NewDst); + UI.getOperand().setReg(NewDst); + } + BB->insert(InsertPoint, PredSet); + } - MI->removeFromParent(); - BB->insert(InsertPoint, MI); return BB; } @@ -432,6 +452,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const } + case AMDGPUIntrinsic::R600_endcf: { + return DAG.getNode(AMDGPUISD::ENDIF, Op.getDebugLoc(), MVT::Other, Chain); + } + // default for switch(IntrinsicID) default: break; } @@ -441,6 +465,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::INTRINSIC_W_CHAIN: { SDValue Chain = Op.getOperand(0); + DebugLoc DL = Op.getDebugLoc(); unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); switch (IntrinsicID) { @@ -448,10 +473,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case AMDGPUIntrinsic::R600_ifbreak: { SDValue SetCC = Op.getOperand(2); if (SetCC.getOpcode() == ISD::SETCC) { - DebugLoc DL = Op.getDebugLoc(); bool IsInteger = SetCC.getOperand(0).getValueType().isInteger(); unsigned CondCode = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); - SDValue PredSet = DAG.getNode(AMDGPUISD::PRED_SET, DL, MVT::i32, + SDValue PredSet = DAG.getNode(AMDGPUISD::PRED_SET, DL, MVT::i1, DAG.getTargetConstant(CondCode, MVT::i32), DAG.getTargetConstant(IsInteger, MVT::i1), SetCC.getOperand(0), @@ -464,7 +488,13 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const } break; } + case AMDGPUIntrinsic::R600_elsebreak: { + SDValue Out[2]; + Out[0] = Op.getOperand(2); + Out[1] = DAG.getNode(AMDGPUISD::ELSEBREAK, DL, MVT::Other, Chain); + return DAG.getMergeValues(Out, 2, DL); } + } break; } case ISD::INTRINSIC_WO_CHAIN: { @@ -698,14 +728,10 @@ SDValue R600TargetLowering::LowerBR_COND(SDValue Op, SelectionDAG &DAG) const { } case AMDGPUIntrinsic::R600_loop: { - Ret = DAG.getNode(AMDGPUISD::LOOP, DL, MVT::Other, Chain, Cond, BB); + Ret = DAG.getNode(AMDGPUISD::LOOP, DL, MVT::Other, Chain, BB); break; } - case AMDGPUIntrinsic::R600_elsebreak: { - Ret = DAG.getNode(AMDGPUISD::ELSEBREAK, DL, MVT::Other, Chain, Cond, BB); - break; - } } DAG.ReplaceAllUsesOfValueWith(SDValue(CFIntrinsic.getNode(), 0), Cond); @@ -753,6 +779,17 @@ SDValue R600TargetLowering::LowerCopyToReg(SDValue Op, break; } + case AMDGPUISD::PRED_SET: { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value.getOperand(0)); + ISD::CondCode CC = (ISD::CondCode)C->getZExtValue(); + SDValue SetCC = DAG.getSetCC(DL, MVT::i1, Value.getOperand(2), + Value.getOperand(3), + CC); + NewValue = LowerSETCC(SetCC, DAG); + break; + } + + } MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); MRI.setRegClass(cast<RegisterSDNode>(Op.getOperand(1))->getReg(), @@ -1427,6 +1464,36 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, CCOpcode); } + + case AMDGPUISD::PRED_SET: { + // (pred_set cc0 (selectcc lhs, rhs, -1, 0, cc1), 0) -> pred_set cc2, lhs rhs + ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N->getOperand(3)); + SDValue SelectCC = N->getOperand(2); + if (!C1 || SelectCC.getOpcode() != ISD::SELECT_CC || + !isHWTrueValue(SelectCC.getOperand(2)) || + !isHWFalseValue(SelectCC.getOperand(3))) { + return SDValue(); + } + + ISD::CondCode OldCondCode = (ISD::CondCode) + (dyn_cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()); + ISD::CondCode NewCondCode = + cast<CondCodeSDNode>(SelectCC->getOperand(4))->get(); + + if (OldCondCode == ISD::SETEQ) { + NewCondCode = ISD::getSetCCInverse(NewCondCode, + SelectCC.getValueType().isInteger()); + } + + return DAG.getNode(AMDGPUISD::PRED_SET, N->getDebugLoc(), N->getValueType(0), + DAG.getTargetConstant(NewCondCode, MVT::i32), + DAG.getTargetConstant( + SelectCC.getValueType().isInteger(), MVT::i1), + SelectCC.getOperand(0), + SelectCC.getOperand(1)); + + } + } return SDValue(); } diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 71a3e914d3f..83509d695a1 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -191,6 +191,7 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { + return true; // Most of the following comes from the ARM implementation of AnalyzeBranch // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 03914fe7715..ab932f7fc2c 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -680,6 +680,10 @@ def SDTIf : SDTypeProfile<0, 2, [ SDTCisVT<1, OtherVT> ]>; +def SDTLoop : SDTypeProfile<0, 1, [ + SDTCisVT<0, OtherVT> +]>; + def SDTIfBreak : SDTypeProfile<1, 1, [ SDTCisInt<0>, SDTCisInt<1> ]>; @@ -695,9 +699,12 @@ def SDTPredSet : SDTypeProfile<1, 4, [ // setcc def R600if : SDNode<"AMDGPUISD::IF", SDTIf, [SDNPHasChain]>; def R600ifbreak : SDNode<"AMDGPUISD::IFBREAK", SDTIfBreak, [SDNPHasChain]>; def R600else : SDNode<"AMDGPUISD::ELSE", SDTElse, [SDNPHasChain]>; -def R600loop : SDNode<"AMDGPUISD::LOOP", SDTIf, [SDNPHasChain]>; -def R600elsebreak : SDNode<"AMDGPUISD::ELSEBREAK", SDTIf, [SDNPHasChain]>; +def R600loop : SDNode<"AMDGPUISD::LOOP", SDTLoop, [SDNPHasChain]>; +def R600elsebreak : SDNode<"AMDGPUISD::ELSEBREAK", + SDTypeProfile<0, 0, []>, [SDNPHasChain]>; def R600predset : SDNode<"AMDGPUISD::PRED_SET", SDTPredSet>; +def R600endif : SDNode<"AMDGPUISD::ENDIF", + SDTypeProfile<0, 0, []>, [SDNPHasChain]>; let Predicates = [isR600toCayman] in { @@ -1795,9 +1802,9 @@ def R600_ELSE : AMDGPUInst < def R600_LOOP : AMDGPUInst < (outs), - (ins brtarget:$true, R600_PredReg:$cond, PRED_SENSE:$pred_sense), - "R600_LOOP $cond $true", - [(R600loop R600_PredReg:$cond, bb:$true)] + (ins brtarget:$true, PRED_SENSE:$pred_sense), + "R600_LOOP $true", + [(R600loop bb:$true)] >; @@ -1825,13 +1832,6 @@ def R600_IFBREAK : AMDGPUInst < [] >; -def R600_ELSEBREAK : AMDGPUInst < - (outs R600_PredReg:$dst), - (ins R600_PredReg:$cond), - "R600_ELSEBREAK $dst, $cond", - [(set R600_PredReg:$dst, (int_R600_elsebreak R600_PredReg:$cond))] ->; - def R600_BREAK : AMDGPUInst < (outs R600_PredReg:$dst), (ins R600_PredReg:$cond), @@ -1842,19 +1842,29 @@ def R600_BREAK : AMDGPUInst < def R600_PREDICATED_BREAK : AMDGPUInst < (outs R600_PredReg:$dst), - (ins R600_TReg32:$cond), + (ins R600_PredReg:$cond), "R600_PREDICATED_BREAK $dst, $cond", [] >; +let hasSideEffects = 1 in { + +def R600_ELSEBREAK : AMDGPUInst < + (outs), + (ins), + "R600_ELSEBREAK", + [(R600elsebreak)] +>; def R600_ENDIF : AMDGPUInst < (outs), - (ins R600_PredReg:$cond), - "R600_ENDIF $cond", - [(int_R600_endcf R600_PredReg:$cond)] + (ins), + "R600_ENDIF", + [(R600endif)] >; +} // End hasSideEffects = 1 + } // End isCodeGenOnly = 1, isPseudo = 1 let isPseudo = 1, usesCustomInserter = 1 in { @@ -1965,14 +1975,11 @@ def : Pat < >; // R600ifbreak patterns: -class IfBreakPat <Instruction PredInst, int CondCode> : Pat < - (i1 (R600ifbreak (i32 (R600predset (i32 CondCode /*SETEQ*/), (i1 timm), (i32 R600_Reg32:$src0), (i32 R600_Reg32:$src1))))), - (R600_PREDICATED_BREAK (i32 (PredInst R600_Reg32:$src0, R600_Reg32:$src1))) +def : Pat < + (i1 (R600ifbreak (i1 (R600predset (i32 timm:$cc), (i1 timm:$isint), (i32 R600_Reg32:$src0), (i32 R600_Reg32:$src1))))), + (R600_PREDICATED_BREAK (i1 (PRED_SET imm:$cc, imm:$isint, R600_Reg32:$src0, R600_Reg32:$src1))) >; -def : IfBreakPat <PRED_SETE_INT, 17 /* SETEQ */>; -def : IfBreakPat <PRED_SETNE_INT, 22 /* SETNE */>; - // SGT Reverse args def : Pat < (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT), diff --git a/lib/Target/R600/R600LowerControlFlow.cpp b/lib/Target/R600/R600LowerControlFlow.cpp index 73d4530599d..c0ec81a046c 100644 --- a/lib/Target/R600/R600LowerControlFlow.cpp +++ b/lib/Target/R600/R600LowerControlFlow.cpp @@ -308,20 +308,23 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { // BB:#1: // ... // + case AMDGPU::R600_PREDICATED_BREAK: case AMDGPU::R600_IF: { bool InversePred = false; - unsigned BranchOnTrue = MI.getOperand(2).getImm(); - if (BranchOnTrue) { - MachineBasicBlock *BranchBlock = MI.getOperand(0).getMBB(); - MachineBasicBlock *FallThroughBlock; - MachineBasicBlock::succ_iterator SI = MBB.succ_begin(); - if ((*SI)->getNumber() != BranchBlock->getNumber()) { - FallThroughBlock = *SI; - } else { - FallThroughBlock = *(++SI); + if (MI.getOpcode() == AMDGPU::R600_IF) { + unsigned BranchOnTrue = MI.getOperand(2).getImm(); + if (BranchOnTrue) { + MachineBasicBlock *BranchBlock = MI.getOperand(0).getMBB(); + MachineBasicBlock *FallThroughBlock; + MachineBasicBlock::succ_iterator SI = MBB.succ_begin(); + if ((*SI)->getNumber() != BranchBlock->getNumber()) { + FallThroughBlock = *SI; + } else { + FallThroughBlock = *(++SI); + } + InversePred = true; + MI.getOperand(2).setImm(0); } - InversePred = true; - MI.getOperand(2).setImm(0); } // Lower the PRED_SET instruction to the correct hardware instruction. MachineInstr &PredSet = *(++I); @@ -357,11 +360,9 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { } TII->setImmOperand(HWPredSet, R600Operands::WRITE, 0); // if (Flags & MO_FLAG_PUSH) { - TII->setImmOperand(HWPredSet, R600Operands::UPDATE_EXEC_MASK, 1); -// } else { + TII->setImmOperand(HWPredSet, R600Operands::UPDATE_EXEC_MASK, 1); + // } else { // TII->setImmOperand(HWPredSet, R600Operands::UPDATE_PREDICATE, 1); -// } - break; } @@ -419,18 +420,29 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { FallThroughBlock = *(++SI); } - if (FallThroughBlock->front().getOpcode() == AMDGPU::R600_ENDIF) { - FallThroughBlock->front().eraseFromParent(); + if (MI.getOperand(1).getImm() == 1) { + std::swap(FallThroughBlock, BranchToBlock); + } + + MachineBasicBlock::iterator EndIfI = FallThroughBlock->begin(); + while (EndIfI != FallThroughBlock->end()) { + if ((*EndIfI).getOpcode() == AMDGPU::R600_ENDIF) { + break; + } + EndIfI++; } + MachineInstr &EndIf = *EndIfI; + assert(EndIf.getOpcode() == AMDGPU::R600_ENDIF); BuildMI(MBB, MBB.end(), MBB.findDebugLoc(MBB.end()), TII->get(AMDGPU::R600_ENDLOOP)) - .addOperand(MI.getOperand(1)); + .addReg(AMDGPU::PRED_STACK); BuildMI(*BranchToBlock, BranchToBlock->begin(), BranchToBlock->findDebugLoc(BranchToBlock->begin()), TII->get(AMDGPU::R600_BEGINLOOP)) - .addOperand(MI.getOperand(1)); + .addReg(AMDGPU::PRED_STACK); MI.eraseFromParent(); + EndIf.eraseFromParent(); Next = MBB.rbegin(); break; } @@ -438,22 +450,24 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { DebugLoc DL = MBB.findDebugLoc((MachineBasicBlock::iterator)&MI); BuildMI(MBB, &MI, DL, TII->get(AMDGPU::R600_ELSE)) .addMBB(*MBB.succ_begin()) - .addOperand(MI.getOperand(1)) + .addReg(AMDGPU::PRED_STACK) .addImm(0); BuildMI(MBB, &MI, DL, TII->get(AMDGPU::R600_BREAK)) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(0)); + .addReg(AMDGPU::PRED_STACK) + .addReg(AMDGPU::PRED_STACK); MI.eraseFromParent(); Next = MBB.rbegin(); break; } +#if 0 case AMDGPU::R600_PREDICATED_BREAK: { MachineInstr &PredSet = *(++I); TII->setImmOperand(&PredSet, R600Operands::WRITE, 0); TII->setImmOperand(&PredSet, R600Operands::UPDATE_EXEC_MASK, 1); break; } +#endif } } } @@ -472,6 +486,8 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { MBB.eraseFromParent(); } + return false; + // Fourth-Pass: Predicate simple branches for (MachineBasicBlock::iterator I = EntryBlock.begin(), Next = llvm::next(I); I != EntryBlock.end(); I = Next) { diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 2bc2ca051f8..7cfe9f72cb6 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -149,6 +149,8 @@ foreach Index = 0-16 in { def Pred#Index : R600Reg<"", Index>; } +def PRED_STACK : R600Reg<"PRED_STACK", 0>; + def TRUE : R600Reg<"True", 0>; def FALSE: R600Reg<"False", 0>; |