diff options
26 files changed, 419 insertions, 137 deletions
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 88b375b0218..a9891350e57 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -309,6 +309,40 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N), N->getValueType(0), Ops); } + case AMDGPUISD::REGISTER_LOAD: { + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); + if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) + break; + SDValue Addr, Offset; + + SelectADDRIndirect(N->getOperand(1), Addr, Offset); + const SDValue Ops[] = { + Addr, + Offset, + CurDAG->getTargetConstant(0, MVT::i32), + N->getOperand(0), + }; + return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N), + CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other), + Ops); + } + case AMDGPUISD::REGISTER_STORE: { + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); + if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) + break; + SDValue Addr, Offset; + SelectADDRIndirect(N->getOperand(2), Addr, Offset); + const SDValue Ops[] = { + N->getOperand(1), + Addr, + Offset, + CurDAG->getTargetConstant(0, MVT::i32), + N->getOperand(0), + }; + return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N), + CurDAG->getVTList(MVT::Other), + Ops); + } } return SelectCode(N); } diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index f6c074a4cd5..f8f0596c5a0 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -15,6 +15,7 @@ #include "AMDGPUISelLowering.h" #include "AMDGPU.h" +#include "AMDGPUFrameLowering.h" #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" #include "AMDILIntrinsicInfo.h" @@ -227,8 +228,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) // AMDGPU DAG lowering case ISD::CONCAT_VECTORS: return 
LowerCONCAT_VECTORS(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); + case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); } return Op; @@ -302,6 +303,21 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, &Args[0], Args.size()); } +SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op, + SelectionDAG &DAG) const { + + MachineFunction &MF = DAG.getMachineFunction(); + const AMDGPUFrameLowering *TFL = + static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering()); + + FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op); + assert(FIN); + + unsigned FrameIndex = FIN->getIndex(); + unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); + return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), + Op.getValueType()); +} SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { @@ -539,7 +555,8 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { } StoreSDNode *Store = cast<StoreSDNode>(Op); - if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && + if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && Store->getValue().getValueType().isVector()) { return SplitVectorStore(Op, DAG); } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 8a68356c2e5..43f6389fac7 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -28,6 +28,7 @@ private: void ExtractVectorElements(SDValue Op, SelectionDAG &DAG, SmallVectorImpl<SDValue> &Args, unsigned Start, unsigned Count) const; + SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue 
LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index 434c91a5231..333a8c1d348 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -120,31 +120,43 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const { MachineBasicBlock *MBB = MI->getParent(); + int OffsetOpIdx = + AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::addr); + // addr is a custom operand with multiple MI operands, and only the + // first MI operand is given a name. + int RegOpIdx = OffsetOpIdx + 1; + int ChanOpIdx = + AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::chan); switch(MI->getOpcode()) { default: if (isRegisterLoad(*MI)) { - unsigned RegIndex = MI->getOperand(2).getImm(); - unsigned Channel = MI->getOperand(3).getImm(); + int DstOpIdx = + AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); + unsigned RegIndex = MI->getOperand(RegOpIdx).getImm(); + unsigned Channel = MI->getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); - unsigned OffsetReg = MI->getOperand(1).getReg(); + unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg(); if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { - buildMovInstr(MBB, MI, MI->getOperand(0).getReg(), + buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(), getIndirectAddrRegClass()->getRegister(Address)); } else { - buildIndirectRead(MBB, MI, MI->getOperand(0).getReg(), + buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(), Address, OffsetReg); } } else if (isRegisterStore(*MI)) { - unsigned RegIndex = MI->getOperand(2).getImm(); - unsigned Channel = MI->getOperand(3).getImm(); + int ValOpIdx = + AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::val); + 
AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); + unsigned RegIndex = MI->getOperand(RegOpIdx).getImm(); + unsigned Channel = MI->getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); - unsigned OffsetReg = MI->getOperand(1).getReg(); + unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg(); if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address), - MI->getOperand(0).getReg()); + MI->getOperand(ValOpIdx).getReg()); } else { - buildIndirectWrite(MBB, MI, MI->getOperand(0).getReg(), + buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(), calculateIndirectAddress(RegIndex, Channel), OffsetReg); } @@ -263,6 +275,52 @@ bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD; } +int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo*>( + MF.getTarget().getRegisterInfo()); + int Offset = -1; + + if (MFI->getNumObjects() == 0) { + return -1; + } + + if (MRI.livein_empty()) { + return 0; + } + + const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass(); + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + LE = MRI.livein_end(); + LI != LE; ++LI) { + unsigned Reg = LI->first; + if (TargetRegisterInfo::isVirtualRegister(Reg) || + !IndirectRC->contains(Reg)) + continue; + + Offset = std::max(Offset, (int)TRI->getHWRegIndex(Reg)); + } + + return Offset + 1; +} + +int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { + int Offset = 0; + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Variable sized objects are not supported + assert(!MFI->hasVarSizedObjects()); + + if (MFI->getNumObjects() == 0) { + return -1; + } + + Offset 
= TM.getFrameLowering()->getFrameIndexOffset(MF, -1); + + return getIndirectIndexBegin(MF) + Offset; +} + void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const { diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index dc65d4e75f7..6378fdd1eb4 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -99,6 +99,14 @@ protected: MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr *LoadMI) const; + /// \returns the smallest register index that will be accessed by an indirect + /// read or write or -1 if indirect addressing is not used by this program. + virtual int getIndirectIndexBegin(const MachineFunction &MF) const; + + /// \returns the largest register index that will be accessed by an indirect + /// read or write or -1 if indirect addressing is not used by this program. + virtual int getIndirectIndexEnd(const MachineFunction &MF) const; + public: bool canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const; @@ -144,14 +152,6 @@ public: virtual unsigned getIEQOpcode() const = 0; virtual bool isMov(unsigned opcode) const = 0; - /// \returns the smallest register index that will be accessed by an indirect - /// read or write or -1 if indirect addressing is not used by this program. - virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0; - - /// \returns the largest register index that will be accessed by an indirect - /// read or write or -1 if indirect addressing is not used by this program. 
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0; - /// \brief Calculate the "Indirect Address" for the given \p RegIndex and /// \p Channel /// diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 5778a8c2b23..33ad5ec5a37 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -35,6 +35,7 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern> } def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>; +def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; def COND_EQ : PatLeaf < (cond), @@ -277,6 +278,8 @@ class FNEG <RegisterClass rc> : AMDGPUShaderInst < multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, ComplexPattern addrPat> { +let UseNamedOperandTable = 1 in { + def RegisterLoad : AMDGPUShaderInst < (outs dstClass:$dst), (ins addrClass:$addr, i32imm:$chan), @@ -295,6 +298,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, let isRegisterStore = 1; } } +} } // End isCodeGenOnly = 1, isPseudo = 1 diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h index 135d3dd0207..688e1a02c12 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.h +++ b/lib/Target/R600/AMDGPURegisterInfo.h @@ -50,6 +50,10 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { assert(!"Unimplemented"); return NULL; } + virtual unsigned getHWRegIndex(unsigned Reg) const { + assert(!"Unimplemented"); return 0; + } + /// \returns the sub reg enum value for the given \p Channel /// (e.g. 
getSubRegFromChannel(0) -> AMDGPU::sub0) unsigned getSubRegFromChannel(unsigned Channel) const; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 62577eabf99..f231d47bd20 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -513,7 +513,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); - case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); @@ -820,20 +819,6 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, false, false, false, 0); } -SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { - - MachineFunction &MF = DAG.getMachineFunction(); - const AMDGPUFrameLowering *TFL = - static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering()); - - FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op); - assert(FIN); - - unsigned FrameIndex = FIN->getIndex(); - unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); - return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32); -} - bool R600TargetLowering::isZero(SDValue Op) const { if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { return Cst->isNullValue(); diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 6457ad42ea6..c10257eeada 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -59,7 +59,6 @@ private: SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; SDValue 
LowerTrig(SDValue Op, SelectionDAG &DAG) const; SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index a11d54a9f7d..aff11ce1b34 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -1024,67 +1024,25 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return 2; } -int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - int Offset = 0; - - if (MFI->getNumObjects() == 0) { - return -1; - } - - if (MRI.livein_empty()) { - return 0; - } - - for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), - LE = MRI.livein_end(); - LI != LE; ++LI) { - Offset = std::max(Offset, - GET_REG_INDEX(RI.getEncodingValue(LI->first))); - } - - return Offset + 1; -} - -int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { - int Offset = 0; - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Variable sized objects are not supported - assert(!MFI->hasVarSizedObjects()); - - if (MFI->getNumObjects() == 0) { - return -1; - } - - Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1); - - return getIndirectIndexBegin(MF) + Offset; -} - -std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs( +void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const { const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering()); - std::vector<unsigned> Regs; unsigned StackWidth = TFL->getStackWidth(MF); int End = getIndirectIndexEnd(MF); - if (End == -1) { - return Regs; - } + if (End == -1) + return; for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) { unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index); - Regs.push_back(SuperReg); + Reserved.set(SuperReg); for (unsigned Chan 
= 0; Chan < StackWidth; ++Chan) { unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan); - Regs.push_back(Reg); + Reserved.set(Reg); } } - return Regs; } unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex, diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index d7438ef2771..e2996c7a78f 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -193,14 +193,9 @@ namespace llvm { virtual int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const { return 1;} - /// \returns a list of all the registers that may be accesed using indirect - /// addressing. - std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const; - - virtual int getIndirectIndexBegin(const MachineFunction &MF) const; - - virtual int getIndirectIndexEnd(const MachineFunction &MF) const; - + /// \brief Reserve the registers that may be accesed using indirect addressing. + void reserveIndirectRegisters(BitVector &Reserved, + const MachineFunction &MF) const; virtual unsigned calculateIndirectAddress(unsigned RegIndex, unsigned Channel) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index f0c061ec9d6..d940d45a4ac 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -75,7 +75,6 @@ def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>; def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>; -def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp index dd8f3ef9814..fbe333d2038 100644 --- 
a/lib/Target/R600/R600RegisterInfo.cpp +++ b/lib/Target/R600/R600RegisterInfo.cpp @@ -28,6 +28,8 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm) BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo()); + Reserved.set(AMDGPU::ZERO); Reserved.set(AMDGPU::HALF); Reserved.set(AMDGPU::ONE); @@ -48,14 +50,8 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(*I); } - const R600InstrInfo *RII = - static_cast<const R600InstrInfo*>(TM.getInstrInfo()); - std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF); - for (std::vector<unsigned>::iterator I = IndirectRegs.begin(), - E = IndirectRegs.end(); - I != E; ++I) { - Reserved.set(*I); - } + TII->reserveIndirectRegisters(Reserved, MF); + return Reserved; } @@ -73,6 +69,10 @@ unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const { return this->getEncodingValue(reg) >> HW_CHAN_SHIFT; } +unsigned R600RegisterInfo::getHWRegIndex(unsigned Reg) const { + return GET_REG_INDEX(getEncodingValue(Reg)); +} + const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass( MVT VT) const { switch(VT.SimpleTy) { diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h index d458e557a4e..8833ee77e04 100644 --- a/lib/Target/R600/R600RegisterInfo.h +++ b/lib/Target/R600/R600RegisterInfo.h @@ -39,6 +39,8 @@ struct R600RegisterInfo : public AMDGPURegisterInfo { /// \brief get the HW encoding for a register's channel. 
unsigned getHWRegChan(unsigned reg) const; + virtual unsigned getHWRegIndex(unsigned Reg) const; + /// \brief get the register class of the specified type to use in the /// CFGStructurizer virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index fb21f6e860c..89b3c7c7674 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -70,6 +70,19 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + // We need to custom lower loads/stores from private memory + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::i64, Custom); + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + + setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); + setOperationAction(ISD::STORE, MVT::i128, Custom); + setOperationAction(ISD::STORE, MVT::v2i32, Custom); + setOperationAction(ISD::STORE, MVT::v4i32, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); @@ -89,12 +102,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::i64, MVT::i32, Expand); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::FrameIndex, MVT::i64, Custom); setTargetDAGCombine(ISD::SELECT_CC); @@ -111,6 +126,8 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *IsFast) const { // XXX: This depends on the address space and also we may 
want to revist // the alignment values we specify in the DataLayout. + if (!VT.isSimple() || VT == MVT::Other) + return false; return VT.bitsGT(MVT::i32); } @@ -332,6 +349,19 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MI->eraseFromParent(); break; } + case AMDGPU::SI_RegisterStorePseudo: { + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const SIInstrInfo *TII = + static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + MachineInstrBuilder MIB = + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore), + Reg); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + MIB.addOperand(MI->getOperand(i)); + + MI->eraseFromParent(); + } } return BB; } @@ -377,7 +407,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::LOAD: { LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); - if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && + if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && Op.getValueType().isVector()) { SDValue MergedValues[2] = { SplitVectorLoad(Op, DAG), @@ -385,11 +416,12 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { }; return DAG.getMergeValues(MergedValues, 2, SDLoc(Op)); } else { - return SDValue(); + return LowerLOAD(Op, DAG); } } case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); case ISD::INTRINSIC_WO_CHAIN: { @@ -600,6 +632,30 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, return Chain; } +SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + 
LoadSDNode *Load = cast<LoadSDNode>(Op); + + if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + return SDValue(); + + SDValue TruncPtr = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + Load->getBasePtr(), DAG.getConstant(0, MVT::i32)); + SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr, + DAG.getConstant(2, MVT::i32)); + + SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(), + Load->getChain(), Ptr, + DAG.getTargetConstant(0, MVT::i32), + Op.getOperand(2)); + SDValue MergedValues[2] = { + Ret, + Load->getChain() + }; + return DAG.getMergeValues(MergedValues, 2, DL); + +} + SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const { @@ -657,6 +713,52 @@ SDValue SITargetLowering::LowerSIGN_EXTEND(SDValue Op, return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi); } +SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + StoreSDNode *Store = cast<StoreSDNode>(Op); + EVT VT = Store->getMemoryVT(); + SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG); + if (Ret.getNode()) + return Ret; + + if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + return SDValue(); + + SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32); + SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr, + DAG.getConstant(2, MVT::i32)); + SDValue Chain = Store->getChain(); + SmallVector<SDValue, 8> Values; + + if (VT == MVT::i64) { + for (unsigned i = 0; i < 2; ++i) { + Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + Store->getValue(), DAG.getConstant(i, MVT::i32))); + } + } else if (VT == MVT::i128) { + for (unsigned i = 0; i < 2; ++i) { + for (unsigned j = 0; j < 2; ++j) { + Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, + Store->getValue(), DAG.getConstant(i, MVT::i32)), + DAG.getConstant(j, MVT::i32))); + } + } + } else { + 
Values.push_back(Store->getValue()); + } + + for (unsigned i = 0; i < Values.size(); ++i) { + SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, + Ptr, DAG.getConstant(i, MVT::i32)); + Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, + Chain, Values[i], PartPtr, + DAG.getTargetConstant(0, MVT::i32)); + } + return Chain; +} + + SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 9c54a6f48aa..ecfea15e612 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -25,8 +25,10 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue Chain, unsigned Offset) const; SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 7e42fb777d4..7ef662eb65b 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -186,7 +186,7 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) { - if (!Op.isReg()) + if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) return std::make_pair(0, 0); unsigned Reg = Op.getReg(); diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 4640d80fdfd..15ba7d4b25b 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -226,7 +226,8 @@ MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB, 
MachineBasicBlock::iterator I, unsigned DstReg, unsigned SrcReg) const { - assert(!"Not Implemented"); + return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32), + DstReg) .addReg(SrcReg); } bool SIInstrInfo::isMov(unsigned Opcode) const { @@ -595,17 +596,8 @@ unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex, return RegIndex; } - -int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { - llvm_unreachable("Unimplemented"); -} - -int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { - llvm_unreachable("Unimplemented"); -} - const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { - llvm_unreachable("Unimplemented"); + return &AMDGPU::VReg_32RegClass; } MachineInstrBuilder SIInstrInfo::buildIndirectWrite( @@ -613,7 +605,17 @@ MachineInstrBuilder SIInstrInfo::buildIndirectWrite( MachineBasicBlock::iterator I, unsigned ValueReg, unsigned Address, unsigned OffsetReg) const { - llvm_unreachable("Unimplemented"); + const DebugLoc &DL = MBB->findDebugLoc(I); + unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister( + getIndirectIndexBegin(*MBB->getParent())); + + return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1)) + .addReg(IndirectBaseReg, RegState::Define) + .addOperand(I->getOperand(0)) + .addReg(IndirectBaseReg) + .addReg(OffsetReg) + .addImm(0) + .addReg(ValueReg); } MachineInstrBuilder SIInstrInfo::buildIndirectRead( @@ -621,5 +623,43 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead( MachineBasicBlock::iterator I, unsigned ValueReg, unsigned Address, unsigned OffsetReg) const { - llvm_unreachable("Unimplemented"); + const DebugLoc &DL = MBB->findDebugLoc(I); + unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister( + getIndirectIndexBegin(*MBB->getParent())); + + return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC)) + .addOperand(I->getOperand(0)) + .addOperand(I->getOperand(1)) + .addReg(IndirectBaseReg) + .addReg(OffsetReg) + .addImm(0); + +} + 
+void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved, + const MachineFunction &MF) const { + int End = getIndirectIndexEnd(MF); + int Begin = getIndirectIndexBegin(MF); + + if (End == -1) + return; + + + for (int Index = Begin; Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index)); + + for (int Index = std::max(0, Index - 1); Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index)); + + for (int Index = std::max(0, Index - 2); Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index)); + + for (int Index = std::max(0, Index - 3); Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index)); + + for (int Index = std::max(0, Index - 7); Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index)); + + for (int Index = std::max(0, Index - 15); Index <= End; ++Index) + Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index)); } diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 31ceaf35f54..8bc53e65526 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -25,6 +25,14 @@ class SIInstrInfo : public AMDGPUInstrInfo { private: const SIRegisterInfo RI; + MachineInstrBuilder buildIndirectIndexLoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned OffsetVGPR, + unsigned MovRelOp, + unsigned Dst, + unsigned Src0) const; + // If you add or remove instructions from this function, you will + public: explicit SIInstrInfo(AMDGPUTargetMachine &tm); @@ -58,9 +66,6 @@ public: virtual bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const; - virtual int getIndirectIndexBegin(const MachineFunction &MF) const; - - virtual int getIndirectIndexEnd(const MachineFunction &MF) const; bool isSALUInstr(const MachineInstr &MI) const; unsigned getVALUOp(const MachineInstr &MI) const; @@ -115,7 +120,12 @@ public: unsigned ValueReg, unsigned Address, unsigned OffsetReg) 
const; - }; + void reserveIndirectRegisters(BitVector &Reserved, + const MachineFunction &MF) const; + + void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I, + unsigned SavReg, unsigned IndexReg) const; +}; namespace AMDGPU { diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index b55f59d1618..4cd0daa55c5 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -121,6 +121,10 @@ class SGPRImm <dag frag> : PatLeaf<frag, [{ return false; }]>; +def FRAMEri64 : Operand<iPTR> { + let MIOperandInfo = (ops SReg_32:$ptr, i32imm:$index); +} + //===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index a8b1d53deda..05af6e91b06 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1286,6 +1286,36 @@ def SI_KILL : InstSI < let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { +//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri64, ADDRIndirect>; + +let UseNamedOperandTable = 1 in { + +def SI_RegisterLoad : AMDGPUShaderInst < + (outs VReg_32:$dst, SReg_64:$temp), + (ins FRAMEri64:$addr, i32imm:$chan), + "", [] +> { + let isRegisterLoad = 1; + let mayLoad = 1; +} + +class SIRegStore<dag outs> : AMDGPUShaderInst < + outs, + (ins VReg_32:$val, FRAMEri64:$addr, i32imm:$chan), + "", [] +> { + let isRegisterStore = 1; + let mayStore = 1; +} + +let usesCustomInserter = 1 in { +def SI_RegisterStorePseudo : SIRegStore<(outs)>; +} // End usesCustomInserter = 1 +def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>; + + +} // End UseNamedOperandTable = 1 + def SI_INDIRECT_SRC : InstSI < (outs VReg_32:$dst, SReg_64:$temp), (ins unknown:$src, VSrc_32:$idx, i32imm:$off), @@ -1302,6 +1332,7 @@ class SI_INDIRECT_DST<RegisterClass rc> : InstSI < let Constraints = "$src = $dst"; } +def 
SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VReg_32>;
 def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
 def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
 def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
@@ -1978,6 +2009,10 @@ def : Pat<
 //============================================================================//
 // Misc patterns
 //============================================================================//
+def : Pat <
+  (i32 (trunc i64:$a)),
+  (EXTRACT_SUBREG $a, sub0)
+>;
 
 def : Pat <
   (or i64:$a, i64:$b),
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index a6c43bbb2c5..958763dffc2 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -377,10 +377,13 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
   unsigned Dst = MI.getOperand(0).getReg();
   unsigned Vec = MI.getOperand(2).getReg();
   unsigned Off = MI.getOperand(4).getImm();
+  unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
+  if (!SubReg)
+    SubReg = Vec;
 
-  MachineInstr *MovRel =
+  MachineInstr *MovRel =
     BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
-          .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+          .addReg(SubReg + Off)
           .addReg(AMDGPU::M0, RegState::Implicit)
           .addReg(Vec, RegState::Implicit);
@@ -395,10 +398,13 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
   unsigned Dst = MI.getOperand(0).getReg();
   unsigned Off = MI.getOperand(4).getImm();
   unsigned Val = MI.getOperand(5).getReg();
+  unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
+  if (!SubReg)
+    SubReg = Dst;
 
   MachineInstr *MovRel =
     BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
-          .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+          .addReg(SubReg + Off, RegState::Define)
           .addReg(Val)
           .addReg(AMDGPU::M0, RegState::Implicit)
           .addReg(Dst, RegState::Implicit);
@@ -477,6 +483,7 @@ bool
SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { IndirectSrc(MI); break; + case AMDGPU::SI_INDIRECT_DST_V1: case AMDGPU::SI_INDIRECT_DST_V2: case AMDGPU::SI_INDIRECT_DST_V4: case AMDGPU::SI_INDIRECT_DST_V8: diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index e06a02257fe..0bbad09cf15 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -15,6 +15,7 @@ #include "SIRegisterInfo.h" #include "AMDGPUTargetMachine.h" +#include "SIInstrInfo.h" using namespace llvm; @@ -26,6 +27,9 @@ SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm) BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(AMDGPU::EXEC); + Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); + const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(TM.getInstrInfo()); + TII->reserveIndirectRegisters(Reserved, MF); return Reserved; } @@ -51,6 +55,10 @@ const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass( } } +unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const { + return getEncodingValue(Reg); +} + const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { assert(!TargetRegisterInfo::isVirtualRegister(Reg)); diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index ba831b0f77b..8148f7fa476 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -42,6 +42,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { /// CFGStructurizer virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; + virtual unsigned getHWRegIndex(unsigned Reg) const; + /// \brief Return the 'base' register class for this register. /// e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR1 -> SReg_32, etc. 
const TargetRegisterClass *getPhysRegClass(unsigned Reg) const; diff --git a/test/CodeGen/R600/indirect-addressing.ll b/test/CodeGen/R600/private-memory.ll index 1ef6c358921..48a013c8e54 100644 --- a/test/CodeGen/R600/indirect-addressing.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -1,16 +1,24 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK ; This test checks that uses and defs of the AR register happen in the same ; instruction clause. -; CHECK: @mova_same_clause -; CHECK: MOVA_INT -; CHECK-NOT: ALU clause -; CHECK: 0 + AR.x -; CHECK: MOVA_INT -; CHECK-NOT: ALU clause -; CHECK: 0 + AR.x +; R600-CHECK-LABEL: @mova_same_clause +; R600-CHECK: MOVA_INT +; R600-CHECK-NOT: ALU clause +; R600-CHECK: 0 + AR.x +; R600-CHECK: MOVA_INT +; R600-CHECK-NOT: ALU clause +; R600-CHECK: 0 + AR.x +; SI-CHECK-LABEL: @mova_same_clause +; SI-CHECK: V_READFIRSTLANE +; SI-CHECK: V_MOVRELD +; SI-CHECK: S_CBRANCH +; SI-CHECK: V_READFIRSTLANE +; SI-CHECK: V_MOVRELD +; SI-CHECK: S_CBRANCH define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: %stack = alloca [5 x i32], align 4 @@ -38,9 +46,10 @@ entry: ; XXX: This generated code has unnecessary MOVs, we should be able to optimize ; this. -; CHECK: @multiple_structs -; CHECK-NOT: MOVA_INT - +; R600-CHECK-LABEL: @multiple_structs +; R600-CHECK-NOT: MOVA_INT +; SI-CHECK-LABEL: @multiple_structs +; SI-CHECK-NOT: V_MOVREL %struct.point = type { i32, i32 } define void @multiple_structs(i32 addrspace(1)* %out) { @@ -68,8 +77,10 @@ entry: ; loads and stores should be lowered to copies, so there shouldn't be any ; MOVA instructions. 
-; CHECK: @direct_loop
-; CHECK-NOT: MOVA_INT
+; R600-CHECK-LABEL: @direct_loop
+; R600-CHECK-NOT: MOVA_INT
+; SI-CHECK-LABEL: @direct_loop
+; SI-CHECK-NOT: V_MOVREL
 
 define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
index 3ede6a5d39d..01728f7742c 100644
--- a/test/CodeGen/R600/sra.ll
+++ b/test/CodeGen/R600/sra.ll
@@ -43,7 +43,7 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
 ;EG-CHECK: ASHR
 ;SI-CHECK-LABEL: @ashr_i64
-;SI-CHECK: V_ASHR_I64
+;SI-CHECK: S_ASHR_I64 SGPR{{[0-9]}}_SGPR{{[0-9]}}, SGPR{{[0-9]}}_SGPR{{[0-9]}}, 8
 define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = sext i32 %in to i64