Diffstat (limited to 'lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 283
1 file changed, 3 insertions, 280 deletions
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 05dce507e58..27d569d7043 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -43,7 +43,6 @@ STATISTIC(NumUnscaledPairCreated,
           "Number of load/store from unscaled generated");
 STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
 STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
-STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
 
 static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
                                    cl::init(20), cl::Hidden);
@@ -94,12 +93,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                                LdStPairFlags &Flags,
                                                unsigned Limit);
-
-  // Scan the instructions looking for a store that writes to the address from
-  // which the current load instruction reads. Return true if one is found.
-  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
-                         MachineBasicBlock::iterator &StoreI);
-
   // Merge the two instructions indicated into a single pair-wise instruction.
   // If MergeForward is true, erase the first instruction and fold its
   // operation into the second. If false, the reverse. Return the instruction
@@ -109,11 +102,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
                    MachineBasicBlock::iterator Paired,
                    const LdStPairFlags &Flags);
 
-  // Promote the load that reads directly from the address stored to.
-  MachineBasicBlock::iterator
-  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
-                       MachineBasicBlock::iterator StoreI);
-
   // Scan the instruction list to find a base register update that can
   // be combined with the current instruction (a load or store) using
   // pre or post indexed addressing with writeback. Scan forwards.
@@ -140,9 +128,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Find and merge foldable ldr/str instructions.
   bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
 
-  // Find and promote load instructions which read directly from store.
-  bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
-
   // Check if converting two narrow loads into a single wider load with
   // bitfield extracts could be enabled.
   bool enableNarrowLdMerge(MachineFunction &Fn);
@@ -414,36 +399,6 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
   }
 }
 
-static unsigned isMatchingStore(MachineInstr *LoadInst,
-                                MachineInstr *StoreInst) {
-  unsigned LdOpc = LoadInst->getOpcode();
-  unsigned StOpc = StoreInst->getOpcode();
-  switch (LdOpc) {
-  default:
-    llvm_unreachable("Unsupported load instruction!");
-  case AArch64::LDRBBui:
-    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
-           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
-  case AArch64::LDURBBi:
-    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
-           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
-  case AArch64::LDRHHui:
-    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
-           StOpc == AArch64::STRXui;
-  case AArch64::LDURHHi:
-    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
-           StOpc == AArch64::STURXi;
-  case AArch64::LDRWui:
-    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
-  case AArch64::LDURWi:
-    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
-  case AArch64::LDRXui:
-    return StOpc == AArch64::STRXui;
-  case AArch64::LDURXi:
-    return StOpc == AArch64::STURXi;
-  }
-}
-
 static unsigned getPreIndexedOpcode(unsigned Opc) {
   switch (Opc) {
   default:
@@ -598,21 +553,6 @@ static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) {
   return MI->getOperand(Idx);
 }
 
-static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
-                                  MachineInstr *StoreInst) {
-  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
-  int LoadSize = getMemScale(LoadInst);
-  int StoreSize = getMemScale(StoreInst);
-  int UnscaledStOffset = isUnscaledLdSt(StoreInst)
-                             ? getLdStOffsetOp(StoreInst).getImm()
-                             : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
-  int UnscaledLdOffset = isUnscaledLdSt(LoadInst)
-                             ? getLdStOffsetOp(LoadInst).getImm()
-                             : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
-  return (UnscaledStOffset <= UnscaledLdOffset) &&
-         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
-}
-
 // Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                    MachineInstr *Op1) {
@@ -860,106 +800,6 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   return NextI;
 }
 
-MachineBasicBlock::iterator
-AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
-                                          MachineBasicBlock::iterator StoreI) {
-  MachineBasicBlock::iterator NextI = LoadI;
-  ++NextI;
-
-  int LoadSize = getMemScale(LoadI);
-  int StoreSize = getMemScale(StoreI);
-  unsigned LdRt = getLdStRegOp(LoadI).getReg();
-  unsigned StRt = getLdStRegOp(StoreI).getReg();
-  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
-
-  assert((IsStoreXReg ||
-          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
-         "Unexpected RegClass");
-
-  MachineInstr *BitExtMI;
-  if (LoadSize == StoreSize) {
-    // Remove the load, if the destination register of the loads is the same
-    // register for stored value.
-    if (StRt == LdRt) {
-      DEBUG(dbgs() << "Remove load instruction:\n    ");
-      DEBUG(LoadI->print(dbgs()));
-      DEBUG(dbgs() << "\n");
-      LoadI->eraseFromParent();
-      return NextI;
-    }
-    // Replace the load with a mov if the load and store are in the same size.
-    BitExtMI =
-        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
-                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
-            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
-            .addReg(StRt)
-            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
-  } else {
-    // FIXME: Currently we disable this transformation in big-endian targets as
-    // performance and correctness are verified only in little-endian.
-    if (!Subtarget->isLittleEndian())
-      return NextI;
-    bool IsUnscaled = isUnscaledLdSt(LoadI);
-    assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match");
-    assert(LoadSize < StoreSize && "Invalid load size");
-    int UnscaledLdOffset = IsUnscaled
-                               ? getLdStOffsetOp(LoadI).getImm()
-                               : getLdStOffsetOp(LoadI).getImm() * LoadSize;
-    int UnscaledStOffset = IsUnscaled
-                               ? getLdStOffsetOp(StoreI).getImm()
-                               : getLdStOffsetOp(StoreI).getImm() * StoreSize;
-    int Width = LoadSize * 8;
-    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
-    int Imms = Immr + Width - 1;
-    unsigned DestReg = IsStoreXReg
-                           ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
-                                                      &AArch64::GPR64RegClass)
-                           : LdRt;
-
-    assert(((UnscaledLdOffset) >= UnscaledStOffset &&
-            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
-           "Invalid offset");
-
-    Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
-    Imms = Immr + Width - 1;
-    if (UnscaledLdOffset == UnscaledStOffset) {
-      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
-                                | ((Immr) << 6)               // immr
-                                | ((Imms) << 0)               // imms
-          ;
-
-      BitExtMI =
-          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
-                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
-                  DestReg)
-              .addReg(StRt)
-              .addImm(AndMaskEncoded);
-    } else {
-      BitExtMI =
-          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
-                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
-                  DestReg)
-              .addReg(StRt)
-              .addImm(Immr)
-              .addImm(Imms);
-    }
-  }
-
-  DEBUG(dbgs() << "Promoting load by replacing :\n    ");
-  DEBUG(StoreI->print(dbgs()));
-  DEBUG(dbgs() << "    ");
-  DEBUG(LoadI->print(dbgs()));
-  DEBUG(dbgs() << "  with instructions:\n    ");
-  DEBUG(StoreI->print(dbgs()));
-  DEBUG(dbgs() << "    ");
-  DEBUG((BitExtMI)->print(dbgs()));
-  DEBUG(dbgs() << "\n");
-
-  // Erase the old instructions.
-  LoadI->eraseFromParent();
-  return NextI;
-}
-
 /// trackRegDefsUses - Remember what registers the specified instruction uses
 /// and modifies.
 static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
@@ -1023,60 +863,6 @@ static bool mayAlias(MachineInstr *MIa,
   return false;
 }
 
-bool AArch64LoadStoreOpt::findMatchingStore(
-    MachineBasicBlock::iterator I, unsigned Limit,
-    MachineBasicBlock::iterator &StoreI) {
-  MachineBasicBlock::iterator E = I->getParent()->begin();
-  MachineBasicBlock::iterator MBBI = I;
-  MachineInstr *FirstMI = I;
-  unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
-
-  // Track which registers have been modified and used between the first insn
-  // and the second insn.
-  BitVector ModifiedRegs, UsedRegs;
-  ModifiedRegs.resize(TRI->getNumRegs());
-  UsedRegs.resize(TRI->getNumRegs());
-
-  for (unsigned Count = 0; MBBI != E && Count < Limit;) {
-    --MBBI;
-    MachineInstr *MI = MBBI;
-    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
-    // optimization by changing how far we scan.
-    if (MI->isDebugValue())
-      continue;
-    // Now that we know this is a real instruction, count it.
-    ++Count;
-
-    // If the load instruction reads directly from the address to which the
-    // store instruction writes and the stored value is not modified, we can
-    // promote the load. Since we do not handle stores with pre-/post-index,
-    // it's unnecessary to check if BaseReg is modified by the store itself.
-    if (MI->mayStore() && isMatchingStore(FirstMI, MI) &&
-        BaseReg == getLdStBaseOp(MI).getReg() &&
-        isLdOffsetInRangeOfSt(FirstMI, MI) &&
-        !ModifiedRegs[getLdStRegOp(MI).getReg()]) {
-      StoreI = MBBI;
-      return true;
-    }
-
-    if (MI->isCall())
-      return false;
-
-    // Update modified / uses register lists.
-    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
-
-    // Otherwise, if the base register is modified, we have no match, so
-    // return early.
-    if (ModifiedRegs[BaseReg])
-      return false;
-
-    // If we encounter a store aliased with the load, return early.
-    if (MI->mayStore() && mayAlias(FirstMI, MI, TII))
-      return false;
-  }
-  return false;
-}
-
 /// findMatchingInsn - Scan the instructions looking for a load/store that can
 /// be combined with the current instruction into a load/store pair.
 MachineBasicBlock::iterator
@@ -1477,31 +1263,6 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
   return E;
 }
 
-bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
-    MachineBasicBlock::iterator &MBBI) {
-  MachineInstr *MI = MBBI;
-  // If this is a volatile load, don't mess with it.
-  if (MI->hasOrderedMemoryRef())
-    return false;
-
-  // Make sure this is a reg+imm.
-  // FIXME: It is possible to extend it to handle reg+reg cases.
-  if (!getLdStOffsetOp(MI).isImm())
-    return false;
-
-  // Look backward up to ScanLimit instructions.
-  MachineBasicBlock::iterator StoreI;
-  if (findMatchingStore(MBBI, ScanLimit, StoreI)) {
-    ++NumLoadsFromStoresPromoted;
-    // Promote the load. Keeping the iterator straight is a
-    // pain, so we let the merge routine tell us what the next instruction
-    // is after it's done mucking about.
-    MBBI = promoteLoadFromStore(MBBI, StoreI);
-    return true;
-  }
-  return false;
-}
-
 bool AArch64LoadStoreOpt::tryToMergeLdStInst(
     MachineBasicBlock::iterator &MBBI) {
   MachineInstr *MI = MBBI;
@@ -1546,16 +1307,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                         bool enableNarrowLdOpt) {
   bool Modified = false;
   // Three tranformations to do here:
-  // 1) Find loads that directly read from stores and promote them by
-  //    replacing with mov instructions. If the store is wider than the load,
-  //    the load will be replaced with a bitfield extract.
-  //    e.g.,
-  //      str w1, [x0, #4]
-  //      ldrh w2, [x0, #6]
-  //      ; becomes
-  //      str w1, [x0, #4]
-  //      lsr w2, w1, #16
-  // 2) Find narrow loads that can be converted into a single wider load
+  // 1) Find narrow loads that can be converted into a single wider load
   //    with bitfield extract instructions.
   //    e.g.,
   //      ldrh w0, [x2]
@@ -1564,14 +1316,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //      ldr w0, [x2]
   //      ubfx w1, w0, #16, #16
   //      and w0, w0, #ffff
-  // 3) Find loads and stores that can be merged into a single load or store
+  // 2) Find loads and stores that can be merged into a single load or store
   //    pair instruction.
   //    e.g.,
   //      ldr x0, [x2]
   //      ldr x1, [x2, #8]
   //      ; becomes
   //      ldp x0, x1, [x2]
-  // 4) Find base register updates that can be merged into the load or store
+  // 3) Find base register updates that can be merged into the load or store
   //    as a base-reg writeback.
   //    e.g.,
   //      ldr x0, [x2]
@@ -1580,35 +1332,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //      ldr x0, [x2], #4
 
   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-       MBBI != E;) {
-    MachineInstr *MI = MBBI;
-    switch (MI->getOpcode()) {
-    default:
-      // Just move on to the next instruction.
-      ++MBBI;
-      break;
-    // Scaled instructions.
-    case AArch64::LDRBBui:
-    case AArch64::LDRHHui:
-    case AArch64::LDRWui:
-    case AArch64::LDRXui:
-    // Unscaled instructions.
-    case AArch64::LDURBBi:
-    case AArch64::LDURHHi:
-    case AArch64::LDURWi:
-    case AArch64::LDURXi: {
-      if (tryToPromoteLoadFromStore(MBBI)) {
-        Modified = true;
-        break;
-      }
-      ++MBBI;
-      break;
-    }
-      // FIXME: Do the other instructions.
-    }
-  }
-
-  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
        enableNarrowLdOpt && MBBI != E;) {
     MachineInstr *MI = MBBI;
     switch (MI->getOpcode()) {
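For readers following the removed logic: the deleted isLdOffsetInRangeOfSt helper was the heart of the safety check. The load was promotable only when the bytes it reads lie entirely within the bytes the store wrote. Below is a minimal standalone sketch of that containment test using plain integers in place of MachineInstr operands; the function name and signature are illustrative, not part of the pass.

#include <cassert>

// True when [LdOffset, LdOffset + LoadSize) is contained in
// [StOffset, StOffset + StoreSize). Offsets are unscaled byte offsets,
// matching the UnscaledLdOffset/UnscaledStOffset computation in the
// removed helper above.
static bool ldWithinSt(int LdOffset, int LoadSize, int StOffset,
                       int StoreSize) {
  return StOffset <= LdOffset &&
         LdOffset + LoadSize <= StOffset + StoreSize;
}

int main() {
  // str w1, [x0, #4] writes bytes [4, 8); ldrh w2, [x0, #6] reads [6, 8).
  assert(ldWithinSt(/*LdOffset=*/6, /*LoadSize=*/2,
                    /*StOffset=*/4, /*StoreSize=*/4));
  // A two-byte load at offset 7 would read byte 8, outside the store,
  // so it is rejected.
  assert(!ldWithinSt(7, 2, 4, 4));
  return 0;
}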
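When the load read a strict subrange of the stored register, the removed promoteLoadFromStore replaced it with UBFMWri/UBFMXri: Immr is the bit distance between the two unscaled offsets, and Imms is the index of the last extracted bit. This only holds on little-endian targets, which is why the code bailed out otherwise. A small sketch of that arithmetic, with a hypothetical helper name:

#include <cassert>

// Recompute the UBFM immediates used by the removed transformation.
// Offsets are unscaled byte offsets; LoadSize is in bytes. Little-endian
// is assumed: byte distance inside the stored register becomes a right
// shift of 8 bits per byte.
static void ubfmImmediates(int UnscaledLdOffset, int UnscaledStOffset,
                           int LoadSize, int &Immr, int &Imms) {
  int Width = LoadSize * 8;
  Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); // first bit to extract
  Imms = Immr + Width - 1;                          // last bit to extract
}

int main() {
  // The example from the deleted block comment:
  //   str w1, [x0, #4]; ldrh w2, [x0, #6]
  // The halfword starts two bytes into w1, so Immr = 16 and Imms = 31.
  int Immr = 0, Imms = 0;
  ubfmImmediates(/*UnscaledLdOffset=*/6, /*UnscaledStOffset=*/4,
                 /*LoadSize=*/2, Immr, Imms);
  assert(Immr == 16 && Imms == 31);
  // Since Imms lands on the register's top bit, UBFMWri w2, w1, #16, #31
  // is exactly "lsr w2, w1, #16", as in the removed comment's example.
  return 0;
}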
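In the equal-offset case the pass emitted ANDWri/ANDXri instead, packing the raw (N, immr, imms) logical-immediate fields itself. The sketch below reproduces that packing and checks it for a word load forwarded from a doubleword store; encodeAndMask is a hypothetical stand-in, and the decoded mask follows the AArch64 logical-immediate rules.

#include <cassert>
#include <cstdint>

// Pack (N, immr, imms) the same way the removed code built AndMaskEncoded:
// N in bit 12, immr in bits 11:6, imms in bits 5:0.
static uint32_t encodeAndMask(bool IsStoreXReg, uint32_t Immr,
                              uint32_t Imms) {
  return ((IsStoreXReg ? 1u : 0u) << 12) | (Immr << 6) | Imms;
}

int main() {
  // str x1, [x0]; ldr w2, [x0]: offsets are equal and LoadSize (4) is
  // less than StoreSize (8), so Width = 32, Immr = 0, Imms = 31.
  uint32_t Enc = encodeAndMask(/*IsStoreXReg=*/true, /*Immr=*/0,
                               /*Imms=*/31);
  assert(Enc == ((1u << 12) | 31u));
  // N=1, immr=0, imms=31 decodes to a run of 32 ones: mask 0xffffffff.
  // The emitted instruction is "and x2, x1, #0xffffffff", zero-extending
  // the stored value's low word into the load's destination register.
  return 0;
}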