From 705e73c7873e11a14fcc308fafd1c26bc188021f Mon Sep 17 00:00:00 2001 From: "Song, Ruiling" Date: Fri, 21 Jul 2017 14:46:41 +0800 Subject: backend: Fix a bug in load-store optimization. when we are merging STOREs, we should use the very last instruction as the insertion point. Signed-off-by: Ruiling Song Reviewed-by: Yang Rong --- backend/src/llvm/llvm_loadstore_optimization.cpp | 71 +++++++++++++++--------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp index bb8dc5f6..a1687c1c 100644 --- a/backend/src/llvm/llvm_loadstore_optimization.cpp +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp @@ -61,21 +61,24 @@ namespace gbe { #endif return optimizeLoadStore(BB); } - Type *getValueType(Value *insn); - Value *getPointerOperand(Value *I); + Type *getValueType(Value *insn); + Value *getPointerOperand(Value *I); unsigned getAddressSpace(Value *I); - bool isSimpleLoadStore(Value *I); - bool optimizeLoadStore(BasicBlock &BB); - - bool isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize); - void mergeLoad(BasicBlock &BB, SmallVector &merged, Instruction *start,int offset); - void mergeStore(BasicBlock &BB, SmallVector &merged, Instruction *start,int offset); - bool findConsecutiveAccess(BasicBlock &BB, - SmallVector &merged, - const BasicBlock::iterator &start, - unsigned maxVecSize, - bool isLoad, - int *addrOffset); + bool isSimpleLoadStore(Value *I); + bool optimizeLoadStore(BasicBlock &BB); + + bool isLoadStoreCompatible(Value *A, Value *B, int *dist, int *elementSize, + int maxVecSize); + void mergeLoad(BasicBlock &BB, SmallVector &merged, + Instruction *first, int offset); + void mergeStore(BasicBlock &BB, SmallVector &merged, + Instruction *first, Instruction *last, int offset); + bool findConsecutiveAccess(BasicBlock &BB, + SmallVector &merged, + const BasicBlock::iterator &start, + unsigned maxVecSize, bool isLoad, + int *addrOffset, Instruction *&first, + Instruction *&last); #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40 virtual StringRef getPassName() const #else @@ -146,7 +149,7 @@ namespace gbe { void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, SmallVector &merged, - Instruction *start, + Instruction *first, int offset) { IRBuilder<> Builder(&BB); @@ -155,7 +158,7 @@ namespace gbe { for(unsigned i = 0; i < size; i++) { values.push_back(merged[i]); } - LoadInst *ld = cast(start); + LoadInst *ld = cast(first); unsigned align = ld->getAlignment(); unsigned addrSpace = ld->getPointerAddressSpace(); // insert before first load @@ -214,7 +217,9 @@ namespace gbe { const BasicBlock::iterator &start, unsigned maxVecSize, bool isLoad, - int *addrOffset) { + int *addrOffset, + Instruction *&first, + Instruction *&last) { if(!isSimpleLoadStore(&*start)) return false; unsigned targetAddrSpace = getAddressSpace(&*start); @@ -233,6 +238,7 @@ namespace gbe { int elementSize; SmallVector searchInsnArray; + SmallVector orderedInstrs; mergedInfo meInfoArray[32]; int indx = 0; meInfoArray[indx++].init(&*start, 0); @@ -275,13 +281,15 @@ namespace gbe { if(indx > 1) { + first = (*searchInsnArray.begin())->mInsn; //try to sort the load/store by the offset from the start //the farthest is at the beginning. this is easy to find the //continuous load/store + orderedInstrs = searchInsnArray; std::sort(searchInsnArray.begin(), searchInsnArray.end(), offsetSorter()); // try to find continuous loadstore insn in the candidate array - for(unsigned i = 0; i < searchInsnArray.size(); i++) + for (unsigned i = 0; i < searchInsnArray.size(); i++) { unsigned j; for(j = 0; j < maxVecSize-1 && (j+i+1) < searchInsnArray.size(); j++) @@ -306,6 +314,17 @@ namespace gbe { if (k >= maxVecSize) break; } + // find the last instruction if we are trying to merge STOREs. + // we will later use it as the insertion point. + if (!isLoad) + for (auto insn = orderedInstrs.rbegin(); + insn != orderedInstrs.rend(); ++insn) { + if (std::find(merged.begin(), merged.end(), (*insn)->mInsn) != + merged.end()) { + last = (*insn)->mInsn; + break; + } + } break; } @@ -317,7 +336,8 @@ namespace gbe { void GenLoadStoreOptimization::mergeStore(BasicBlock &BB, SmallVector &merged, - Instruction *start, + Instruction *first, + Instruction *last, int offset) { IRBuilder<> Builder(&BB); @@ -326,7 +346,7 @@ namespace gbe { for(unsigned i = 0; i < size; i++) { values.push_back(cast(merged[i])->getValueOperand()); } - StoreInst *st = cast(start); + StoreInst *st = cast(first); if(!st) return; @@ -334,7 +354,7 @@ namespace gbe { unsigned align = st->getAlignment(); // insert before the last store - Builder.SetInsertPoint(merged[size-1]); + Builder.SetInsertPoint(last); Type *dataTy = st->getValueOperand()->getType(); VectorType *vecTy = VectorType::get(dataTy, size); @@ -405,13 +425,14 @@ namespace gbe { continue; int addrOffset = 0; + Instruction *first = nullptr, *last = nullptr; unsigned maxVecSize = (ty->isFloatTy() || ty->isIntegerTy(32)) ? 4 : (ty->isIntegerTy(16) ? 8 : 16); - bool reorder = findConsecutiveAccess(BB, merged, BBI, maxVecSize, isLoad, &addrOffset); + bool reorder = findConsecutiveAccess(BB, merged, BBI, maxVecSize, + isLoad, &addrOffset, first, last); uint32_t size = merged.size(); uint32_t pos = 0; bool doDeleting = size > 1; - BasicBlock::iterator startLS = BBI; if (doDeleting) { // choose next undeleted instruction BBI = findSafeInstruction(merged, BBI, reorder); @@ -423,9 +444,9 @@ namespace gbe { (size >= 4 ? 4 : size)); SmallVector mergedVec(merged.begin() + pos, merged.begin() + pos + vecSize); if(isLoad) - mergeLoad(BB, mergedVec, &*startLS, addrOffset); + mergeLoad(BB, mergedVec, first, addrOffset); else - mergeStore(BB, mergedVec, &*startLS, addrOffset); + mergeStore(BB, mergedVec, first, last, addrOffset); // remove merged insn for(uint32_t i = 0; i < mergedVec.size(); i++) mergedVec[i]->eraseFromParent(); -- cgit v1.2.3