summaryrefslogtreecommitdiff
path: root/backend/src/llvm/llvm_unroll.cpp
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@intel.com>2014-10-08 12:58:59 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-10-17 15:10:44 +0800
commit0ccfdf53f80782b29835cea867fa1db891bcdcc5 (patch)
treedbcad32f44ffd643dce181a6c3fbb88c7dcfbaef /backend/src/llvm/llvm_unroll.cpp
parent74ea659e2ba624cbb07a6a99f1c7edc5bc144435 (diff)
GBE: Add a customized loop unrolling handling mechanism.
By default, the unrolling threshold is relatively small. Thus some relative large loops which access private array will not be unrolled, thus those private array can't be scalarized latter. And the private array is allocated in stack which is extreme slow for Gen backend currently. To increase the unrolling threshold for all loops is not a good idea, as most of the loops don't need to do unrolling for this purpose and a large unrolling threshold will cause a big code size and unecessary big register pressure which may lead to register spilling. So this patch introduce a trade-off pass to identify those loops which still have private load/store in the outer most of the loop. Then add a metadata to it to indicate aggresive unrolling on those loops. Then do another round loop unrolling. This patch with the previous small patch, can bring significant performance improvement for some cases. I just tested with some opencv test cases, and observed it can bring 2x to 10x improvement. v2: refine the parent loop unroll analysis method. v3: disable this pass for LLVM 3.3/3.4. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com> Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Diffstat (limited to 'backend/src/llvm/llvm_unroll.cpp')
-rw-r--r--backend/src/llvm/llvm_unroll.cpp226
1 files changed, 226 insertions, 0 deletions
diff --git a/backend/src/llvm/llvm_unroll.cpp b/backend/src/llvm/llvm_unroll.cpp
new file mode 100644
index 00000000..bd0dd8c1
--- /dev/null
+++ b/backend/src/llvm/llvm_unroll.cpp
@@ -0,0 +1,226 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <set>
+#include "llvm/Config/llvm-config.h"
+#if LLVM_VERSION_MINOR <= 2
+#include "llvm/Function.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#else
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#endif /* LLVM_VERSION_MINOR <= 2 */
+#include "llvm/Pass.h"
+#if LLVM_VERSION_MINOR <= 1
+#include "llvm/Support/IRBuilder.h"
+#elif LLVM_VERSION_MINOR == 2
+#include "llvm/IRBuilder.h"
+#else
+#include "llvm/IR/IRBuilder.h"
+#endif /* LLVM_VERSION_MINOR <= 1 */
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/llvm_gen_backend.hpp"
+#include "sys/map.hpp"
+
+
+using namespace llvm;
+
+namespace gbe {
+ class CustomLoopUnroll : public LoopPass
+ {
+ public:
+ static char ID;
+ CustomLoopUnroll() :
+ LoopPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then LCSSA pass on next
+ // loop will receive invalid dom info.
+ // For now, recreate dom info, if loop is unrolled.
+ AU.addPreserved<DominatorTreeWrapperPass>();
+
+ }
+
+ // Returns the value associated with the given metadata node name (for
+ // example, "llvm.loop.unroll.count"). If no such named metadata node
+ // exists, then nullptr is returned.
+ static const ConstantInt *GetUnrollMetadataValue(const Loop *L,
+ StringRef Name) {
+ MDNode *LoopID = L->getLoopID();
+ if (!LoopID) return nullptr;
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD) continue;
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S) continue;
+ if (Name.equals(S->getString())) {
+ assert(MD->getNumOperands() == 2 &&
+ "Unroll hint metadata should have two operands.");
+ return cast<ConstantInt>(MD->getOperand(1));
+ }
+ }
+ return nullptr;
+ }
+
+ void setUnrollID(Loop *L, bool enable) {
+ if (!enable && disabledLoops.find(L) != disabledLoops.end())
+ return;
+ LLVMContext &Context = L->getHeader()->getContext();
+ SmallVector<Value *, 2> forceUnroll;
+ forceUnroll.push_back(MDString::get(Context, "llvm.loop.unroll.enable"));
+ forceUnroll.push_back(ConstantInt::get(Type::getInt1Ty(Context), enable));
+ MDNode *forceUnrollNode = MDNode::get(Context, forceUnroll);
+ SmallVector<Value *, 4> Vals;
+ Vals.push_back(NULL);
+ Vals.push_back(forceUnrollNode);
+ MDNode *NewLoopID = MDNode::get(Context, Vals);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ L->setLoopID(NewLoopID);
+ if (!enable)
+ disabledLoops.insert(L);
+ }
+
+ static bool hasPrivateLoadStore(Loop *L) {
+ const std::vector<Loop*> subLoops = L->getSubLoops();
+ std::set<BasicBlock*> subBlocks, blocks;
+
+ for(auto l : subLoops)
+ for(auto bb : l->getBlocks())
+ subBlocks.insert(bb);
+ for(auto bb : L->getBlocks())
+ if (subBlocks.find(bb) == subBlocks.end())
+ blocks.insert(bb);
+ for(auto bb : blocks) {
+ for (BasicBlock::iterator inst = bb->begin(), instE = bb->end(); inst != instE; ++inst) {
+ unsigned addrSpace = 0;
+ if (isa<LoadInst>(*inst)) {
+ LoadInst *ld = cast<LoadInst>(&*inst);
+ addrSpace = ld->getPointerAddressSpace();
+ }
+ else if (isa<StoreInst>(*inst)) {
+ StoreInst *st = cast<StoreInst>(&*inst);
+ addrSpace = st->getPointerAddressSpace();
+ }
+ if (addrSpace == 0)
+ return true;
+ }
+ }
+ return false;
+ }
+ // If one loop has very large self trip count
+ // we don't want to unroll it.
+ // self trip count means trip count divide by the parent's trip count. for example
+ // for (int i = 0; i < 16; i++) {
+ // for (int j = 0; j < 4; j++) {
+ // for (int k = 0; k < 2; k++) {
+ // ...
+ // }
+ // ...
+ // }
+ // The inner loops j and k could be unrolled, but the loop i will not be unrolled.
+ // The return value true means the L could be unrolled, otherwise, it could not
+ // be unrolled.
+ bool handleParentLoops(Loop *L, LPPassManager &LPM) {
+ Loop *currL = L;
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
+ BasicBlock *latchBlock = currL->getLoopLatch();
+ unsigned currTripCount = 0;
+ bool shouldUnroll = true;
+ if (latchBlock)
+ currTripCount = SE->getSmallConstantTripCount(L, latchBlock);
+
+ while(currL) {
+ Loop *parentL = currL->getParentLoop();
+ unsigned parentTripCount = 0;
+ if (parentL) {
+ BasicBlock *parentLatchBlock = parentL->getLoopLatch();
+ if (parentLatchBlock)
+ parentTripCount = SE->getSmallConstantTripCount(parentL, parentLatchBlock);
+ }
+ if ((parentTripCount != 0 && currTripCount / parentTripCount > 16) ||
+ (currTripCount > 32)) {
+ if (currL == L)
+ shouldUnroll = false;
+ setUnrollID(currL, false);
+ if (currL != L)
+ LPM.deleteLoopFromQueue(currL);
+ }
+ currL = parentL;
+ currTripCount = parentTripCount;
+ }
+ return shouldUnroll;
+ }
+
+ // Analyze the outermost BBs of this loop, if there are
+ // some private load or store, we change it's loop meta data
+ // to indicate more aggresive unrolling on it.
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
+ const ConstantInt *Enable = GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
+ if (Enable)
+ return false;
+ const ConstantInt *Count = GetUnrollMetadataValue(L, "llvm.loop.unroll.count");
+ if (Count)
+ return false;
+
+ if (!handleParentLoops(L, LPM))
+ return false;
+
+ if (!hasPrivateLoadStore(L))
+ return false;
+ setUnrollID(L, true);
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "SPIR backend: custom loop unrolling pass";
+ }
+ private:
+ std::set<Loop *> disabledLoops;
+
+ };
+
+ char CustomLoopUnroll::ID = 0;
+
+ LoopPass *createCustomLoopUnrollPass() {
+ return new CustomLoopUnroll();
+ }
+} // end namespace