diff options
Diffstat (limited to 'src/gallium/drivers/radeon/AMDILInliner.cpp')
-rw-r--r-- | src/gallium/drivers/radeon/AMDILInliner.cpp | 271 |
1 files changed, 271 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeon/AMDILInliner.cpp b/src/gallium/drivers/radeon/AMDILInliner.cpp new file mode 100644 index 0000000000..9dad6add97 --- /dev/null +++ b/src/gallium/drivers/radeon/AMDILInliner.cpp @@ -0,0 +1,271 @@ +//===-- AMDILInliner.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// + +#define DEBUG_TYPE "amdilinline" +#include "AMDIL.h" +#include "AMDILCompilerErrors.h" +#include "AMDILMachineFunctionInfo.h" +#include "AMDILSubtarget.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; + +namespace +{ + class LLVM_LIBRARY_VISIBILITY AMDILInlinePass: public FunctionPass + + { + public: + TargetMachine &TM; + static char ID; + AMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); + ~AMDILInlinePass(); + virtual const char* getPassName() const; + virtual bool runOnFunction(Function &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + private: + typedef DenseMap<const ArrayType*, SmallVector<AllocaInst*, + DEFAULT_VEC_SLOTS> > InlinedArrayAllocasTy; + bool + AMDILInlineCallIfPossible(CallSite CS, + const TargetData *TD, + InlinedArrayAllocasTy &InlinedArrayAllocas); + + CodeGenOpt::Level OptLevel; + }; + char AMDILInlinePass::ID = 0; +} // anonymouse namespace + + +namespace llvm +{ + FunctionPass* + createAMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) + { + return new AMDILInlinePass(tm AMDIL_OPT_LEVEL_VAR); + } +} // llvm namespace + + AMDILInlinePass::AMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) +: FunctionPass(ID), TM(tm) +{ + OptLevel = tm.getOptLevel(); +} +AMDILInlinePass::~AMDILInlinePass() +{ +} + + +bool +AMDILInlinePass::AMDILInlineCallIfPossible(CallSite CS, + const TargetData *TD, InlinedArrayAllocasTy &InlinedArrayAllocas) { + Function *Callee = CS.getCalledFunction(); + Function *Caller = CS.getCaller(); + + // Try to inline the function. Get the list of static allocas that were + // inlined. + SmallVector<AllocaInst*, 16> StaticAllocas; + InlineFunctionInfo IFI; + if (!InlineFunction(CS, IFI)) + return false; + DEBUG(errs() << "<amdilinline> function " << Caller->getName() + << ": inlined call to "<< Callee->getName() << "\n"); + + // If the inlined function had a higher stack protection level than the + // calling function, then bump up the caller's stack protection level. + if (Callee->hasFnAttr(Attribute::StackProtectReq)) + Caller->addFnAttr(Attribute::StackProtectReq); + else if (Callee->hasFnAttr(Attribute::StackProtect) && + !Caller->hasFnAttr(Attribute::StackProtectReq)) + Caller->addFnAttr(Attribute::StackProtect); + + + // Look at all of the allocas that we inlined through this call site. If we + // have already inlined other allocas through other calls into this function, + // then we know that they have disjoint lifetimes and that we can merge them. + // + // There are many heuristics possible for merging these allocas, and the + // different options have different tradeoffs. One thing that we *really* + // don't want to hurt is SRoA: once inlining happens, often allocas are no + // longer address taken and so they can be promoted. + // + // Our "solution" for that is to only merge allocas whose outermost type is an + // array type. These are usually not promoted because someone is using a + // variable index into them. These are also often the most important ones to + // merge. + // + // A better solution would be to have real memory lifetime markers in the IR + // and not have the inliner do any merging of allocas at all. This would + // allow the backend to do proper stack slot coloring of all allocas that + // *actually make it to the backend*, which is really what we want. + // + // Because we don't have this information, we do this simple and useful hack. + // + SmallPtrSet<AllocaInst*, 16> UsedAllocas; + + // Loop over all the allocas we have so far and see if they can be merged with + // a previously inlined alloca. If not, remember that we had it. + + for (unsigned AllocaNo = 0, + e = IFI.StaticAllocas.size(); + AllocaNo != e; ++AllocaNo) { + + AllocaInst *AI = IFI.StaticAllocas[AllocaNo]; + + // Don't bother trying to merge array allocations (they will usually be + // canonicalized to be an allocation *of* an array), or allocations whose + // type is not itself an array (because we're afraid of pessimizing SRoA). + const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); + if (ATy == 0 || AI->isArrayAllocation()) + continue; + + // Get the list of all available allocas for this array type. + SmallVector<AllocaInst*, DEFAULT_VEC_SLOTS> &AllocasForType + = InlinedArrayAllocas[ATy]; + + // Loop over the allocas in AllocasForType to see if we can reuse one. Note + // that we have to be careful not to reuse the same "available" alloca for + // multiple different allocas that we just inlined, we use the 'UsedAllocas' + // set to keep track of which "available" allocas are being used by this + // function. Also, AllocasForType can be empty of course! + bool MergedAwayAlloca = false; + for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) { + AllocaInst *AvailableAlloca = AllocasForType[i]; + + // The available alloca has to be in the right function, not in some other + // function in this SCC. + if (AvailableAlloca->getParent() != AI->getParent()) + continue; + + // If the inlined function already uses this alloca then we can't reuse + // it. + if (!UsedAllocas.insert(AvailableAlloca)) + continue; + + // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare + // success! + DEBUG(errs() << " ***MERGED ALLOCA: " << *AI); + + AI->replaceAllUsesWith(AvailableAlloca); + AI->eraseFromParent(); + MergedAwayAlloca = true; + break; + } + + // If we already nuked the alloca, we're done with it. + if (MergedAwayAlloca) + continue; + + // If we were unable to merge away the alloca either because there are no + // allocas of the right type available or because we reused them all + // already, remember that this alloca came from an inlined function and mark + // it used so we don't reuse it for other allocas from this inline + // operation. + AllocasForType.push_back(AI); + UsedAllocas.insert(AI); + } + + return true; +} + + bool +AMDILInlinePass::runOnFunction(Function &MF) +{ + Function *F = &MF; + const AMDILSubtarget &STM = TM.getSubtarget<AMDILSubtarget>(); + if (STM.device()->isSupported(AMDILDeviceInfo::NoInline)) { + return false; + } + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + SmallVector<CallSite, 16> CallSites; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + CallSite CS = CallSite(cast<Value>(I)); + // If this isn't a call, or it is a call to an intrinsic, it can + // never be inlined. + if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I)) + continue; + + // If this is a direct call to an external function, we can never inline + // it. If it is an indirect call, inlining may resolve it to be a + // direct call, so we keep it. + if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) + continue; + + // We don't want to inline if we are recursive. + if (CS.getCalledFunction() && CS.getCalledFunction()->getName() == MF.getName()) { + AMDILMachineFunctionInfo *MFI = + getAnalysis<MachineFunctionAnalysis>().getMF() + .getInfo<AMDILMachineFunctionInfo>(); + MFI->addErrorMsg(amd::CompilerErrorMessage[RECURSIVE_FUNCTION]); + continue; + } + + CallSites.push_back(CS); + } + } + + InlinedArrayAllocasTy InlinedArrayAllocas; + bool Changed = false; + for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { + CallSite CS = CallSites[CSi]; + + Function *Callee = CS.getCalledFunction(); + + // We can only inline direct calls to non-declarations. + if (Callee == 0 || Callee->isDeclaration()) continue; + + // Attempt to inline the function... + if (!AMDILInlineCallIfPossible(CS, TD, InlinedArrayAllocas)) + continue; + Changed = true; + } + return Changed; +} + +const char* +AMDILInlinePass::getPassName() const +{ + return "AMDIL Inline Function Pass"; +} + bool +AMDILInlinePass::doInitialization(Module &M) +{ + return false; +} + + bool +AMDILInlinePass::doFinalization(Module &M) +{ + return false; +} + +void +AMDILInlinePass::getAnalysisUsage(AnalysisUsage &AU) const +{ + AU.addRequired<MachineFunctionAnalysis>(); + FunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); +} |