summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/radeon/AMDILInliner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/radeon/AMDILInliner.cpp')
-rw-r--r--src/gallium/drivers/radeon/AMDILInliner.cpp271
1 files changed, 271 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeon/AMDILInliner.cpp b/src/gallium/drivers/radeon/AMDILInliner.cpp
new file mode 100644
index 0000000000..9dad6add97
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILInliner.cpp
@@ -0,0 +1,271 @@
+//===-- AMDILInliner.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "amdilinline"
+#include "AMDIL.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+namespace
+{
+ class LLVM_LIBRARY_VISIBILITY AMDILInlinePass: public FunctionPass
+
+ {
+ public:
+ TargetMachine &TM;
+ static char ID;
+ AMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+ ~AMDILInlinePass();
+ virtual const char* getPassName() const;
+ virtual bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ private:
+ typedef DenseMap<const ArrayType*, SmallVector<AllocaInst*,
+ DEFAULT_VEC_SLOTS> > InlinedArrayAllocasTy;
+ bool
+ AMDILInlineCallIfPossible(CallSite CS,
+ const TargetData *TD,
+ InlinedArrayAllocasTy &InlinedArrayAllocas);
+
+ CodeGenOpt::Level OptLevel;
+ };
+ char AMDILInlinePass::ID = 0;
+} // anonymouse namespace
+
+
+namespace llvm
+{
+ FunctionPass*
+ createAMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+ {
+ return new AMDILInlinePass(tm AMDIL_OPT_LEVEL_VAR);
+ }
+} // llvm namespace
+
+ AMDILInlinePass::AMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+: FunctionPass(ID), TM(tm)
+{
+ OptLevel = tm.getOptLevel();
+}
+AMDILInlinePass::~AMDILInlinePass()
+{
+}
+
+
+bool
+AMDILInlinePass::AMDILInlineCallIfPossible(CallSite CS,
+ const TargetData *TD, InlinedArrayAllocasTy &InlinedArrayAllocas) {
+ Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
+
+ // Try to inline the function. Get the list of static allocas that were
+ // inlined.
+ SmallVector<AllocaInst*, 16> StaticAllocas;
+ InlineFunctionInfo IFI;
+ if (!InlineFunction(CS, IFI))
+ return false;
+ DEBUG(errs() << "<amdilinline> function " << Caller->getName()
+ << ": inlined call to "<< Callee->getName() << "\n");
+
+ // If the inlined function had a higher stack protection level than the
+ // calling function, then bump up the caller's stack protection level.
+ if (Callee->hasFnAttr(Attribute::StackProtectReq))
+ Caller->addFnAttr(Attribute::StackProtectReq);
+ else if (Callee->hasFnAttr(Attribute::StackProtect) &&
+ !Caller->hasFnAttr(Attribute::StackProtectReq))
+ Caller->addFnAttr(Attribute::StackProtect);
+
+
+ // Look at all of the allocas that we inlined through this call site. If we
+ // have already inlined other allocas through other calls into this function,
+ // then we know that they have disjoint lifetimes and that we can merge them.
+ //
+ // There are many heuristics possible for merging these allocas, and the
+ // different options have different tradeoffs. One thing that we *really*
+ // don't want to hurt is SRoA: once inlining happens, often allocas are no
+ // longer address taken and so they can be promoted.
+ //
+ // Our "solution" for that is to only merge allocas whose outermost type is an
+ // array type. These are usually not promoted because someone is using a
+ // variable index into them. These are also often the most important ones to
+ // merge.
+ //
+ // A better solution would be to have real memory lifetime markers in the IR
+ // and not have the inliner do any merging of allocas at all. This would
+ // allow the backend to do proper stack slot coloring of all allocas that
+ // *actually make it to the backend*, which is really what we want.
+ //
+ // Because we don't have this information, we do this simple and useful hack.
+ //
+ SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+
+ // Loop over all the allocas we have so far and see if they can be merged with
+ // a previously inlined alloca. If not, remember that we had it.
+
+ for (unsigned AllocaNo = 0,
+ e = IFI.StaticAllocas.size();
+ AllocaNo != e; ++AllocaNo) {
+
+ AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
+
+ // Don't bother trying to merge array allocations (they will usually be
+ // canonicalized to be an allocation *of* an array), or allocations whose
+ // type is not itself an array (because we're afraid of pessimizing SRoA).
+ const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+ if (ATy == 0 || AI->isArrayAllocation())
+ continue;
+
+ // Get the list of all available allocas for this array type.
+ SmallVector<AllocaInst*, DEFAULT_VEC_SLOTS> &AllocasForType
+ = InlinedArrayAllocas[ATy];
+
+ // Loop over the allocas in AllocasForType to see if we can reuse one. Note
+ // that we have to be careful not to reuse the same "available" alloca for
+ // multiple different allocas that we just inlined, we use the 'UsedAllocas'
+ // set to keep track of which "available" allocas are being used by this
+ // function. Also, AllocasForType can be empty of course!
+ bool MergedAwayAlloca = false;
+ for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
+ AllocaInst *AvailableAlloca = AllocasForType[i];
+
+ // The available alloca has to be in the right function, not in some other
+ // function in this SCC.
+ if (AvailableAlloca->getParent() != AI->getParent())
+ continue;
+
+ // If the inlined function already uses this alloca then we can't reuse
+ // it.
+ if (!UsedAllocas.insert(AvailableAlloca))
+ continue;
+
+ // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+ // success!
+ DEBUG(errs() << " ***MERGED ALLOCA: " << *AI);
+
+ AI->replaceAllUsesWith(AvailableAlloca);
+ AI->eraseFromParent();
+ MergedAwayAlloca = true;
+ break;
+ }
+
+ // If we already nuked the alloca, we're done with it.
+ if (MergedAwayAlloca)
+ continue;
+
+ // If we were unable to merge away the alloca either because there are no
+ // allocas of the right type available or because we reused them all
+ // already, remember that this alloca came from an inlined function and mark
+ // it used so we don't reuse it for other allocas from this inline
+ // operation.
+ AllocasForType.push_back(AI);
+ UsedAllocas.insert(AI);
+ }
+
+ return true;
+}
+
+ bool
+AMDILInlinePass::runOnFunction(Function &MF)
+{
+ Function *F = &MF;
+ const AMDILSubtarget &STM = TM.getSubtarget<AMDILSubtarget>();
+ if (STM.device()->isSupported(AMDILDeviceInfo::NoInline)) {
+ return false;
+ }
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ SmallVector<CallSite, 16> CallSites;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS = CallSite(cast<Value>(I));
+ // If this isn't a call, or it is a call to an intrinsic, it can
+ // never be inlined.
+ if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
+ continue;
+
+ // If this is a direct call to an external function, we can never inline
+ // it. If it is an indirect call, inlining may resolve it to be a
+ // direct call, so we keep it.
+ if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
+ continue;
+
+ // We don't want to inline if we are recursive.
+ if (CS.getCalledFunction() && CS.getCalledFunction()->getName() == MF.getName()) {
+ AMDILMachineFunctionInfo *MFI =
+ getAnalysis<MachineFunctionAnalysis>().getMF()
+ .getInfo<AMDILMachineFunctionInfo>();
+ MFI->addErrorMsg(amd::CompilerErrorMessage[RECURSIVE_FUNCTION]);
+ continue;
+ }
+
+ CallSites.push_back(CS);
+ }
+ }
+
+ InlinedArrayAllocasTy InlinedArrayAllocas;
+ bool Changed = false;
+ for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+ CallSite CS = CallSites[CSi];
+
+ Function *Callee = CS.getCalledFunction();
+
+ // We can only inline direct calls to non-declarations.
+ if (Callee == 0 || Callee->isDeclaration()) continue;
+
+ // Attempt to inline the function...
+ if (!AMDILInlineCallIfPossible(CS, TD, InlinedArrayAllocas))
+ continue;
+ Changed = true;
+ }
+ return Changed;
+}
+
+const char*
+AMDILInlinePass::getPassName() const
+{
+ return "AMDIL Inline Function Pass";
+}
+ bool
+AMDILInlinePass::doInitialization(Module &M)
+{
+ return false;
+}
+
+ bool
+AMDILInlinePass::doFinalization(Module &M)
+{
+ return false;
+}
+
+void
+AMDILInlinePass::getAnalysisUsage(AnalysisUsage &AU) const
+{
+ AU.addRequired<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}