diff options
-rw-r--r-- | lib/Transforms/Scalar/TailRecursionElimination.cpp | 149 | ||||
-rw-r--r-- | test/Transforms/TailCallElim/basic.ll | 86 | ||||
-rw-r--r-- | test/Transforms/TailCallElim/nocapture.ll | 25 |
3 files changed, 171 insertions, 89 deletions
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 2002e680d19..1da6a070d96 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -53,6 +53,7 @@ #define DEBUG_TYPE "tailcallelim" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" @@ -69,6 +70,7 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -129,34 +131,42 @@ void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetTransformInfo>(); } -/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by -/// callees of this function. We only do very simple analysis right now, this -/// could be expanded in the future to use mod/ref information for particular -/// call sites if desired. -static bool AllocaMightEscapeToCalls(AllocaInst *AI) { - // FIXME: do simple 'address taken' analysis. - return true; +/// CanTRE - Return true if the specified alloca is a fixed-size allocation +/// in the entry block of its function. If it is variable-sized or not in +/// the entry block, returns false. +static bool CanTRE(AllocaInst *AI) { + // Because of PR962, we don't TRE allocas outside the entry block. + + // If this alloca is in the body of the function, or if it is a variable + // sized allocation, we cannot tail call eliminate calls marked 'tail' + // with this mechanism. + BasicBlock *BB = AI->getParent(); + return BB == &BB->getParent()->getEntryBlock() && + isa<ConstantInt>(AI->getArraySize()); } -/// CheckForEscapingAllocas - Scan the specified basic block for alloca -/// instructions.
If it contains any that might be accessed by calls, return -/// true. -static bool CheckForEscapingAllocas(BasicBlock *BB, - bool &CannotTCETailMarkedCall) { - bool RetVal = false; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - RetVal |= AllocaMightEscapeToCalls(AI); - - // If this alloca is in the body of the function, or if it is a variable - // sized allocation, we cannot tail call eliminate calls marked 'tail' - // with this mechanism. - if (BB != &BB->getParent()->getEntryBlock() || - !isa<ConstantInt>(AI->getArraySize())) - CannotTCETailMarkedCall = true; - } - return RetVal; -} +struct AllocaCaptureTracker : public CaptureTracker { + AllocaCaptureTracker() : Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { + Value *V = U->getUser(); + if (isa<CallInst>(V) || isa<InvokeInst>(V)) + UsesAlloca.insert(V); + return true; + } + + bool captured(Use *U) { + if (isa<ReturnInst>(U->getUser())) + return false; + Captured = true; + return true; + } + + bool Captured; + SmallPtrSet<const Value *, 64> UsesAlloca; +}; bool TailCallElim::runOnFunction(Function &F) { // If this function is a varargs function, we won't be able to PHI the args @@ -168,41 +178,44 @@ bool TailCallElim::runOnFunction(Function &F) { bool TailCallsAreMarkedTail = false; SmallVector<PHINode*, 8> ArgumentPHIs; bool MadeChange = false; - bool FunctionContainsEscapingAllocas = false; - // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls + // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls // marked with the 'tail' attribute, because doing so would cause the stack - // size to increase (real TCE would deallocate variable sized allocas, TCE + // size to increase (real TCE would deallocate variable sized allocas, TRE // doesn't).
- bool CannotTCETailMarkedCall = false; - - // Loop over the function, looking for any returning blocks, and keeping track - // of whether this function has any non-trivially used allocas. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall) - break; - - FunctionContainsEscapingAllocas |= - CheckForEscapingAllocas(BB, CannotTCETailMarkedCall); + bool CanTRETailMarkedCall = true; + + // Find calls that can be marked tail. + AllocaCaptureTracker ACT; + for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + CanTRETailMarkedCall &= CanTRE(AI); + PointerMayBeCaptured(AI, &ACT); + // If any allocas are captured, exit. + if (ACT.Captured) + return false; + } + } } - /// FIXME: The code generator produces really bad code when an 'escaping - /// alloca' is changed from being a static alloca to being a dynamic alloca. - /// Until this is resolved, disable this transformation if that would ever - /// happen. This bug is PR962. - if (FunctionContainsEscapingAllocas) - return false; - - // Second pass, change any tail calls to loops. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { - bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs,CannotTCETailMarkedCall); - if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, - TailCallsAreMarkedTail, ArgumentPHIs, - CannotTCETailMarkedCall); - MadeChange |= Change; + // Second pass, change any tail recursive calls to loops. + // + // FIXME: The code generator produces really bad code when an 'escaping + // alloca' is changed from being a static alloca to being a dynamic alloca. 
+ // Until this is resolved, disable this transformation if that would ever + // happen. This bug is PR962. + if (ACT.UsesAlloca.empty()) { + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { + bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, + ArgumentPHIs, !CanTRETailMarkedCall); + if (!Change && BB->getFirstNonPHIOrDbg() == Ret) + Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, + TailCallsAreMarkedTail, ArgumentPHIs, + !CanTRETailMarkedCall); + MadeChange |= Change; + } } } @@ -223,16 +236,24 @@ bool TailCallElim::runOnFunction(Function &F) { } } - // Finally, if this function contains no non-escaping allocas, or calls - // setjmp, mark all calls in the function as eligible for tail calls - //(there is no stack memory for them to access). - if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice()) - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + // At this point, we know that the function does not have any captured + // allocas. If additionally the function does not call setjmp, mark all calls + // in the function that do not access stack memory with the tail keyword. This + // implies ensuring that there does not exist any path from a call that takes + // in an alloca but does not capture it and the call which we wish to mark + // with "tail". 
+ if (!F.callsFunctionThatReturnsTwice()) { + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (CallInst *CI = dyn_cast<CallInst>(I)) { - CI->setTailCall(); - MadeChange = true; + if (!ACT.UsesAlloca.count(CI)) { + CI->setTailCall(); + MadeChange = true; + } } + } + } + } return MadeChange; } diff --git a/test/Transforms/TailCallElim/basic.ll b/test/Transforms/TailCallElim/basic.ll index 1db0e76e5be..ffb757005c2 100644 --- a/test/Transforms/TailCallElim/basic.ll +++ b/test/Transforms/TailCallElim/basic.ll @@ -2,6 +2,8 @@ declare void @noarg() declare void @use(i32*) +declare void @use_nocapture(i32* nocapture) +declare void @use2_nocapture(i32* nocapture, i32* nocapture) ; Trivial case. Mark @noarg with tail call. define void @test0() { @@ -57,3 +59,87 @@ return: ; preds = %entry ret i32 0 } +; Make sure that a nocapture pointer does not stop adding a tail call marker to +; an unrelated call and additionally that we do not mark the nocapture call with +; a tail call. +; +; rdar://14324281 +define void @test4() { +; CHECK: void @test4 +; CHECK-NOT: tail call void @use_nocapture +; CHECK: tail call void @noarg() +; CHECK: ret void + %a = alloca i32 + call void @use_nocapture(i32* %a) + call void @noarg() + ret void +} + +; Make sure that we do not perform TRE even with a nocapture use. This is due to +; bad codegen caused by PR962. +; +; rdar://14324281. +define i32* @test5(i32* nocapture %A, i1 %cond) { +; CHECK: i32* @test5 +; CHECK-NOT: tailrecurse: +; CHECK: ret i32* null + %B = alloca i32 + br i1 %cond, label %cond_true, label %cond_false +cond_true: + call i32* @test5(i32* %B, i1 false) + ret i32* null +cond_false: + call void @use2_nocapture(i32* %A, i32* %B) + call void @noarg() + ret i32* null +} + +; PR14143: Make sure that we do not mark functions with nocapture allocas with tail. +; +; rdar://14324281. 
+define void @test6(i32* %a, i32* %b) { +; CHECK: @test6 +; CHECK-NOT: tail call +; CHECK: ret void + %c = alloca [100 x i8], align 16 + %tmp = bitcast [100 x i8]* %c to i32* + call void @use2_nocapture(i32* %b, i32* %tmp) + ret void +} + +; PR14143: Make sure that we do not mark functions with nocapture allocas with tail. +; +; rdar://14324281 +define void @test7(i32* %a, i32* %b) nounwind uwtable { +entry: +; CHECK: @test7 +; CHECK-NOT: tail call +; CHECK: ret void + %c = alloca [100 x i8], align 16 + %0 = bitcast [100 x i8]* %c to i32* + call void @use2_nocapture(i32* %0, i32* %a) + call void @use2_nocapture(i32* %b, i32* %0) + ret void +} + +; If we have a mix of escaping captured/non-captured allocas, ensure that we do +; not do anything including marking callsites with the tail call marker. +; +; rdar://14324281. +define i32* @test8(i32* nocapture %A, i1 %cond) { +; CHECK: i32* @test8 +; CHECK-NOT: tailrecurse: +; CHECK-NOT: tail call +; CHECK: ret i32* null + %B = alloca i32 + %B2 = alloca i32 + br i1 %cond, label %cond_true, label %cond_false +cond_true: + call void @use(i32* %B2) + call i32* @test8(i32* %B, i1 false) + ret i32* null +cond_false: + call void @use2_nocapture(i32* %A, i32* %B) + call void @noarg() + ret i32* null +} diff --git a/test/Transforms/TailCallElim/nocapture.ll b/test/Transforms/TailCallElim/nocapture.ll deleted file mode 100644 index e49d87cc4b5..00000000000 --- a/test/Transforms/TailCallElim/nocapture.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: opt -tailcallelim -S < %s | FileCheck %s -; XFAIL: * - -declare void @use(i8* nocapture, i8* nocapture) - -define i8* @foo(i8* nocapture %A, i1 %cond) { -; CHECK: tailrecurse: -; CHECK: %A.tr = phi i8* [ %A, %0 ], [ %B, %cond_true ] -; CHECK: %cond.tr = phi i1 [ %cond, %0 ], [ false, %cond_true ] - %B = alloca i8 -; CHECK: %B = alloca i8 - br i1 %cond, label %cond_true, label %cond_false -; CHECK: br i1 %cond.tr, label %cond_true, label %cond_false -cond_true: -; CHECK: cond_true: -; CHECK: br 
label %tailrecurse - call i8* @foo(i8* %B, i1 false) - ret i8* null -cond_false: -; CHECK: cond_false - call void @use(i8* %A, i8* %B) -; CHECK: tail call void @use(i8* %A.tr, i8* %B) - ret i8* null -; CHECK: ret i8* null -} |