author     tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8>   2012-10-16 17:52:57 +0000
committer  tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8>   2012-10-16 17:52:57 +0000
commit     47ca737da5de48d621fa88adb89b6265fe224453 (patch)
tree       7e60638529c99989f22bff7a0f40a71a9c3a7399 /lib
parent     5f65515268b0a646b3e5ccedd85b6e0b5f831ac3 (diff)
Merge master branch
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/R600/@166033 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
178 files changed, 7401 insertions, 2215 deletions
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 87a75fd3b11..588206e9159 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -31,6 +31,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeCFGOnlyViewerPass(Registry);
   initializeCFGOnlyPrinterPass(Registry);
   initializePrintDbgInfoPass(Registry);
+  initializeDependenceAnalysisPass(Registry);
   initializeDominanceFrontierPass(Registry);
   initializeDomViewerPass(Registry);
   initializeDomPrinterPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 263bfc031fc..36903f94e25 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -286,7 +286,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       V = GEPOp->getOperand(0);
       continue;
     }
-
+
+    unsigned AS = GEPOp->getPointerAddressSpace();
     // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
     gep_type_iterator GTI = gep_type_begin(GEPOp);
     for (User::const_op_iterator I = GEPOp->op_begin()+1,
@@ -315,7 +316,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       // If the integer type is smaller than the pointer size, it is implicitly
       // sign extended to pointer size.
       unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
-      if (TD->getPointerSizeInBits() > Width)
+      if (TD->getPointerSizeInBits(AS) > Width)
         Extension = EK_SignExt;

       // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
@@ -344,7 +345,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,

     // Make sure that we have a scale that makes sense for this target's
     // pointer size.
-    if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+    if (unsigned ShiftBits = 64-TD->getPointerSizeInBits(AS)) {
       Scale <<= ShiftBits;
       Scale = (int64_t)Scale >> ShiftBits;
     }
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index e461848e861..3ce888fefa4 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -13,6 +13,7 @@ add_llvm_library(LLVMAnalysis
   CodeMetrics.cpp
   ConstantFolding.cpp
   DbgInfoPrinter.cpp
+  DependenceAnalysis.cpp
   DomPrinter.cpp
   DominanceFrontier.cpp
   IVUsers.cpp
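For illustration, a minimal standalone C++ sketch of the idea behind the hunks in this commit that thread an address space through the pointer-size query: on targets like R600, pointer width varies by address space, so an "is this cast free?" check must ask about the cast's own address space rather than the default one. ToyDataLayout and isFreeIntToPtr are hypothetical names, not LLVM's API; this is not part of the patch.

    #include <cstdint>
    #include <map>

    // Hypothetical per-address-space pointer widths, e.g. for a GPU target
    // where global and local memory use differently sized pointers.
    struct ToyDataLayout {
      std::map<unsigned, unsigned> PointerBits; // address space -> width in bits

      unsigned getPointerSizeInBits(unsigned AS = 0) const {
        auto It = PointerBits.find(AS);
        return It != PointerBits.end() ? It->second : 64;
      }
    };

    // An inttoptr-style cast is free only if the integer fits in a pointer
    // of the *right* address space; consulting address space 0 for every
    // cast (the old behavior) gives wrong answers on such targets.
    bool isFreeIntToPtr(const ToyDataLayout &TD, unsigned IntBits, unsigned AS) {
      return IntBits <= TD.getPointerSizeInBits(AS);
    }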
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 651a54be1b9..d6692684960 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -91,14 +91,16 @@ bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) {
     // which doesn't contain values outside the range of a pointer.
     if (isa<IntToPtrInst>(CI) && TD &&
         TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
-        Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits())
+        Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits(
+          cast<IntToPtrInst>(CI)->getAddressSpace()))
       return true;

     // A ptrtoint cast is free so long as the result is large enough to store
     // the pointer, and a legal integer type.
     if (isa<PtrToIntInst>(CI) && TD &&
         TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
-        Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits())
+        Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits(
+          cast<PtrToIntInst>(CI)->getPointerAddressSpace()))
       return true;

     // trunc to a native type is free (assuming the target has compare and
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index b7bf044a368..146897ad675 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -916,10 +916,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
     if (TD && CE->getOpcode() == Instruction::IntToPtr) {
       Constant *Input = CE->getOperand(0);
       unsigned InWidth = Input->getType()->getScalarSizeInBits();
-      if (TD->getPointerSizeInBits() < InWidth) {
+      unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
+      if (TD->getPointerSizeInBits(AS) < InWidth) {
         Constant *Mask =
           ConstantInt::get(CE->getContext(),
                            APInt::getLowBitsSet(InWidth,
-                                                TD->getPointerSizeInBits()));
+                                                TD->getPointerSizeInBits(AS)));
         Input = ConstantExpr::getAnd(Input, Mask);
       }
       // Do a zext or trunc to get to the dest size.
@@ -932,9 +933,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
     // the int size is >= the ptr size. This requires knowing the width of a
     // pointer, so it can't be done in ConstantExpr::getCast.
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
-      if (TD &&
-          TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
-          CE->getOpcode() == Instruction::PtrToInt)
+      if (TD && CE->getOpcode() == Instruction::PtrToInt &&
+          TD->getPointerSizeInBits(
+            cast<PointerType>(CE->getOperand(0)->getType())->getAddressSpace())
+          <= CE->getType()->getScalarSizeInBits())
         return FoldBitCast(CE->getOperand(0), DestTy, *TD);

     return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
new file mode 100644
index 00000000000..016fe396e7d
--- /dev/null
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -0,0 +1,3781 @@
+//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DependenceAnalysis is an LLVM pass that analyses dependences between memory
+// accesses. Currently, it is an (incomplete) implementation of the approach
+// described in
+//
+//            Practical Dependence Testing
+//            Goff, Kennedy, Tseng
+//            PLDI 1991
+//
+// There's a single entry point that analyzes the dependence between a pair
+// of memory references in a function, returning either NULL, for no dependence,
+// or a more-or-less detailed description of the dependence between them.
+//
+// Currently, the implementation cannot propagate constraints between
+// coupled RDIV subscripts and lacks a multi-subscript MIV test.
+// Both of these are conservative weaknesses;
+// that is, not a source of correctness problems.
+//
+// The implementation depends on the GEP instruction to
+// differentiate subscripts. Since Clang linearizes subscripts
+// for most arrays, we give up some precision (though the existing MIV tests
+// will help). We trust that the GEP instruction will eventually be extended.
+// In the meantime, we should explore Maslov's ideas about delinearization.
+//
+// We should pay some careful attention to the possibility of integer overflow
+// in the implementation of the various tests. This could happen with Add,
+// Subtract, or Multiply, with both APInt's and SCEV's.
+//
+// Some non-linear subscript pairs can be handled by the GCD test
+// (and perhaps other tests).
+// Should explore how often these things occur.
+//
+// Finally, it seems like certain test cases expose weaknesses in the SCEV
+// simplification, especially in the handling of sign and zero extensions.
+// It could be useful to spend time exploring these.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+//===----------------------------------------------------------------------===//
+//                                                                            //
+//                   In memory of Ken Kennedy, 1945 - 2007                    //
+//                                                                            //
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "da"
+
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// statistics
+
+STATISTIC(TotalArrayPairs, "Array pairs tested");
+STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
+STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
+STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
+STATISTIC(ZIVapplications, "ZIV applications");
+STATISTIC(ZIVindependence, "ZIV independence");
+STATISTIC(StrongSIVapplications, "Strong SIV applications");
+STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
+STATISTIC(StrongSIVindependence, "Strong SIV independence");
+STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
+STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
+STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
+STATISTIC(ExactSIVapplications, "Exact SIV applications");
+STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
+STATISTIC(ExactSIVindependence, "Exact SIV independence");
+STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
+STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
+STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
+STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
+STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
+STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
+STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
+STATISTIC(DeltaApplications, "Delta applications");
+STATISTIC(DeltaSuccesses, "Delta successes");
+STATISTIC(DeltaIndependence, "Delta independence");
+STATISTIC(DeltaPropagations, "Delta propagations");
+STATISTIC(GCDapplications, "GCD applications");
+STATISTIC(GCDsuccesses, "GCD successes");
+STATISTIC(GCDindependence, "GCD independence");
+STATISTIC(BanerjeeApplications, "Banerjee applications");
+STATISTIC(BanerjeeIndependence, "Banerjee independence");
+STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+
+//===----------------------------------------------------------------------===//
+// basics
+
+INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
+                      "Dependence Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DependenceAnalysis, "da",
+                    "Dependence Analysis", true, true)
+
+char DependenceAnalysis::ID = 0;
+
+
+FunctionPass *llvm::createDependenceAnalysisPass() {
+  return new DependenceAnalysis();
+}
+
+
+bool DependenceAnalysis::runOnFunction(Function &F) {
+  this->F = &F;
+  AA = &getAnalysis<AliasAnalysis>();
+  SE = &getAnalysis<ScalarEvolution>();
+  LI = &getAnalysis<LoopInfo>();
+  return false;
+}
+
+
+void DependenceAnalysis::releaseMemory() {
+}
+
+
+void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<AliasAnalysis>();
+  AU.addRequiredTransitive<ScalarEvolution>();
+  AU.addRequiredTransitive<LoopInfo>();
+}
+
+
+// Used to test the dependence analyzer.
+// Looks through the function, noting the first store instruction
+// and the first load instruction
+// (which always follows the first store in our tests).
+// Calls depends() and prints out the result.
+// Ignores all other instructions.
+static
+void dumpExampleDependence(raw_ostream &OS, Function *F,
+                           DependenceAnalysis *DA) {
+  for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F);
+       SrcI != SrcE; ++SrcI) {
+    if (const StoreInst *Src = dyn_cast<StoreInst>(&*SrcI)) {
+      for (inst_iterator DstI = SrcI, DstE = inst_end(F);
+           DstI != DstE; ++DstI) {
+        if (const LoadInst *Dst = dyn_cast<LoadInst>(&*DstI)) {
+          OS << "da analyze - ";
+          if (Dependence *D = DA->depends(Src, Dst, true)) {
+            D->dump(OS);
+            for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
+              if (D->isSplitable(Level)) {
+                OS << "da analyze - split level = " << Level;
+                OS << ", iteration = " << *DA->getSplitIteration(D, Level);
+                OS << "!\n";
+              }
+            }
+            delete D;
+          }
+          else
+            OS << "none!\n";
+          return;
+        }
+      }
+    }
+  }
+}
+
+
+void DependenceAnalysis::print(raw_ostream &OS, const Module*) const {
+  dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this));
+}
+
+//===----------------------------------------------------------------------===//
+// Dependence methods
+
+// Returns true if this is an input dependence.
+bool Dependence::isInput() const {
+  return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an output dependence.
+bool Dependence::isOutput() const {
+  return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if this is a flow (aka true) dependence.
+bool Dependence::isFlow() const {
+  return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an anti dependence.
+bool Dependence::isAnti() const {
+  return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if a particular level is scalar; that is,
+// if no subscript in the source or destination mentions the induction
+// variable associated with the loop at this level.
+// Leave this out of line, so it will serve as a virtual method anchor.
+bool Dependence::isScalar(unsigned level) const {
+  return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FullDependence methods
+
+FullDependence::FullDependence(const Instruction *Source,
+                               const Instruction *Destination,
+                               bool PossiblyLoopIndependent,
+                               unsigned CommonLevels) :
+  Dependence(Source, Destination),
+  Levels(CommonLevels),
+  LoopIndependent(PossiblyLoopIndependent) {
+  Consistent = true;
+  DV = CommonLevels ? new DVEntry[CommonLevels] : NULL;
+}
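For illustration, a standalone sketch (not part of the patch; all names are hypothetical) of the classification encoded by isInput/isOutput/isFlow/isAnti above: the dependence kind is fully determined by whether each endpoint reads or writes memory.

    #include <cassert>

    enum class DepKind { Input, Output, Flow, Anti };

    // Hypothetical helper: classify a dependence by (SrcWrites, DstWrites).
    DepKind classify(bool SrcWrites, bool DstWrites) {
      if (!SrcWrites && !DstWrites) return DepKind::Input;  // read  -> read
      if (SrcWrites && DstWrites)   return DepKind::Output; // write -> write
      if (SrcWrites)                return DepKind::Flow;   // write -> read
      return DepKind::Anti;                                 // read  -> write
    }

    int main() {
      assert(classify(true, false) == DepKind::Flow); // store then load
      return 0;
    }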
+
+// The rest are simple getters that hide the implementation.
+
+// getDirection - Returns the direction associated with a particular level.
+unsigned FullDependence::getDirection(unsigned Level) const {
+  assert(0 < Level && Level <= Levels && "Level out of range");
+  return DV[Level - 1].Direction;
+}
+
+
+// Returns the distance (or NULL) associated with a particular level.
+const SCEV *FullDependence::getDistance(unsigned Level) const {
+  assert(0 < Level && Level <= Levels && "Level out of range");
+  return DV[Level - 1].Distance;
+}
+
+
+// Returns true if a particular level is scalar; that is,
+// if no subscript in the source or destination mentions the induction
+// variable associated with the loop at this level.
+bool FullDependence::isScalar(unsigned Level) const {
+  assert(0 < Level && Level <= Levels && "Level out of range");
+  return DV[Level - 1].Scalar;
+}
+
+
+// Returns true if peeling the first iteration from this loop
+// will break this dependence.
+bool FullDependence::isPeelFirst(unsigned Level) const {
+  assert(0 < Level && Level <= Levels && "Level out of range");
+  return DV[Level - 1].PeelFirst;
+}
+
+
+// Returns true if peeling the last iteration from this loop
+// will break this dependence.
+bool FullDependence::isPeelLast(unsigned Level) const {
+  assert(0 < Level && Level <= Levels && "Level out of range");
+  return DV[Level - 1].PeelLast;
+}
+
+
+// Returns true if splitting this loop will break the dependence.
+bool FullDependence::isSplitable(unsigned Level) const {
+  assert(0 < Level && Level <= Levels && "Level out of range");
+  return DV[Level - 1].Splitable;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DependenceAnalysis::Constraint methods
+
+// If constraint is a point <X, Y>, returns X.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getX() const {
+  assert(Kind == Point && "Kind should be Point");
+  return A;
+}
+
+
+// If constraint is a point <X, Y>, returns Y.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getY() const {
+  assert(Kind == Point && "Kind should be Point");
+  return B;
+}
+
+
+// If constraint is a line AX + BY = C, returns A.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getA() const {
+  assert((Kind == Line || Kind == Distance) &&
+         "Kind should be Line (or Distance)");
+  return A;
+}
+
+
+// If constraint is a line AX + BY = C, returns B.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getB() const {
+  assert((Kind == Line || Kind == Distance) &&
+         "Kind should be Line (or Distance)");
+  return B;
+}
+
+
+// If constraint is a line AX + BY = C, returns C.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getC() const {
+  assert((Kind == Line || Kind == Distance) &&
+         "Kind should be Line (or Distance)");
+  return C;
+}
+
+
+// If constraint is a distance, returns D.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getD() const {
+  assert(Kind == Distance && "Kind should be Distance");
+  return SE->getNegativeSCEV(C);
+}
+
+
+// Returns the loop associated with this constraint.
+const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const {
+  assert((Kind == Distance || Kind == Line || Kind == Point) &&
+         "Kind should be Distance, Line, or Point");
+  return AssociatedLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setPoint(const SCEV *X,
+                                              const SCEV *Y,
+                                              const Loop *CurLoop) {
+  Kind = Point;
+  A = X;
+  B = Y;
+  AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setLine(const SCEV *AA,
+                                             const SCEV *BB,
+                                             const SCEV *CC,
+                                             const Loop *CurLoop) {
+  Kind = Line;
+  A = AA;
+  B = BB;
+  C = CC;
+  AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setDistance(const SCEV *D,
+                                                 const Loop *CurLoop) {
+  Kind = Distance;
+  A = SE->getConstant(D->getType(), 1);
+  B = SE->getNegativeSCEV(A);
+  C = SE->getNegativeSCEV(D);
+  AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setEmpty() {
+  Kind = Empty;
+}
+
+
+void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) {
+  SE = NewSE;
+  Kind = Any;
+}
+
+
+// For debugging purposes. Dumps the constraint out to OS.
+void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const {
+  if (isEmpty())
+    OS << " Empty\n";
+  else if (isAny())
+    OS << " Any\n";
+  else if (isPoint())
+    OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
+  else if (isDistance())
+    OS << " Distance is " << *getD() <<
+      " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
+  else if (isLine())
+    OS << " Line is " << *getA() << "*X + " <<
+      *getB() << "*Y = " << *getC() << "\n";
+  else
+    llvm_unreachable("unknown constraint type in Constraint::dump");
+}
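For illustration, a standalone sketch (not part of the patch; ToyConstraint is a hypothetical name) of the encoding used by setDistance and getD above: a distance D is stored as the line 1*X + (-1)*Y = -D, i.e. Y - X = D, so recovering the distance is just negating C.

    #include <cassert>
    #include <cstdint>

    struct ToyConstraint {
      int64_t A, B, C;
      void setDistance(int64_t D) { A = 1; B = -A; C = -D; } // 1*X - 1*Y = -D
      int64_t getD() const { return -C; }
      bool holds(int64_t X, int64_t Y) const { return A * X + B * Y == C; }
    };

    int main() {
      ToyConstraint K;
      K.setDistance(3);
      assert(K.getD() == 3);
      assert(K.holds(2, 5)); // source iteration 2, destination 5: distance 3
      return 0;
    }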
+
+
+// Updates X with the intersection
+// of the Constraints X and Y. Returns true if X has changed.
+// Corresponds to Figure 4 from the paper
+//
+//            Practical Dependence Testing
+//            Goff, Kennedy, Tseng
+//            PLDI 1991
+bool DependenceAnalysis::intersectConstraints(Constraint *X,
+                                              const Constraint *Y) {
+  ++DeltaApplications;
+  DEBUG(dbgs() << "\tintersect constraints\n");
+  DEBUG(dbgs() << "\t    X ="; X->dump(dbgs()));
+  DEBUG(dbgs() << "\t    Y ="; Y->dump(dbgs()));
+  assert(!Y->isPoint() && "Y must not be a Point");
+  if (X->isAny()) {
+    if (Y->isAny())
+      return false;
+    *X = *Y;
+    return true;
+  }
+  if (X->isEmpty())
+    return false;
+  if (Y->isEmpty()) {
+    X->setEmpty();
+    return true;
+  }
+
+  if (X->isDistance() && Y->isDistance()) {
+    DEBUG(dbgs() << "\t    intersect 2 distances\n");
+    if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
+      return false;
+    if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
+      X->setEmpty();
+      ++DeltaSuccesses;
+      return true;
+    }
+    // Hmmm, interesting situation.
+    // I guess if either is constant, keep it and ignore the other.
+    if (isa<SCEVConstant>(Y->getD())) {
+      *X = *Y;
+      return true;
+    }
+    return false;
+  }
+
+  // At this point, the pseudo-code in Figure 4 of the paper
+  // checks if (X->isPoint() && Y->isPoint()).
+  // This case can't occur in our implementation,
+  // since a Point can only arise as the result of intersecting
+  // two Line constraints, and the right-hand value, Y, is never
+  // the result of an intersection.
+  assert(!(X->isPoint() && Y->isPoint()) &&
+         "We shouldn't ever see X->isPoint() && Y->isPoint()");
+
+  if (X->isLine() && Y->isLine()) {
+    DEBUG(dbgs() << "\t    intersect 2 lines\n");
+    const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
+    const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
+    if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
+      // slopes are equal, so lines are parallel
+      DEBUG(dbgs() << "\t\tsame slope\n");
+      Prod1 = SE->getMulExpr(X->getC(), Y->getB());
+      Prod2 = SE->getMulExpr(X->getB(), Y->getC());
+      if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
+        return false;
+      if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
+        X->setEmpty();
+        ++DeltaSuccesses;
+        return true;
+      }
+      return false;
+    }
+    if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
+      // slopes differ, so lines intersect
+      DEBUG(dbgs() << "\t\tdifferent slopes\n");
+      const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
+      const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
+      const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
+      const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
+      const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
+      const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
+      const SCEVConstant *C1A2_C2A1 =
+        dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
+      const SCEVConstant *C1B2_C2B1 =
+        dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
+      const SCEVConstant *A1B2_A2B1 =
+        dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
+      const SCEVConstant *A2B1_A1B2 =
+        dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
+      if (!C1B2_C2B1 || !C1A2_C2A1 ||
+          !A1B2_A2B1 || !A2B1_A1B2)
+        return false;
+      APInt Xtop = C1B2_C2B1->getValue()->getValue();
+      APInt Xbot = A1B2_A2B1->getValue()->getValue();
+      APInt Ytop = C1A2_C2A1->getValue()->getValue();
+      APInt Ybot = A2B1_A1B2->getValue()->getValue();
+      DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
+      DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
+      DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
+      DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
+      APInt Xq = Xtop; // these need to be initialized, even
+      APInt Xr = Xtop; // though they're just going to be overwritten
+      APInt::sdivrem(Xtop, Xbot, Xq, Xr);
+      APInt Yq = Ytop;
+      APInt Yr = Ytop;
+      APInt::sdivrem(Ytop, Ybot, Yq, Yr);
+      if (Xr != 0 || Yr != 0) {
+        X->setEmpty();
+        ++DeltaSuccesses;
+        return true;
+      }
+      DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
+      if (Xq.slt(0) || Yq.slt(0)) {
+        X->setEmpty();
+        ++DeltaSuccesses;
+        return true;
+      }
+      if (const SCEVConstant *CUB =
+          collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
+        APInt UpperBound = CUB->getValue()->getValue();
+        DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
+        if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
+          X->setEmpty();
+          ++DeltaSuccesses;
+          return true;
+        }
+      }
+      X->setPoint(SE->getConstant(Xq),
+                  SE->getConstant(Yq),
+                  X->getAssociatedLoop());
+      ++DeltaSuccesses;
+      return true;
+    }
+    return false;
+  }
+
+  // if (X->isLine() && Y->isPoint()) This case can't occur.
+  assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
+
+  if (X->isPoint() && Y->isLine()) {
+    DEBUG(dbgs() << "\t    intersect Point and Line\n");
+    const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
+    const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
+    const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
+    if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
+      return false;
+    if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
+      X->setEmpty();
+      ++DeltaSuccesses;
+      return true;
+    }
+    return false;
+  }
+
+  llvm_unreachable("shouldn't reach the end of Constraint intersection");
+  return false;
+}
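For illustration, a standalone sketch (not part of the patch; intersectLines is a hypothetical simplification) of the line/line case above: solve the two lines by Cramer's rule, then reject intersections that are non-integral or negative, since loop iterations are non-negative integers. The trip-count check against the loop's upper bound is omitted for brevity.

    #include <cstdint>
    #include <optional>
    #include <utility>

    // Intersect A1*X + B1*Y = C1 with A2*X + B2*Y = C2 over the integers.
    // Returns nothing when the lines are parallel or when the crossing
    // can't be a real (integral, non-negative) iteration pair.
    std::optional<std::pair<int64_t, int64_t>>
    intersectLines(int64_t A1, int64_t B1, int64_t C1,
                   int64_t A2, int64_t B2, int64_t C2) {
      int64_t Xbot = A1 * B2 - A2 * B1; // zero means equal slopes
      if (Xbot == 0)
        return std::nullopt;
      int64_t Xtop = C1 * B2 - C2 * B1;
      int64_t Ytop = C1 * A2 - C2 * A1;
      int64_t Ybot = A2 * B1 - A1 * B2; // = -Xbot
      if (Xtop % Xbot != 0 || Ytop % Ybot != 0)
        return std::nullopt; // crossing isn't at integer iterations
      int64_t X = Xtop / Xbot, Y = Ytop / Ybot;
      if (X < 0 || Y < 0)
        return std::nullopt; // before the first iteration
      return std::make_pair(X, Y);
    }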
+
+
+//===----------------------------------------------------------------------===//
+// DependenceAnalysis methods
+
+// For debugging purposes. Dumps a dependence to OS.
+void Dependence::dump(raw_ostream &OS) const {
+  bool Splitable = false;
+  if (isConfused())
+    OS << "confused";
+  else {
+    if (isConsistent())
+      OS << "consistent ";
+    if (isFlow())
+      OS << "flow";
+    else if (isOutput())
+      OS << "output";
+    else if (isAnti())
+      OS << "anti";
+    else if (isInput())
+      OS << "input";
+    unsigned Levels = getLevels();
+    if (Levels) {
+      OS << " [";
+      for (unsigned II = 1; II <= Levels; ++II) {
+        if (isSplitable(II))
+          Splitable = true;
+        if (isPeelFirst(II))
+          OS << 'p';
+        const SCEV *Distance = getDistance(II);
+        if (Distance)
+          OS << *Distance;
+        else if (isScalar(II))
+          OS << "S";
+        else {
+          unsigned Direction = getDirection(II);
+          if (Direction == DVEntry::ALL)
+            OS << "*";
+          else {
+            if (Direction & DVEntry::LT)
+              OS << "<";
+            if (Direction & DVEntry::EQ)
+              OS << "=";
+            if (Direction & DVEntry::GT)
+              OS << ">";
+          }
+        }
+        if (isPeelLast(II))
+          OS << 'p';
+        if (II < Levels)
+          OS << " ";
+      }
+      if (isLoopIndependent())
+        OS << "|<";
+      OS << "]";
+      if (Splitable)
+        OS << " splitable";
+    }
+  }
+  OS << "!\n";
+}
+
+
+
+static
+AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
+                                                  const Value *A,
+                                                  const Value *B) {
+  const Value *AObj = GetUnderlyingObject(A);
+  const Value *BObj = GetUnderlyingObject(B);
+  return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
+                   BObj, AA->getTypeStoreSize(BObj->getType()));
+}
+
+
+// Returns true if the load or store can be analyzed. Atomic and volatile
+// operations have properties which this analysis does not understand.
+static
+bool isLoadOrStore(const Instruction *I) {
+  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+    return LI->isUnordered();
+  else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+    return SI->isUnordered();
+  return false;
+}
+
+
+static
+const Value *getPointerOperand(const Instruction *I) {
+  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+    return LI->getPointerOperand();
+  if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+    return SI->getPointerOperand();
+  llvm_unreachable("Value is not load or store instruction");
+  return 0;
+}
+
+
+// Examines the loop nesting of the Src and Dst
+// instructions and establishes their shared loops. Sets the variables
+// CommonLevels, SrcLevels, and MaxLevels.
+// The source and destination instructions needn't be contained in the same
+// loop. The routine establishNestingLevels finds the level of most deeply
+// nested loop that contains them both, CommonLevels. An instruction that's
+// not contained in a loop is at level = 0. MaxLevels is equal to the level
+// of the source plus the level of the destination, minus CommonLevels.
+// This lets us allocate vectors MaxLevels in length, with room for every
+// distinct loop referenced in both the source and destination subscripts.
+// The variable SrcLevels is the nesting depth of the source instruction.
+// It's used to help calculate distinct loops referenced by the destination.
+// Here's the map from loops to levels:
+//            0 - unused
+//            1 - outermost common loop
+//          ... - other common loops
+// CommonLevels - innermost common loop
+//          ... - loops containing Src but not Dst
+//    SrcLevels - innermost loop containing Src but not Dst
+//          ... - loops containing Dst but not Src
+//    MaxLevels - innermost loop containing Dst but not Src
+// Consider the following code fragment:
+//   for (a = ...) {
+//     for (b = ...) {
+//       for (c = ...) {
+//         for (d = ...) {
+//           A[] = ...;
+//         }
+//       }
+//       for (e = ...) {
+//         for (f = ...) {
+//           for (g = ...) {
+//             ... = A[];
+//           }
+//         }
+//       }
+//     }
+//   }
+// If we're looking at the possibility of a dependence between the store
+// to A (the Src) and the load from A (the Dst), we'll note that they
+// have 2 loops in common, so CommonLevels will equal 2 and the direction
+// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
+// A map from loop names to loop numbers would look like
+//     a - 1
+//     b - 2 = CommonLevels
+//     c - 3
+//     d - 4 = SrcLevels
+//     e - 5
+//     f - 6
+//     g - 7 = MaxLevels
+void DependenceAnalysis::establishNestingLevels(const Instruction *Src,
+                                                const Instruction *Dst) {
+  const BasicBlock *SrcBlock = Src->getParent();
+  const BasicBlock *DstBlock = Dst->getParent();
+  unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
+  unsigned DstLevel = LI->getLoopDepth(DstBlock);
+  const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
+  const Loop *DstLoop = LI->getLoopFor(DstBlock);
+  SrcLevels = SrcLevel;
+  MaxLevels = SrcLevel + DstLevel;
+  while (SrcLevel > DstLevel) {
+    SrcLoop = SrcLoop->getParentLoop();
+    SrcLevel--;
+  }
+  while (DstLevel > SrcLevel) {
+    DstLoop = DstLoop->getParentLoop();
+    DstLevel--;
+  }
+  while (SrcLoop != DstLoop) {
+    SrcLoop = SrcLoop->getParentLoop();
+    DstLoop = DstLoop->getParentLoop();
+    SrcLevel--;
+  }
+  CommonLevels = SrcLevel;
+  MaxLevels -= CommonLevels;
+}
+
+
+// Given one of the loops containing the source, return
+// its level index in our numbering scheme.
+unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const {
+  return SrcLoop->getLoopDepth();
+}
+
+
+// Given one of the loops containing the destination,
+// return its level index in our numbering scheme.
+unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const {
+  unsigned D = DstLoop->getLoopDepth();
+  if (D > CommonLevels)
+    return D - CommonLevels + SrcLevels;
+  else
+    return D;
+}
+
+
+// Returns true if Expression is loop invariant in LoopNest.
+bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression,
+                                         const Loop *LoopNest) const {
+  if (!LoopNest)
+    return true;
+  return SE->isLoopInvariant(Expression, LoopNest) &&
+         isLoopInvariant(Expression, LoopNest->getParentLoop());
+}
+
+
+
+// Finds the set of loops from the LoopNest that
+// have a level <= CommonLevels and are referred to by the SCEV Expression.
+void DependenceAnalysis::collectCommonLoops(const SCEV *Expression,
+                                            const Loop *LoopNest,
+                                            SmallBitVector &Loops) const {
+  while (LoopNest) {
+    unsigned Level = LoopNest->getLoopDepth();
+    if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
+      Loops.set(Level);
+    LoopNest = LoopNest->getParentLoop();
+  }
+}
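For illustration, a standalone sketch (not part of the patch) of the renumbering performed by mapSrcLoop and mapDstLoop, checked against the a..g example in the comment above: destination-only loops are renumbered past the source-only ones.

    #include <cassert>

    unsigned mapSrcLoop(unsigned Depth) { return Depth; }

    unsigned mapDstLoop(unsigned Depth, unsigned CommonLevels, unsigned SrcLevels) {
      return Depth > CommonLevels ? Depth - CommonLevels + SrcLevels : Depth;
    }

    int main() {
      const unsigned CommonLevels = 2, SrcLevels = 4;
      assert(mapSrcLoop(4) == 4);                          // loop d -> SrcLevels
      assert(mapDstLoop(3, CommonLevels, SrcLevels) == 5); // loop e
      assert(mapDstLoop(5, CommonLevels, SrcLevels) == 7); // loop g -> MaxLevels
      return 0;
    }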
+
+
+// removeMatchingExtensions - Examines a subscript pair.
+// If the source and destination are identically sign (or zero)
+// extended, it strips off the extension in an effort to simplify
+// the actual analysis.
+void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) {
+  const SCEV *Src = Pair->Src;
+  const SCEV *Dst = Pair->Dst;
+  if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
+      (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
+    const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
+    const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
+    if (SrcCast->getType() == DstCast->getType()) {
+      Pair->Src = SrcCast->getOperand();
+      Pair->Dst = DstCast->getOperand();
+    }
+  }
+}
+
+
+// Examine the scev and return true iff it's linear.
+// Collect any loops mentioned in the set of "Loops".
+bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src,
+                                           const Loop *LoopNest,
+                                           SmallBitVector &Loops) {
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src);
+  if (!AddRec)
+    return isLoopInvariant(Src, LoopNest);
+  const SCEV *Start = AddRec->getStart();
+  const SCEV *Step = AddRec->getStepRecurrence(*SE);
+  if (!isLoopInvariant(Step, LoopNest))
+    return false;
+  Loops.set(mapSrcLoop(AddRec->getLoop()));
+  return checkSrcSubscript(Start, LoopNest, Loops);
+}
+
+
+
+// Examine the scev and return true iff it's linear.
+// Collect any loops mentioned in the set of "Loops".
+bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst,
+                                           const Loop *LoopNest,
+                                           SmallBitVector &Loops) {
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+  if (!AddRec)
+    return isLoopInvariant(Dst, LoopNest);
+  const SCEV *Start = AddRec->getStart();
+  const SCEV *Step = AddRec->getStepRecurrence(*SE);
+  if (!isLoopInvariant(Step, LoopNest))
+    return false;
+  Loops.set(mapDstLoop(AddRec->getLoop()));
+  return checkDstSubscript(Start, LoopNest, Loops);
+}
+
+
+// Examines the subscript pair (the Src and Dst SCEVs)
+// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
+// Collects the associated loops in a set.
+DependenceAnalysis::Subscript::ClassificationKind
+DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
+                                 const SCEV *Dst, const Loop *DstLoopNest,
+                                 SmallBitVector &Loops) {
+  SmallBitVector SrcLoops(MaxLevels + 1);
+  SmallBitVector DstLoops(MaxLevels + 1);
+  if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
+    return Subscript::NonLinear;
+  if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
+    return Subscript::NonLinear;
+  Loops = SrcLoops;
+  Loops |= DstLoops;
+  unsigned N = Loops.count();
+  if (N == 0)
+    return Subscript::ZIV;
+  if (N == 1)
+    return Subscript::SIV;
+  if (N == 2 && (SrcLoops.count() == 0 ||
+                 DstLoops.count() == 0 ||
+                 (SrcLoops.count() == 1 && DstLoops.count() == 1)))
+    return Subscript::RDIV;
+  return Subscript::MIV;
+}
+
+
+// A wrapper around SCEV::isKnownPredicate.
+// Looks for cases where we're interested in comparing for equality.
+// If both X and Y have been identically sign or zero extended,
+// it strips off the (confusing) extensions before invoking
+// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
+// will be similarly updated.
+//
+// If SCEV::isKnownPredicate can't prove the predicate,
+// we try simple subtraction, which seems to help in some cases
+// involving symbolics.
+bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred,
+                                          const SCEV *X,
+                                          const SCEV *Y) const {
+  if (Pred == CmpInst::ICMP_EQ ||
+      Pred == CmpInst::ICMP_NE) {
+    if ((isa<SCEVSignExtendExpr>(X) &&
+         isa<SCEVSignExtendExpr>(Y)) ||
+        (isa<SCEVZeroExtendExpr>(X) &&
+         isa<SCEVZeroExtendExpr>(Y))) {
+      const SCEVCastExpr *CX = cast<SCEVCastExpr>(X);
+      const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y);
+      const SCEV *Xop = CX->getOperand();
+      const SCEV *Yop = CY->getOperand();
+      if (Xop->getType() == Yop->getType()) {
+        X = Xop;
+        Y = Yop;
+      }
+    }
+  }
+  if (SE->isKnownPredicate(Pred, X, Y))
+    return true;
+  // If SE->isKnownPredicate can't prove the condition,
+  // we try the brute-force approach of subtracting
+  // and testing the difference.
+  // By testing with SE->isKnownPredicate first, we avoid
+  // the possibility of overflow when the arguments are constants.
+  const SCEV *Delta = SE->getMinusSCEV(X, Y);
+  switch (Pred) {
+  case CmpInst::ICMP_EQ:
+    return Delta->isZero();
+  case CmpInst::ICMP_NE:
+    return SE->isKnownNonZero(Delta);
+  case CmpInst::ICMP_SGE:
+    return SE->isKnownNonNegative(Delta);
+  case CmpInst::ICMP_SLE:
+    return SE->isKnownNonPositive(Delta);
+  case CmpInst::ICMP_SGT:
+    return SE->isKnownPositive(Delta);
+  case CmpInst::ICMP_SLT:
+    return SE->isKnownNegative(Delta);
+  default:
+    llvm_unreachable("unexpected predicate in isKnownPredicate");
+  }
+}
+
+
+// All subscripts are the same type.
+// Loop bound may be smaller (e.g., a char).
+// Should zero extend loop bound, since it's always >= 0.
+// This routine collects upper bound and extends if needed.
+// Return null if no bound available.
+const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L,
+                                                  Type *T) const {
+  if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+    const SCEV *UB = SE->getBackedgeTakenCount(L);
+    return SE->getNoopOrZeroExtend(UB, T);
+  }
+  return NULL;
+}
+
+
+// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
+// If the cast fails, returns NULL.
+const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L,
+                                                                  Type *T
+                                                                  ) const {
+  if (const SCEV *UB = collectUpperBound(L, T))
+    return dyn_cast<SCEVConstant>(UB);
+  return NULL;
+}
+
+
+// testZIV -
+// When we have a pair of subscripts of the form [c1] and [c2],
+// where c1 and c2 are both loop invariant, we attack it using
+// the ZIV test. Basically, we test by comparing the two values,
+// but there are actually three possible results:
+// 1) the values are equal, so there's a dependence
+// 2) the values are different, so there's no dependence
+// 3) the values might be equal, so we have to assume a dependence.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testZIV(const SCEV *Src,
+                                 const SCEV *Dst,
+                                 FullDependence &Result) const {
+  DEBUG(dbgs() << "    src = " << *Src << "\n");
+  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
+  ++ZIVapplications;
+  if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
+    DEBUG(dbgs() << "    provably dependent\n");
+    return false; // provably dependent
+  }
+  if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
+    DEBUG(dbgs() << "    provably independent\n");
+    ++ZIVindependence;
+    return true; // provably independent
+  }
+  DEBUG(dbgs() << "    possibly dependent\n");
+  Result.Consistent = false;
+  return false; // possibly dependent
+}
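For illustration, a standalone sketch (not part of the patch; std::optional stands in for "value unknown to the compiler") of the three-way ZIV outcome above: only a provable inequality disproves the dependence, and an unanswerable comparison must conservatively be treated as a dependence.

    #include <optional>

    enum class ZIVResult { Dependent, Independent, MaybeDependent };

    ZIVResult testZIV(std::optional<long> Src, std::optional<long> Dst) {
      if (!Src || !Dst)
        return ZIVResult::MaybeDependent; // can't compare: assume a dependence
      return *Src == *Dst ? ZIVResult::Dependent : ZIVResult::Independent;
    }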
+
+
+// strongSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.1
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the Strong SIV test.
+//
+// Can prove independence. Failing that, can compute distance (and direction).
+// In the presence of symbolic terms, we can sometimes make progress.
+//
+// If there's a dependence,
+//
+//    c1 + a*i = c2 + a*i'
+//
+// The dependence distance is
+//
+//    d = i' - i = (c1 - c2)/a
+//
+// A dependence only exists if d is an integer and abs(d) <= U, where U is the
+// loop's upper bound. If a dependence exists, the dependence direction is
+// defined as
+//
+//                { < if d > 0
+//    direction = { = if d = 0
+//                { > if d < 0
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff,
+                                       const SCEV *SrcConst,
+                                       const SCEV *DstConst,
+                                       const Loop *CurLoop,
+                                       unsigned Level,
+                                       FullDependence &Result,
+                                       Constraint &NewConstraint) const {
+  DEBUG(dbgs() << "\tStrong SIV test\n");
+  DEBUG(dbgs() << "\t    Coeff = " << *Coeff);
+  DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
+  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst);
+  DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
+  DEBUG(dbgs() << "\t    DstConst = " << *DstConst);
+  DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
+  ++StrongSIVapplications;
+  assert(0 < Level && Level <= CommonLevels && "level out of range");
+  Level--;
+
+  const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+  DEBUG(dbgs() << "\t    Delta = " << *Delta);
+  DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
+
+  // check that |Delta| < iteration count
+  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+    DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound);
+    DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
+    const SCEV *AbsDelta =
+      SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
+    const SCEV *AbsCoeff =
+      SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
+    const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
+    if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
+      // Distance greater than trip count - no dependence
+      ++StrongSIVindependence;
+      ++StrongSIVsuccesses;
+      return true;
+    }
+  }
+
+  // Can we compute distance?
+  if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
+    APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue();
+    APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue();
+    APInt Distance = ConstDelta; // these need to be initialized
+    APInt Remainder = ConstDelta;
+    APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
+    DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
+    DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
+    // Make sure Coeff divides Delta exactly
+    if (Remainder != 0) {
+      // Coeff doesn't divide Distance, no dependence
+      ++StrongSIVindependence;
+      ++StrongSIVsuccesses;
+      return true;
+    }
+    Result.DV[Level].Distance = SE->getConstant(Distance);
+    NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
+    if (Distance.sgt(0))
+      Result.DV[Level].Direction &= Dependence::DVEntry::LT;
+    else if (Distance.slt(0))
+      Result.DV[Level].Direction &= Dependence::DVEntry::GT;
+    else
+      Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
+    ++StrongSIVsuccesses;
+  }
+  else if (Delta->isZero()) {
+    // since 0/X == 0
+    Result.DV[Level].Distance = Delta;
+    NewConstraint.setDistance(Delta, CurLoop);
+    Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
+    ++StrongSIVsuccesses;
+  }
+  else {
+    if (Coeff->isOne()) {
+      DEBUG(dbgs() << "\t    Distance = " << *Delta << "\n");
+      Result.DV[Level].Distance = Delta; // since X/1 == X
+      NewConstraint.setDistance(Delta, CurLoop);
+    }
+    else {
+      Result.Consistent = false;
+      NewConstraint.setLine(Coeff,
+                            SE->getNegativeSCEV(Coeff),
+                            SE->getNegativeSCEV(Delta), CurLoop);
+    }
+
+    // maybe we can get a useful direction
+    bool DeltaMaybeZero = !SE->isKnownNonZero(Delta);
+    bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
+    bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
+    bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
+    bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
+    // The double negatives above are confusing.
+    // It helps to read !SE->isKnownNonZero(Delta)
+    // as "Delta might be Zero"
+    unsigned NewDirection = Dependence::DVEntry::NONE;
+    if ((DeltaMaybePositive && CoeffMaybePositive) ||
+        (DeltaMaybeNegative && CoeffMaybeNegative))
+      NewDirection = Dependence::DVEntry::LT;
+    if (DeltaMaybeZero)
+      NewDirection |= Dependence::DVEntry::EQ;
+    if ((DeltaMaybeNegative && CoeffMaybePositive) ||
+        (DeltaMaybePositive && CoeffMaybeNegative))
+      NewDirection |= Dependence::DVEntry::GT;
+    if (NewDirection < Result.DV[Level].Direction)
+      ++StrongSIVsuccesses;
+    Result.DV[Level].Direction &= NewDirection;
+  }
+  return false;
+}
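For illustration, a standalone sketch (not part of the patch; strongSIVDistance is a hypothetical name, and a != 0 is assumed) of the constant case of the strong SIV test above: a must divide c1 - c2 exactly, the distance d = (c1 - c2)/a must satisfy |d| <= UB, and the sign of d gives the direction.

    #include <cstdlib>
    #include <optional>

    // Returns the dependence distance, or nothing if independence is proved.
    std::optional<long> strongSIVDistance(long C1, long C2, long A, long UB) {
      long Delta = C1 - C2;
      if (Delta % A != 0)
        return std::nullopt; // non-integer solution: no dependence
      long D = Delta / A;    // dependence distance i' - i
      if (std::labs(D) > UB)
        return std::nullopt; // distance exceeds the trip count
      return D;              // d > 0: "<", d == 0: "=", d < 0: ">"
    }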
+
+
+// weakCrossingSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Crossing SIV test.
+//
+// Given c1 + a*i = c2 - a*i', we can look for the intersection of
+// the two lines, where i = i', yielding
+//
+//    c1 + a*i = c2 - a*i
+//    2a*i = c2 - c1
+//    i = (c2 - c1)/2a
+//
+// If i < 0, there is no dependence.
+// If i > upperbound, there is no dependence.
+// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
+// If i = upperbound, there's a dependence with distance = 0.
+// If i is integral, there's a dependence (all directions).
+// If the non-integer part = 1/2, there's a dependence (<> directions).
+// Otherwise, there's no dependence.
+//
+// Can prove independence. Failing that,
+// can sometimes refine the directions.
+// Can determine iteration for splitting.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
+                                             const SCEV *SrcConst,
+                                             const SCEV *DstConst,
+                                             const Loop *CurLoop,
+                                             unsigned Level,
+                                             FullDependence &Result,
+                                             Constraint &NewConstraint,
+                                             const SCEV *&SplitIter) const {
+  DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
+  DEBUG(dbgs() << "\t    Coeff = " << *Coeff << "\n");
+  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
+  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
+  ++WeakCrossingSIVapplications;
+  assert(0 < Level && Level <= CommonLevels && "Level out of range");
+  Level--;
+  Result.Consistent = false;
+  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
+  NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
+  if (Delta->isZero()) {
+    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
+    ++WeakCrossingSIVsuccesses;
+    if (!Result.DV[Level].Direction) {
+      ++WeakCrossingSIVindependence;
+      return true;
+    }
+    Result.DV[Level].Distance = Delta; // = 0
+    return false;
+  }
+  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
+  if (!ConstCoeff)
+    return false;
+
+  Result.DV[Level].Splitable = true;
+  if (SE->isKnownNegative(ConstCoeff)) {
+    ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
+    assert(ConstCoeff &&
+           "dynamic cast of negative of ConstCoeff should yield constant");
+    Delta = SE->getNegativeSCEV(Delta);
+  }
+  assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
+
+  // compute SplitIter for use by DependenceAnalysis::getSplitIteration()
+  SplitIter =
+    SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0),
+                                    Delta),
+                    SE->getMulExpr(SE->getConstant(Delta->getType(), 2),
+                                   ConstCoeff));
+  DEBUG(dbgs() << "\t    Split iter = " << *SplitIter << "\n");
+
+  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+  if (!ConstDelta)
+    return false;
+
+  // We're certain that ConstCoeff > 0; therefore,
+  // if Delta < 0, then no dependence.
+  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
+  DEBUG(dbgs() << "\t    ConstCoeff = " << *ConstCoeff << "\n");
+  if (SE->isKnownNegative(Delta)) {
+    // No dependence, Delta < 0
+    ++WeakCrossingSIVindependence;
+    ++WeakCrossingSIVsuccesses;
+    return true;
+  }
+
+  // We're certain that Delta > 0 and ConstCoeff > 0.
+  // Check Delta/(2*ConstCoeff) against upper loop bound
+  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+    DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
+    const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
+    const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
+                                    ConstantTwo);
+    DEBUG(dbgs() << "\t    ML = " << *ML << "\n");
+    if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
+      // Delta too big, no dependence
+      ++WeakCrossingSIVindependence;
+      ++WeakCrossingSIVsuccesses;
+      return true;
+    }
+    if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
+      // i = i' = UB
+      Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+      Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
+      ++WeakCrossingSIVsuccesses;
+      if (!Result.DV[Level].Direction) {
+        ++WeakCrossingSIVindependence;
+        return true;
+      }
+      Result.DV[Level].Splitable = false;
+      Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0);
+      return false;
+    }
+  }
+
+  // check that Coeff divides Delta
+  APInt APDelta = ConstDelta->getValue()->getValue();
+  APInt APCoeff = ConstCoeff->getValue()->getValue();
+  APInt Distance = APDelta; // these need to be initialized
+  APInt Remainder = APDelta;
+  APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
+  DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
+  if (Remainder != 0) {
+    // Coeff doesn't divide Delta, no dependence
+    ++WeakCrossingSIVindependence;
+    ++WeakCrossingSIVsuccesses;
+    return true;
+  }
+  DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
+
+  // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
+  APInt Two = APInt(Distance.getBitWidth(), 2, true);
+  Remainder = Distance.srem(Two);
+  DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
+  if (Remainder != 0) {
+    // Equal direction isn't possible
+    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
+    ++WeakCrossingSIVsuccesses;
+  }
+  return false;
+}
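For illustration, a standalone sketch (not part of the patch; names are hypothetical, and a > 0 is assumed, matching the sign normalization above) of the weak-crossing SIV conditions with constant inputs: the access lines cross at i = (c2 - c1) / (2a), dependences require the crossing to land in [0, UB], and a half-integral crossing still allows "<" and ">" but rules out "=".

    struct CrossingResult {
      bool Dependence; // false: proven independent
      bool EqPossible; // meaningful only when Dependence is true
    };

    CrossingResult weakCrossingSIV(long C1, long C2, long A, long UB) {
      long Delta = C2 - C1;             // 2*a*i = c2 - c1 at the crossing
      if (Delta < 0 || Delta > 2 * A * UB)
        return {false, false};          // crossing outside the iteration space
      if (Delta % A != 0)
        return {false, false};          // neither integral nor half-integral
      long Distance = Delta / A;        // = 2*i at the crossing
      return {true, Distance % 2 == 0}; // odd: crossing at a half-iteration
    }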
+
+
+// Kirch's algorithm, from
+//
+//        Optimizing Supercompilers for Supercomputers
+//        Michael Wolfe
+//        MIT Press, 1989
+//
+// Program 2.1, page 29.
+// Computes the GCD of AM and BM.
+// Also finds a solution to the equation ax - by = gcd(a, b).
+// Returns true iff the gcd divides Delta.
+static
+bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta,
+             APInt &G, APInt &X, APInt &Y) {
+  APInt A0(Bits, 1, true), A1(Bits, 0, true);
+  APInt B0(Bits, 0, true), B1(Bits, 1, true);
+  APInt G0 = AM.abs();
+  APInt G1 = BM.abs();
+  APInt Q = G0; // these need to be initialized
+  APInt R = G0;
+  APInt::sdivrem(G0, G1, Q, R);
+  while (R != 0) {
+    APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
+    APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
+    G0 = G1; G1 = R;
+    APInt::sdivrem(G0, G1, Q, R);
+  }
+  G = G1;
+  DEBUG(dbgs() << "\t    GCD = " << G << "\n");
+  X = AM.slt(0) ? -A1 : A1;
+  Y = BM.slt(0) ? B1 : -B1;
+
+  // make sure gcd divides Delta
+  R = Delta.srem(G);
+  if (R != 0)
+    return true; // gcd doesn't divide Delta, no dependence
+  Q = Delta.sdiv(G);
+  X *= Q;
+  Y *= Q;
+  return false;
+}
+
+
+static
+APInt floorOfQuotient(APInt A, APInt B) {
+  APInt Q = A; // these need to be initialized
+  APInt R = A;
+  APInt::sdivrem(A, B, Q, R);
+  if (R == 0)
+    return Q;
+  if ((A.sgt(0) && B.sgt(0)) ||
+      (A.slt(0) && B.slt(0)))
+    return Q;
+  else
+    return Q - 1;
+}
+
+
+static
+APInt ceilingOfQuotient(APInt A, APInt B) {
+  APInt Q = A; // these need to be initialized
+  APInt R = A;
+  APInt::sdivrem(A, B, Q, R);
+  if (R == 0)
+    return Q;
+  if ((A.sgt(0) && B.sgt(0)) ||
+      (A.slt(0) && B.slt(0)))
+    return Q + 1;
+  else
+    return Q;
+}
+
+
+static
+APInt maxAPInt(APInt A, APInt B) {
+  return A.sgt(B) ? A : B;
+}
+
+
+static
+APInt minAPInt(APInt A, APInt B) {
+  return A.slt(B) ? A : B;
+}
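For illustration, a standalone sketch (not part of the patch) of this machinery with plain int64_t: the standard extended Euclidean algorithm (findGCD above is the ax - by variant with sign normalization) plus floor/ceiling of a signed quotient, which the exact SIV test uses to clamp the solution interval [TL, TU].

    #include <cassert>
    #include <cstdint>

    // Returns G = gcd(A, B) and Bezout coefficients with A*X + B*Y = G.
    int64_t extendedGCD(int64_t A, int64_t B, int64_t &X, int64_t &Y) {
      if (B == 0) { X = 1; Y = 0; return A; }
      int64_t X1, Y1;
      int64_t G = extendedGCD(B, A % B, X1, Y1);
      X = Y1;
      Y = X1 - (A / B) * Y1;
      return G;
    }

    int64_t floorOfQuotient(int64_t A, int64_t B) {
      int64_t Q = A / B, R = A % B; // C++ division truncates toward zero
      return (R != 0 && (R < 0) != (B < 0)) ? Q - 1 : Q; // round toward -inf
    }

    int64_t ceilingOfQuotient(int64_t A, int64_t B) {
      int64_t Q = A / B, R = A % B;
      return (R != 0 && (R < 0) == (B < 0)) ? Q + 1 : Q; // round toward +inf
    }

    int main() {
      int64_t X, Y;
      assert(extendedGCD(12, 8, X, Y) == 4 && 12 * X + 8 * Y == 4);
      assert(floorOfQuotient(-7, 2) == -4 && ceilingOfQuotient(-7, 2) == -3);
      return 0;
    }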
+
+
+// exactSIVtest -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
+// where i is an induction variable, c1 and c2 are loop invariant, and a1
+// and a2 are constant, we can solve it exactly using an algorithm developed
+// by Banerjee and Wolfe. See Section 2.5.3 in
+//
+//        Optimizing Supercompilers for Supercomputers
+//        Michael Wolfe
+//        MIT Press, 1989
+//
+// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
+// so use them if possible. They're also a bit better with symbolics and,
+// in the case of the strong SIV test, can compute Distances.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
+                                      const SCEV *DstCoeff,
+                                      const SCEV *SrcConst,
+                                      const SCEV *DstConst,
+                                      const Loop *CurLoop,
+                                      unsigned Level,
+                                      FullDependence &Result,
+                                      Constraint &NewConstraint) const {
+  DEBUG(dbgs() << "\tExact SIV test\n");
+  DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
+  DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
+  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
+  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
+  ++ExactSIVapplications;
+  assert(0 < Level && Level <= CommonLevels && "Level out of range");
+  Level--;
+  Result.Consistent = false;
+  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
+  NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
+                        Delta, CurLoop);
+  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+  const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+  const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+  if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
+    return false;
+
+  // find gcd
+  APInt G, X, Y;
+  APInt AM = ConstSrcCoeff->getValue()->getValue();
+  APInt BM = ConstDstCoeff->getValue()->getValue();
+  unsigned Bits = AM.getBitWidth();
+  if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+    // gcd doesn't divide Delta, no dependence
+    ++ExactSIVindependence;
+    ++ExactSIVsuccesses;
+    return true;
+  }
+
+  DEBUG(dbgs() << "\t    X = " << X << ", Y = " << Y << "\n");
+
+  // since SCEV construction normalizes, LM = 0
+  APInt UM(Bits, 1, true);
+  bool UMvalid = false;
+  // UM is perhaps unavailable, let's check
+  if (const SCEVConstant *CUB =
+      collectConstantUpperBound(CurLoop, Delta->getType())) {
+    UM = CUB->getValue()->getValue();
+    DEBUG(dbgs() << "\t    UM = " << UM << "\n");
+    UMvalid = true;
+  }
+
+  APInt TU(APInt::getSignedMaxValue(Bits));
+  APInt TL(APInt::getSignedMinValue(Bits));
+
+  // test(BM/G, LM-X) and test(-BM/G, X-UM)
+  APInt TMUL = BM.sdiv(G);
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+    DEBUG(dbgs() << "\t    TL = " << TL << "\n");
+    if (UMvalid) {
+      TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
+      DEBUG(dbgs() << "\t    TU = " << TU << "\n");
+    }
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+    DEBUG(dbgs() << "\t    TU = " << TU << "\n");
+    if (UMvalid) {
+      TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
+      DEBUG(dbgs() << "\t    TL = " << TL << "\n");
+    }
+  }
+
+  // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
+  TMUL = AM.sdiv(G);
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+    DEBUG(dbgs() << "\t    TL = " << TL << "\n");
+    if (UMvalid) {
+      TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
+      DEBUG(dbgs() << "\t    TU = " << TU << "\n");
+    }
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+    DEBUG(dbgs() << "\t    TU = " << TU << "\n");
+    if (UMvalid) {
+      TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
+      DEBUG(dbgs() << "\t    TL = " << TL << "\n");
+    }
+  }
+  if (TL.sgt(TU)) {
+    ++ExactSIVindependence;
+    ++ExactSIVsuccesses;
+    return true;
+  }
+
+  // explore directions
+  unsigned NewDirection = Dependence::DVEntry::NONE;
+
+  // less than
+  APInt SaveTU(TU); // save these
+  APInt SaveTL(TL);
+  DEBUG(dbgs() << "\t    exploring LT direction\n");
+  TMUL = AM - BM;
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
+    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
+    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
+  }
+  if (TL.sle(TU)) {
+    NewDirection |= Dependence::DVEntry::LT;
+    ++ExactSIVsuccesses;
+  }
+
+  // equal
+  TU = SaveTU; // restore
+  TL = SaveTL;
+  DEBUG(dbgs() << "\t    exploring EQ direction\n");
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
+    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
+    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
+  }
+  TMUL = BM - AM;
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
+    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
+    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
+  }
+  if (TL.sle(TU)) {
+    NewDirection |= Dependence::DVEntry::EQ;
+    ++ExactSIVsuccesses;
+  }
+
+  // greater than
+  TU = SaveTU; // restore
+  TL = SaveTL;
+  DEBUG(dbgs() << "\t    exploring GT direction\n");
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
+    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
+    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
+  }
+  if (TL.sle(TU)) {
+    NewDirection |= Dependence::DVEntry::GT;
+    ++ExactSIVsuccesses;
+  }
+
+  // finished
+  Result.DV[Level].Direction &= NewDirection;
+  if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
+    ++ExactSIVindependence;
+  return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
+}
+
+
+
+// Return true if the divisor evenly divides the dividend.
+static
+bool isRemainderZero(const SCEVConstant *Dividend,
+                     const SCEVConstant *Divisor) {
+  APInt ConstDividend = Dividend->getValue()->getValue();
+  APInt ConstDivisor = Divisor->getValue()->getValue();
+  return ConstDividend.srem(ConstDivisor) == 0;
+}
+
+
+// weakZeroSrcSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Zero SIV test.
+//
+// Given
+//
+//    c1 = c2 + a*i
+//
+// we get
+//
+//    (c1 - c2)/a = i
+//
+// If i is not an integer, there's no dependence.
+// If i < 0 or > UB, there's no dependence.
+// If i = 0, the direction is <= and peeling the
+// 1st iteration will break the dependence.
+// If i = UB, the direction is >= and peeling the
+// last iteration will break the dependence.
+// Otherwise, the direction is *.
+//
+// Can prove independence. Failing that, we can sometimes refine
+// the directions. Can sometimes show that first or last
+// iteration carries all the dependences (so worth peeling).
+//
+// (see also weakZeroDstSIVtest)
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff,
+                                            const SCEV *SrcConst,
+                                            const SCEV *DstConst,
+                                            const Loop *CurLoop,
+                                            unsigned Level,
+                                            FullDependence &Result,
+                                            Constraint &NewConstraint) const {
+  // For the WeakSIV test, it's possible the loop isn't common to
+  // the Src and Dst loops. If it isn't, then there's no need to
+  // record a direction.
+  DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
+  DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << "\n");
+  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
+  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
+  ++WeakZeroSIVapplications;
+  assert(0 < Level && Level <= MaxLevels && "Level out of range");
+  Level--;
+  Result.Consistent = false;
+  const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+  NewConstraint.setLine(SE->getConstant(Delta->getType(), 0),
+                        DstCoeff, Delta, CurLoop);
+  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
+  if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
+    if (Level < CommonLevels) {
+      Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+      Result.DV[Level].PeelFirst = true;
+      ++WeakZeroSIVsuccesses;
+    }
+    return false; // dependences caused by first iteration
+  }
+  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+  if (!ConstCoeff)
+    return false;
+  const SCEV *AbsCoeff =
+    SE->isKnownNegative(ConstCoeff) ?
+    SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
+  const SCEV *NewDelta =
+    SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
SE->getNegativeSCEV(Delta) : Delta; + + // check that Delta/DstCoeff < iteration count + // really check NewDelta < count*AbsCoeff + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); + if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { + // dependences caused by last iteration + if (Level < CommonLevels) { + Result.DV[Level].Direction &= Dependence::DVEntry::GE; + Result.DV[Level].PeelLast = true; + ++WeakZeroSIVsuccesses; + } + return false; + } + } + + // check that Delta/DstCoeff >= 0 + // really check that NewDelta >= 0 + if (SE->isKnownNegative(NewDelta)) { + // No dependence, NewDelta < 0 + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + + // if DstCoeff doesn't divide Delta, then no dependence + if (isa<SCEVConstant>(Delta) && + !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + return false; +} + + +// weakZeroDstSIVtest - +// From the paper, Practical Dependence Testing, Section 4.2.2 +// +// When we have a pair of subscripts of the form [c1 + a*i] and [c2], +// where i is an induction variable, c1 and c2 are loop invariant, +// and a is a constant, we can solve it exactly using the +// Weak-Zero SIV test. +// +// Given +// +// c1 + a*i = c2 +// +// we get +// +// i = (c2 - c1)/a +// +// If i is not an integer, there's no dependence. +// If i < 0 or > UB, there's no dependence. +// If i = 0, the direction is <= and peeling the +// 1st iteration will break the dependence. +// If i = UB, the direction is >= and peeling the +// last iteration will break the dependence. +// Otherwise, the direction is *. +// +// Can prove independence. Failing that, we can sometimes refine +// the directions. Can sometimes show that first or last +// iteration carries all the dependences (so worth peeling). +// +// (see also weakZeroSrcSIVtest) +// +// Return true if dependence disproved. +bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { + // For the WeakSIV test, it's possible the loop isn't common to the + // Src and Dst loops. If it isn't, then there's no need to record a direction. + DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); + DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++WeakZeroSIVapplications; + assert(0 < Level && Level <= SrcLevels && "Level out of range"); + Level--; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), + Delta, CurLoop); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { + if (Level < CommonLevels) { + Result.DV[Level].Direction &= Dependence::DVEntry::LE; + Result.DV[Level].PeelFirst = true; + ++WeakZeroSIVsuccesses; + } + return false; // dependences caused by first iteration + } + const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); + if (!ConstCoeff) + return false; + const SCEV *AbsCoeff = + SE->isKnownNegative(ConstCoeff) ? + SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; + const SCEV *NewDelta = + SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; + + // check that Delta/SrcCoeff < iteration count + // really check NewDelta < count*AbsCoeff + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); + if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { + // dependences caused by last iteration + if (Level < CommonLevels) { + Result.DV[Level].Direction &= Dependence::DVEntry::GE; + Result.DV[Level].PeelLast = true; + ++WeakZeroSIVsuccesses; + } + return false; + } + } + + // check that Delta/SrcCoeff >= 0 + // really check that NewDelta >= 0 + if (SE->isKnownNegative(NewDelta)) { + // No dependence, NewDelta < 0 + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + + // if SrcCoeff doesn't divide Delta, then no dependence + if (isa<SCEVConstant>(Delta) && + !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { + ++WeakZeroSIVindependence; + ++WeakZeroSIVsuccesses; + return true; + } + return false; +} + + +// exactRDIVtest - Tests the RDIV subscript pair for dependence. +// Things of the form [c1 + a*i] and [c2 + b*j], +// where i and j are induction variables, c1 and c2 are loop invariant, +// and a and b are constants. +// Returns true if any possible dependence is disproved. +// Marks the result as inconsistent. +// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
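For a concrete picture of what this test catches, consider a pair like the following (an illustrative nest, not taken from the patch):

  for (int i = 0; i <= 10; i++)
    A[2*i + 1] = 0;
  for (int j = 0; j <= 10; j++)
    x = A[4*j];

Here AM = 2, BM = 4, and Delta = DstConst - SrcConst = 0 - 1 = -1. Since G = gcd(2, 4) = 2 does not divide -1, the findGCD call below fails its divisibility check and the dependence is disproved outright: 2*i + 1 is always odd while 4*j is always even.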
+bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *SrcLoop, + const Loop *DstLoop, + FullDependence &Result) const { + DEBUG(dbgs() << "\tExact RDIV test\n"); + DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); + DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++ExactRDIVapplications; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); + const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); + const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); + if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) + return false; + + // find gcd + APInt G, X, Y; + APInt AM = ConstSrcCoeff->getValue()->getValue(); + APInt BM = ConstDstCoeff->getValue()->getValue(); + unsigned Bits = AM.getBitWidth(); + if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + // gcd doesn't divide Delta, no dependence + ++ExactRDIVindependence; + return true; + } + + DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); + + // since SCEV construction seems to normalize, LM = 0 + APInt SrcUM(Bits, 1, true); + bool SrcUMvalid = false; + // SrcUM is perhaps unavailable, let's check + if (const SCEVConstant *UpperBound = + collectConstantUpperBound(SrcLoop, Delta->getType())) { + SrcUM = UpperBound->getValue()->getValue(); + DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); + SrcUMvalid = true; + } + + APInt DstUM(Bits, 1, true); + bool DstUMvalid = false; + // DstUM is perhaps unavailable, let's check + if (const SCEVConstant *UpperBound = + collectConstantUpperBound(DstLoop, Delta->getType())) { + DstUM = UpperBound->getValue()->getValue(); + DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); + DstUMvalid = true; + } + + APInt TU(APInt::getSignedMaxValue(Bits)); + APInt TL(APInt::getSignedMinValue(Bits)); + + // test(BM/G, LM-X) and test(-BM/G, X-UM) + APInt TMUL = BM.sdiv(G); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + if (SrcUMvalid) { + TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } + } + else { + TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + if (SrcUMvalid) { + TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + } + } + + // test(AM/G, LM-Y) and test(-AM/G, Y-UM) + TMUL = AM.sdiv(G); + if (TMUL.sgt(0)) { + TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + if (DstUMvalid) { + TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } + } + else { + TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); + DEBUG(dbgs() << "\t TU = " << TU << "\n"); + if (DstUMvalid) { + TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); + DEBUG(dbgs() << "\t TL = " << TL << "\n"); + } + } + if (TL.sgt(TU)) + ++ExactRDIVindependence; + return TL.sgt(TU); +} + + +// symbolicRDIVtest - +// In Section 4.5 of the Practical Dependence Testing paper, the authors +// introduce a special case of Banerjee's Inequalities (also called the +// Extreme-Value Test) that can handle some of the SIV and RDIV cases, +// particularly cases with symbolics. Since it's only able to disprove +// dependence (not compute distances or directions), we'll use it as a +// fallback for the other tests. +// +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] +// where i and j are induction variables and c1 and c2 are loop invariants, +// we can use the symbolic tests to disprove some dependences, serving as a +// backup for the RDIV test. Note that i and j can be the same variable, +// letting this test serve as a backup for the various SIV tests. +// +// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some +// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) +// loop bounds for the i and j loops, respectively. So, ... +// +// c1 + a1*i = c2 + a2*j +// a1*i - a2*j = c2 - c1 +// +// To test for a dependence, we compute c2 - c1 and make sure it's in the +// range of the maximum and minimum possible values of a1*i - a2*j. +// Considering the signs of a1 and a2, we have 4 possible cases: +// +// 1) If a1 >= 0 and a2 >= 0, then +// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 +// -a2*N2 <= c2 - c1 <= a1*N1 +// +// 2) If a1 >= 0 and a2 <= 0, then +// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 +// 0 <= c2 - c1 <= a1*N1 - a2*N2 +// +// 3) If a1 <= 0 and a2 >= 0, then +// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 +// a1*N1 - a2*N2 <= c2 - c1 <= 0 +// +// 4) If a1 <= 0 and a2 <= 0, then +// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 +// a1*N1 <= c2 - c1 <= -a2*N2 +// +// return true if dependence disproved +bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, + const SCEV *A2, + const SCEV *C1, + const SCEV *C2, + const Loop *Loop1, + const Loop *Loop2) const { + ++SymbolicRDIVapplications; + DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); + DEBUG(dbgs() << "\t A1 = " << *A1); + DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); + DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); + DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); + DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); + const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); + const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); + DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); + DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); + const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); + const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); + DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); + DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); + if (SE->isKnownNonNegative(A1)) { + if (SE->isKnownNonNegative(A2)) { + // A1 >= 0 && A2 >= 0 + if (N1) { + // make sure that c2 - c1 <= a1*N1 + const SCEV *A1N1 = SE->getMulExpr(A1, N1); + DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { + ++SymbolicRDIVindependence; + return true; + } + } + if (N2) { + // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { + ++SymbolicRDIVindependence; + return true; + } + } + } + else if (SE->isKnownNonPositive(A2)) { + // a1 >= 0 && a2 <= 0 + if (N1 && N2) { + // make sure that c2 - c1 <= a1*N1 - a2*N2 + const SCEV *A1N1 = SE->getMulExpr(A1, N1); + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); + DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { + ++SymbolicRDIVindependence; + return true; + } + } + // make
sure that 0 <= c2 - c1 + if (SE->isKnownNegative(C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + } + else if (SE->isKnownNonPositive(A1)) { + if (SE->isKnownNonNegative(A2)) { + // a1 <= 0 && a2 >= 0 + if (N1 && N2) { + // make sure that a1*N1 - a2*N2 <= c2 - c1 + const SCEV *A1N1 = SE->getMulExpr(A1, N1); + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); + DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + // make sure that c2 - c1 <= 0 + if (SE->isKnownPositive(C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + else if (SE->isKnownNonPositive(A2)) { + // a1 <= 0 && a2 <= 0 + if (N1) { + // make sure that a1*N1 <= c2 - c1 + const SCEV *A1N1 = SE->getMulExpr(A1, N1); + DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { + ++SymbolicRDIVindependence; + return true; + } + } + if (N2) { + // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 + const SCEV *A2N2 = SE->getMulExpr(A2, N2); + DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { + ++SymbolicRDIVindependence; + return true; + } + } + } + } + return false; +} + + +// testSIV - +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] +// where i is an induction variable, c1 and c2 are loop invariant, and a1 and +// a2 are constant, we attack it with an SIV test. While they can all be +// solved with the Exact SIV test, it's worthwhile to use simpler tests when +// they apply; they're cheaper and sometimes more precise. +// +// Return true if dependence disproved. 
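To make the dispatch below concrete, a few illustrative pairs (not taken from the patch) and the branch each takes:

  A[2*i + 2] and A[2*i]     -- coefficients equal: strongSIVtest, exact distance (2 - 0)/2 = 1
  A[i]       and A[10 - i]  -- coefficients negated: weakCrossingSIVtest
  A[2*i]     and A[3*i + 1] -- anything else: exactSIVtest

As the return expressions below show, each result is then backstopped by gcdMIVtest and, in the two-AddRec case, by symbolicRDIVtest.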
+bool DependenceAnalysis::testSIV(const SCEV *Src, + const SCEV *Dst, + unsigned &Level, + FullDependence &Result, + Constraint &NewConstraint, + const SCEV *&SplitIter) const { + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); + const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); + if (SrcAddRec && DstAddRec) { + const SCEV *SrcConst = SrcAddRec->getStart(); + const SCEV *DstConst = DstAddRec->getStart(); + const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); + const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); + const Loop *CurLoop = SrcAddRec->getLoop(); + assert(CurLoop == DstAddRec->getLoop() && + "both loops in SIV should be same"); + Level = mapSrcLoop(CurLoop); + bool disproven; + if (SrcCoeff == DstCoeff) + disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, + Level, Result, NewConstraint); + else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) + disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, + Level, Result, NewConstraint, SplitIter); + else + disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, + Level, Result, NewConstraint); + return disproven || + gcdMIVtest(Src, Dst, Result) || + symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); + } + if (SrcAddRec) { + const SCEV *SrcConst = SrcAddRec->getStart(); + const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); + const SCEV *DstConst = Dst; + const Loop *CurLoop = SrcAddRec->getLoop(); + Level = mapSrcLoop(CurLoop); + return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, + Level, Result, NewConstraint) || + gcdMIVtest(Src, Dst, Result); + } + if (DstAddRec) { + const SCEV *DstConst = DstAddRec->getStart(); + const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); + const SCEV *SrcConst = Src; + const Loop *CurLoop = DstAddRec->getLoop(); + Level = mapDstLoop(CurLoop); + return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, + CurLoop, Level, Result, NewConstraint) || + gcdMIVtest(Src, Dst, Result); + } + llvm_unreachable("SIV test expected at least one AddRec"); + return false; +} + + +// testRDIV - +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] +// where i and j are induction variables, c1 and c2 are loop invariant, +// and a1 and a2 are constant, we can solve it exactly with an easy adaptation +// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. +// It doesn't make sense to talk about distance or direction in this case, +// so there's no point in making special versions of the Strong SIV test or +// the Weak-crossing SIV test. +// +// With minor algebra, this test can also be used for things like +// [c1 + a1*i + a2*j][c2]. +// +// Return true if dependence disproved. 
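One algebraic step worth spelling out before the case analysis below: in the mixed shapes, say [a*i + c*j + b] against [d], the dependence equation

  a*i + c*j + b = d

is rearranged to

  a*i + b = (-c)*j + d

so the routine can hand exactRDIVtest an ordinary pair of single-index subscripts. That is why the nested-AddRec cases below synthesize the opposite side's coefficient with SE->getNegativeSCEV of the outer step recurrence rather than using the step directly.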
+bool DependenceAnalysis::testRDIV(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const { + // we have 3 possible situations here: + // 1) [a*i + b] and [c*j + d] + // 2) [a*i + c*j + b] and [d] + // 3) [b] and [a*i + c*j + d] + // We need to find what we've got and get organized + + const SCEV *SrcConst, *DstConst; + const SCEV *SrcCoeff, *DstCoeff; + const Loop *SrcLoop, *DstLoop; + + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); + const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); + if (SrcAddRec && DstAddRec) { + SrcConst = SrcAddRec->getStart(); + SrcCoeff = SrcAddRec->getStepRecurrence(*SE); + SrcLoop = SrcAddRec->getLoop(); + DstConst = DstAddRec->getStart(); + DstCoeff = DstAddRec->getStepRecurrence(*SE); + DstLoop = DstAddRec->getLoop(); + } + else if (SrcAddRec) { + if (const SCEVAddRecExpr *tmpAddRec = + dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { + SrcConst = tmpAddRec->getStart(); + SrcCoeff = tmpAddRec->getStepRecurrence(*SE); + SrcLoop = tmpAddRec->getLoop(); + DstConst = Dst; + DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); + DstLoop = SrcAddRec->getLoop(); + } + else + llvm_unreachable("RDIV reached by surprising SCEVs"); + } + else if (DstAddRec) { + if (const SCEVAddRecExpr *tmpAddRec = + dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { + DstConst = tmpAddRec->getStart(); + DstCoeff = tmpAddRec->getStepRecurrence(*SE); + DstLoop = tmpAddRec->getLoop(); + SrcConst = Src; + SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); + SrcLoop = DstAddRec->getLoop(); + } + else + llvm_unreachable("RDIV reached by surprising SCEVs"); + } + else + llvm_unreachable("RDIV expected at least one AddRec"); + return exactRDIVtest(SrcCoeff, DstCoeff, + SrcConst, DstConst, + SrcLoop, DstLoop, + Result) || + gcdMIVtest(Src, Dst, Result) || + symbolicRDIVtest(SrcCoeff, DstCoeff, + SrcConst, DstConst, + SrcLoop, DstLoop); +} + + +// Tests the single-subscript MIV pair (Src and Dst) for dependence. +// Return true if dependence disproved. +// Can sometimes refine direction vectors. +bool DependenceAnalysis::testMIV(const SCEV *Src, + const SCEV *Dst, + const SmallBitVector &Loops, + FullDependence &Result) const { + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + Result.Consistent = false; + return gcdMIVtest(Src, Dst, Result) || + banerjeeMIVtest(Src, Dst, Loops, Result); +} + + +// Given a product, e.g., 10*X*Y, returns the first constant operand, +// in this case 10. If there is no constant part, returns NULL. +static +const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { + for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { + if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) + return Constant; + } + return NULL; +} + + +//===----------------------------------------------------------------------===// +// gcdMIVtest - +// Tests an MIV subscript pair for dependence. +// Returns true if any possible dependence is disproved. +// Marks the result as inconsistent. +// Can sometimes disprove the equal direction for 1 or more loops, +// as discussed in Michael Wolfe's book, +// High Performance Compilers for Parallel Computing, page 235. +// +// We spend some effort (code!) to handle cases like +// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, +// but M and N are just loop-invariant variables. +// This should help us handle linearized subscripts; +// also makes this test a useful backup to the various SIV tests. +// +// It occurs to me that the presence of loop-invariant variables +// changes the nature of the test from "greatest common divisor" +// to "a common divisor!" +bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const { + DEBUG(dbgs() << "starting gcd\n"); + ++GCDapplications; + unsigned BitWidth = Src->getType()->getIntegerBitWidth(); + APInt RunningGCD = APInt::getNullValue(BitWidth); + + // Examine Src coefficients. + // Compute running GCD and record source constant. + // Because we're looking for the constant at the end of the chain, + // we can't quit the loop just because the GCD == 1. + const SCEV *Coefficients = Src; + while (const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(Coefficients)) { + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); + if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + if (!Constant) + return false; + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + Coefficients = AddRec->getStart(); + } + const SCEV *SrcConst = Coefficients; + + // Examine Dst coefficients. + // Compute running GCD and record destination constant. + // Because we're looking for the constant at the end of the chain, + // we can't quit the loop just because the GCD == 1. + Coefficients = Dst; + while (const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(Coefficients)) { + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); + if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + if (!Constant) + return false; + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + Coefficients = AddRec->getStart(); + } + const SCEV *DstConst = Coefficients; + + APInt ExtraGCD = APInt::getNullValue(BitWidth); + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << " Delta = " << *Delta << "\n"); + const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); + if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { + // If Delta is a sum of products, we may be able to make further progress. + for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { + const SCEV *Operand = Sum->getOperand(Op); + if (isa<SCEVConstant>(Operand)) { + assert(!Constant && "Surprised to find multiple constants"); + Constant = cast<SCEVConstant>(Operand); + } + else if (isa<SCEVMulExpr>(Operand)) { + // Search for constant operand to participate in GCD; + // If none found, return false. + const SCEVConstant *ConstOp = + getConstantPart(cast<SCEVMulExpr>(Operand)); + APInt ConstOpValue = ConstOp->getValue()->getValue(); + ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, + ConstOpValue.abs()); + } + else + return false; + } + } + if (!Constant) + return false; + APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); + DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); + if (ConstDelta == 0) + return false; + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); + DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); + APInt Remainder = ConstDelta.srem(RunningGCD); + if (Remainder != 0) { + ++GCDindependence; + return true; + } + + // Try to disprove equal directions. + // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], + // the code above can't disprove the dependence because the GCD = 1. + // So we consider what happens if i = i' and what happens if j = j'. + // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], + // which is infeasible, so we can disallow the = direction for the i level. + // Setting j = j' doesn't help matters, so we end up with a direction vector + // of [<>, *] + // + // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], + // we need to remember that the constant part is 5 and the RunningGCD should + // be initialized to ExtraGCD = 30. + DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); + + bool Improved = false; + Coefficients = Src; + while (const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(Coefficients)) { + Coefficients = AddRec->getStart(); + const Loop *CurLoop = AddRec->getLoop(); + RunningGCD = ExtraGCD; + const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); + const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); + const SCEV *Inner = Src; + while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { + AddRec = cast<SCEVAddRecExpr>(Inner); + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + if (CurLoop == AddRec->getLoop()) + ; // SrcCoeff == Coeff + else { + if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + else + Constant = cast<SCEVConstant>(Coeff); + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + } + Inner = AddRec->getStart(); + } + Inner = Dst; + while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { + AddRec = cast<SCEVAddRecExpr>(Inner); + const SCEV *Coeff = AddRec->getStepRecurrence(*SE); + if (CurLoop == AddRec->getLoop()) + DstCoeff = Coeff; + else { + if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + else + Constant = cast<SCEVConstant>(Coeff); + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + } + Inner = AddRec->getStart(); + } + Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); + if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) + // If the coefficient is the product of a constant and other stuff, + // we can use the constant in the GCD computation. + Constant = getConstantPart(Product); + else if (isa<SCEVConstant>(Delta)) + Constant = cast<SCEVConstant>(Delta); + else { + // The difference of the two coefficients might not be a product + // or constant, in which case we give up on this direction. + continue; + } + APInt ConstCoeff = Constant->getValue()->getValue(); + RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); + DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); + if (RunningGCD != 0) { + Remainder = ConstDelta.srem(RunningGCD); + DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); + if (Remainder != 0) { + unsigned Level = mapSrcLoop(CurLoop); + Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); + Improved = true; + } + } + } + if (Improved) + ++GCDsuccesses; + DEBUG(dbgs() << "all done\n"); + return false; +} + + +//===----------------------------------------------------------------------===// +// banerjeeMIVtest - +// Use Banerjee's Inequalities to test an MIV subscript pair. +// (Wolfe, in the race-car book, calls this the Extreme Value Test.) +// Generally follows the discussion in Section 2.5.2 of +// +// Optimizing Supercompilers for Supercomputers +// Michael Wolfe +// +// The inequalities given on page 25 are simplified in that loops are +// normalized so that the lower bound is always 0 and the stride is always 1. +// For example, Wolfe gives +// +// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k +// +// where A_k is the coefficient of the kth index in the source subscript, +// B_k is the coefficient of the kth index in the destination subscript, +// U_k is the upper bound of the kth index, L_k is the lower bound of the kth +// index, and N_k is the stride of the kth index. Since all loops are normalized +// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the +// equation to +// +// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 +// = (A^-_k - B_k)^- (U_k - 1) - B_k +// +// Similar simplifications are possible for the other equations. +// +// When we can't determine the number of iterations for a loop, +// we use NULL as an indicator for the worst case, infinity. +// When computing the upper bound, NULL denotes +inf; +// for the lower bound, NULL denotes -inf. +// +// Return true if dependence disproved. +bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, + const SCEV *Dst, + const SmallBitVector &Loops, + FullDependence &Result) const { + DEBUG(dbgs() << "starting Banerjee\n"); + ++BanerjeeApplications; + DEBUG(dbgs() << " Src = " << *Src << '\n'); + const SCEV *A0; + CoefficientInfo *A = collectCoeffInfo(Src, true, A0); + DEBUG(dbgs() << " Dst = " << *Dst << '\n'); + const SCEV *B0; + CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); + BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; + const SCEV *Delta = SE->getMinusSCEV(B0, A0); + DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); + + // Compute bounds for all the * directions. + DEBUG(dbgs() << "\tBounds[*]\n"); + for (unsigned K = 1; K <= MaxLevels; ++K) { + Bound[K].Iterations = A[K].Iterations ?
A[K].Iterations : B[K].Iterations; + Bound[K].Direction = Dependence::DVEntry::ALL; + Bound[K].DirSet = Dependence::DVEntry::NONE; + findBoundsALL(A, B, Bound, K); +#ifndef NDEBUG + DEBUG(dbgs() << "\t " << K << '\t'); + if (Bound[K].Lower[Dependence::DVEntry::ALL]) + DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[K].Upper[Dependence::DVEntry::ALL]) + DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); +#endif + } + + // Test the *, *, *, ... case. + bool Disproved = false; + if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { + // Explore the direction vector hierarchy. + unsigned DepthExpanded = 0; + unsigned NewDeps = exploreDirections(1, A, B, Bound, + Loops, DepthExpanded, Delta); + if (NewDeps > 0) { + bool Improved = false; + for (unsigned K = 1; K <= CommonLevels; ++K) { + if (Loops[K]) { + unsigned Old = Result.DV[K - 1].Direction; + Result.DV[K - 1].Direction = Old & Bound[K].DirSet; + Improved |= Old != Result.DV[K - 1].Direction; + if (!Result.DV[K - 1].Direction) { + Improved = false; + Disproved = true; + break; + } + } + } + if (Improved) + ++BanerjeeSuccesses; + } + else { + ++BanerjeeIndependence; + Disproved = true; + } + } + else { + ++BanerjeeIndependence; + Disproved = true; + } + delete [] Bound; + delete [] A; + delete [] B; + return Disproved; +} + + +// Hierarchically expands the direction vector +// search space, combining the directions of discovered dependences +// in the DirSet field of Bound. Returns the number of distinct +// dependences discovered. If the dependence is disproved, +// it will return 0. +unsigned DependenceAnalysis::exploreDirections(unsigned Level, + CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + const SmallBitVector &Loops, + unsigned &DepthExpanded, + const SCEV *Delta) const { + if (Level > CommonLevels) { + // record result + DEBUG(dbgs() << "\t["); + for (unsigned K = 1; K <= CommonLevels; ++K) { + if (Loops[K]) { + Bound[K].DirSet |= Bound[K].Direction; +#ifndef NDEBUG + switch (Bound[K].Direction) { + case Dependence::DVEntry::LT: + DEBUG(dbgs() << " <"); + break; + case Dependence::DVEntry::EQ: + DEBUG(dbgs() << " ="); + break; + case Dependence::DVEntry::GT: + DEBUG(dbgs() << " >"); + break; + case Dependence::DVEntry::ALL: + DEBUG(dbgs() << " *"); + break; + default: + llvm_unreachable("unexpected Bound[K].Direction"); + } +#endif + } + } + DEBUG(dbgs() << " ]\n"); + return 1; + } + if (Loops[Level]) { + if (Level > DepthExpanded) { + DepthExpanded = Level; + // compute bounds for <, =, > at current level + findBoundsLT(A, B, Bound, Level); + findBoundsGT(A, B, Bound, Level); + findBoundsEQ(A, B, Bound, Level); +#ifndef NDEBUG + DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); + DEBUG(dbgs() << "\t <\t"); + if (Bound[Level].Lower[Dependence::DVEntry::LT]) + DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[Level].Upper[Dependence::DVEntry::LT]) + DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); + DEBUG(dbgs() << "\t =\t"); + if (Bound[Level].Lower[Dependence::DVEntry::EQ]) + DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[Level].Upper[Dependence::DVEntry::EQ]) + DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); + DEBUG(dbgs() << 
"\t >\t"); + if (Bound[Level].Lower[Dependence::DVEntry::GT]) + DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); + else + DEBUG(dbgs() << "-inf\t"); + if (Bound[Level].Upper[Dependence::DVEntry::GT]) + DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); + else + DEBUG(dbgs() << "+inf\n"); +#endif + } + + unsigned NewDeps = 0; + + // test bounds for <, *, *, ... + if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) + NewDeps += exploreDirections(Level + 1, A, B, Bound, + Loops, DepthExpanded, Delta); + + // Test bounds for =, *, *, ... + if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) + NewDeps += exploreDirections(Level + 1, A, B, Bound, + Loops, DepthExpanded, Delta); + + // test bounds for >, *, *, ... + if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) + NewDeps += exploreDirections(Level + 1, A, B, Bound, + Loops, DepthExpanded, Delta); + + Bound[Level].Direction = Dependence::DVEntry::ALL; + return NewDeps; + } + else + return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); +} + + +// Returns true iff the current bounds are plausible. +bool DependenceAnalysis::testBounds(unsigned char DirKind, + unsigned Level, + BoundInfo *Bound, + const SCEV *Delta) const { + Bound[Level].Direction = DirKind; + if (const SCEV *LowerBound = getLowerBound(Bound)) + if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) + return false; + if (const SCEV *UpperBound = getUpperBound(Bound)) + if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) + return false; + return true; +} + + +// Computes the upper and lower bounds for level K +// using the * direction. Records them in Bound. +// Wolfe gives the equations +// +// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k +// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^*_k = (A^-_k - B^+_k)U_k +// UB^*_k = (A^+_k - B^-_k)U_k +// +// We must be careful to handle the case where the upper bound is unknown. +// Note that the lower bound is always <= 0 +// and the upper bound is always >= 0. +void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity. + if (Bound[K].Iterations) { + Bound[K].Lower[Dependence::DVEntry::ALL] = + SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), + Bound[K].Iterations); + Bound[K].Upper[Dependence::DVEntry::ALL] = + SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), + Bound[K].Iterations); + } + else { + // If the difference is 0, we won't need to know the number of iterations. + if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) + Bound[K].Lower[Dependence::DVEntry::ALL] = + SE->getConstant(A[K].Coeff->getType(), 0); + if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) + Bound[K].Upper[Dependence::DVEntry::ALL] = + SE->getConstant(A[K].Coeff->getType(), 0); + } +} + + +// Computes the upper and lower bounds for level K +// using the = direction. Records them in Bound. 
+// Wolfe gives the equations +// +// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k +// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^=_k = (A_k - B_k)^- U_k +// UB^=_k = (A_k - B_k)^+ U_k +// +// We must be careful to handle the case where the upper bound is unknown. +// Note that the lower bound is always <= 0 +// and the upper bound is always >= 0. +void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity. + if (Bound[K].Iterations) { + const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); + const SCEV *NegativePart = getNegativePart(Delta); + Bound[K].Lower[Dependence::DVEntry::EQ] = + SE->getMulExpr(NegativePart, Bound[K].Iterations); + const SCEV *PositivePart = getPositivePart(Delta); + Bound[K].Upper[Dependence::DVEntry::EQ] = + SE->getMulExpr(PositivePart, Bound[K].Iterations); + } + else { + // If the positive/negative part of the difference is 0, + // we won't need to know the number of iterations. + const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); + const SCEV *NegativePart = getNegativePart(Delta); + if (NegativePart->isZero()) + Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero + const SCEV *PositivePart = getPositivePart(Delta); + if (PositivePart->isZero()) + Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero + } +} + + +// Computes the upper and lower bounds for level K +// using the < direction. Records them in Bound. +// Wolfe gives the equations +// +// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k +// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k +// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k +// +// We must be careful to handle the case where the upper bound is unknown. +void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity. + if (Bound[K].Iterations) { + const SCEV *Iter_1 = + SE->getMinusSCEV(Bound[K].Iterations, + SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *NegPart = + getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); + Bound[K].Lower[Dependence::DVEntry::LT] = + SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); + const SCEV *PosPart = + getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); + Bound[K].Upper[Dependence::DVEntry::LT] = + SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); + } + else { + // If the positive/negative part of the difference is 0, + // we won't need to know the number of iterations. 
+ const SCEV *NegPart = + getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); + if (NegPart->isZero()) + Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); + const SCEV *PosPart = + getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); + if (PosPart->isZero()) + Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); + } +} + + +// Computes the upper and lower bounds for level K +// using the > direction. Records them in Bound. +// Wolfe gives the equations +// +// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k +// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k +// +// Since we normalize loops, we can simplify these equations to +// +// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k +// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k +// +// We must be careful to handle the case where the upper bound is unknown. +void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const { + Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity. + if (Bound[K].Iterations) { + const SCEV *Iter_1 = + SE->getMinusSCEV(Bound[K].Iterations, + SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *NegPart = + getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); + Bound[K].Lower[Dependence::DVEntry::GT] = + SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); + const SCEV *PosPart = + getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); + Bound[K].Upper[Dependence::DVEntry::GT] = + SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); + } + else { + // If the positive/negative part of the difference is 0, + // we won't need to know the number of iterations. + const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); + if (NegPart->isZero()) + Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; + const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); + if (PosPart->isZero()) + Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; + } +} + + +// X^+ = max(X, 0) +const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { + return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); +} + + +// X^- = min(X, 0) +const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { + return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); +} + + +// Walks through the subscript, +// collecting each coefficient, the associated loop bounds, +// and recording its positive and negative parts for later use. +DependenceAnalysis::CoefficientInfo * +DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, + bool SrcFlag, + const SCEV *&Constant) const { + const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); + CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; + for (unsigned K = 1; K <= MaxLevels; ++K) { + CI[K].Coeff = Zero; + CI[K].PosPart = Zero; + CI[K].NegPart = Zero; + CI[K].Iterations = NULL; + } + while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { + const Loop *L = AddRec->getLoop(); + unsigned K = SrcFlag ? 
mapSrcLoop(L) : mapDstLoop(L); + CI[K].Coeff = AddRec->getStepRecurrence(*SE); + CI[K].PosPart = getPositivePart(CI[K].Coeff); + CI[K].NegPart = getNegativePart(CI[K].Coeff); + CI[K].Iterations = collectUpperBound(L, Subscript->getType()); + Subscript = AddRec->getStart(); + } + Constant = Subscript; +#ifndef NDEBUG + DEBUG(dbgs() << "\tCoefficient Info\n"); + for (unsigned K = 1; K <= MaxLevels; ++K) { + DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); + DEBUG(dbgs() << "\tPos Part = "); + DEBUG(dbgs() << *CI[K].PosPart); + DEBUG(dbgs() << "\tNeg Part = "); + DEBUG(dbgs() << *CI[K].NegPart); + DEBUG(dbgs() << "\tUpper Bound = "); + if (CI[K].Iterations) + DEBUG(dbgs() << *CI[K].Iterations); + else + DEBUG(dbgs() << "+inf"); + DEBUG(dbgs() << '\n'); + } + DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); +#endif + return CI; +} + + +// Looks through all the bounds info and +// computes the lower bound given the current direction settings +// at each level. If the lower bound for any level is -inf, +// the result is -inf. +const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { + const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; + for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { + if (Bound[K].Lower[Bound[K].Direction]) + Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); + else + Sum = NULL; + } + return Sum; +} + + +// Looks through all the bounds info and +// computes the upper bound given the current direction settings +// at each level. If the upper bound at any level is +inf, +// the result is +inf. +const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { + const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; + for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { + if (Bound[K].Upper[Bound[K].Direction]) + Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); + else + Sum = NULL; + } + return Sum; +} + + +//===----------------------------------------------------------------------===// +// Constraint manipulation for Delta test. + +// Given a linear SCEV, +// return the coefficient (the step) +// corresponding to the specified loop. +// If there isn't one, return 0. +// For example, given a*i + b*j + c*k, finding the coefficient +// corresponding to the j loop would yield b. +const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, + const Loop *TargetLoop) const { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); + if (!AddRec) + return SE->getConstant(Expr->getType(), 0); + if (AddRec->getLoop() == TargetLoop) + return AddRec->getStepRecurrence(*SE); + return findCoefficient(AddRec->getStart(), TargetLoop); +} + + +// Given a linear SCEV, +// return the SCEV given by zeroing out the coefficient +// corresponding to the specified loop. +// For example, given a*i + b*j + c*k, zeroing the coefficient +// corresponding to the j loop would yield a*i + c*k. +const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, + const Loop *TargetLoop) const { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); + if (!AddRec) + return Expr; // ignore + if (AddRec->getLoop() == TargetLoop) + return AddRec->getStart(); + return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), + AddRec->getStepRecurrence(*SE), + AddRec->getLoop(), + AddRec->getNoWrapFlags()); +} + + +// Given a linear SCEV Expr, +// return the SCEV given by adding some Value to the +// coefficient corresponding to the specified TargetLoop.
+// For example, given a*i + b*j + c*k, adding 1 to the coefficient +// corresponding to the j loop would yield a*i + (b+1)*j + c*k. +const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, + const Loop *TargetLoop, + const SCEV *Value) const { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); + if (!AddRec) // create a new addRec + return SE->getAddRecExpr(Expr, + Value, + TargetLoop, + SCEV::FlagAnyWrap); // Worst case, with no info. + if (AddRec->getLoop() == TargetLoop) { + const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); + if (Sum->isZero()) + return AddRec->getStart(); + return SE->getAddRecExpr(AddRec->getStart(), + Sum, + AddRec->getLoop(), + AddRec->getNoWrapFlags()); + } + return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(), + TargetLoop, Value), + AddRec->getStepRecurrence(*SE), + AddRec->getLoop(), + AddRec->getNoWrapFlags()); +} + + +// Review the constraints, looking for opportunities +// to simplify a subscript pair (Src and Dst). +// Return true if some simplification occurs. +// If the simplification isn't exact (that is, if it is conservative +// in terms of dependence), set consistent to false. +// Corresponds to Figure 5 from the paper +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +bool DependenceAnalysis::propagate(const SCEV *&Src, + const SCEV *&Dst, + SmallBitVector &Loops, + SmallVector<Constraint, 4> &Constraints, + bool &Consistent) { + bool Result = false; + for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { + DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); + DEBUG(Constraints[LI].dump(dbgs())); + if (Constraints[LI].isDistance()) + Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); + else if (Constraints[LI].isLine()) + Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); + else if (Constraints[LI].isPoint()) + Result |= propagatePoint(Src, Dst, Constraints[LI]); + } + return Result; +} + + +// Attempt to propagate a distance +// constraint into a subscript pair (Src and Dst). +// Return true if some simplification occurs. +// If the simplification isn't exact (that is, if it is conservative +// in terms of dependence), set consistent to false. +bool DependenceAnalysis::propagateDistance(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint, + bool &Consistent) { + const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); + const SCEV *A_K = findCoefficient(Src, CurLoop); + if (A_K->isZero()) + return false; + const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); + Src = SE->getMinusSCEV(Src, DA_K); + Src = zeroCoefficient(Src, CurLoop); + DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); + DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); + Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); + DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + return true; +} + + +// Attempt to propagate a line +// constraint into a subscript pair (Src and Dst). +// Return true if some simplification occurs. +// If the simplification isn't exact (that is, if it is conservative +// in terms of dependence), set consistent to false. 
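A worked instance of the simplest case below (hypothetical constraint values, not from the patch): suppose the line constraint for loop j is A = 0, B = 2, C = 6, i.e. 2*Y = 6, pinning the destination's index for that loop at Y = C/B = 3. For a destination subscript [4*j + m], the j term is now the constant 4*3, so the code moves it across the dependence equation, subtracting AP_K * (C/B) = 12 from Src, and then zeroes j's coefficient in Dst:

  Src' = Src - 12
  Dst' = m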
+bool DependenceAnalysis::propagateLine(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint, + bool &Consistent) { + const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + const SCEV *A = CurConstraint.getA(); + const SCEV *B = CurConstraint.getB(); + const SCEV *C = CurConstraint.getC(); + DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); + DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); + DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); + if (A->isZero()) { + const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); + const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); + if (!Bconst || !Cconst) return false; + APInt Beta = Bconst->getValue()->getValue(); + APInt Charlie = Cconst->getValue()->getValue(); + APInt CdivB = Charlie.sdiv(Beta); + assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); + const SCEV *AP_K = findCoefficient(Dst, CurLoop); + // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); + Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); + Dst = zeroCoefficient(Dst, CurLoop); + if (!findCoefficient(Src, CurLoop)->isZero()) + Consistent = false; + } + else if (B->isZero()) { + const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); + const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); + if (!Aconst || !Cconst) return false; + APInt Alpha = Aconst->getValue()->getValue(); + APInt Charlie = Cconst->getValue()->getValue(); + APInt CdivA = Charlie.sdiv(Alpha); + assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); + const SCEV *A_K = findCoefficient(Src, CurLoop); + Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); + Src = zeroCoefficient(Src, CurLoop); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + } + else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { + const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); + const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); + if (!Aconst || !Cconst) return false; + APInt Alpha = Aconst->getValue()->getValue(); + APInt Charlie = Cconst->getValue()->getValue(); + APInt CdivA = Charlie.sdiv(Alpha); + assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); + const SCEV *A_K = findCoefficient(Src, CurLoop); + Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); + Src = zeroCoefficient(Src, CurLoop); + Dst = addToCoefficient(Dst, CurLoop, A_K); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + } + else { + // paper is incorrect here, or perhaps just misleading + const SCEV *A_K = findCoefficient(Src, CurLoop); + Src = SE->getMulExpr(Src, A); + Dst = SE->getMulExpr(Dst, A); + Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); + Src = zeroCoefficient(Src, CurLoop); + Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); + if (!findCoefficient(Dst, CurLoop)->isZero()) + Consistent = false; + } + DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); + DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); + return true; +} + + +// Attempt to propagate a point +// constraint into a subscript pair (Src and Dst). +// Return true if some simplification occurs. 
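A small worked instance (hypothetical values, not from the patch): given a point constraint (X, Y) = (2, 3) on loop i, a source subscript [2*i + n], and a destination subscript [3*i + m], the code below folds A_K*X - AP_K*Y = 2*2 - 3*3 = -5 into Src and zeroes the i coefficient on both sides:

  Src' = n - 5
  Dst' = m

What remains is a ZIV pair that a later test can compare directly.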
+bool DependenceAnalysis::propagatePoint(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint) { + const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + const SCEV *A_K = findCoefficient(Src, CurLoop); + const SCEV *AP_K = findCoefficient(Dst, CurLoop); + const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); + const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); + DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); + Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); + Src = zeroCoefficient(Src, CurLoop); + DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); + DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); + Dst = zeroCoefficient(Dst, CurLoop); + DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); + return true; +} + + +// Update direction vector entry based on the current constraint. +void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, + const Constraint &CurConstraint + ) const { + DEBUG(dbgs() << "\tUpdate direction, constraint ="); + DEBUG(CurConstraint.dump(dbgs())); + if (CurConstraint.isAny()) + ; // use defaults + else if (CurConstraint.isDistance()) { + // this one is consistent, the others aren't + Level.Scalar = false; + Level.Distance = CurConstraint.getD(); + unsigned NewDirection = Dependence::DVEntry::NONE; + if (!SE->isKnownNonZero(Level.Distance)) // if may be zero + NewDirection = Dependence::DVEntry::EQ; + if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive + NewDirection |= Dependence::DVEntry::LT; + if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative + NewDirection |= Dependence::DVEntry::GT; + Level.Direction &= NewDirection; + } + else if (CurConstraint.isLine()) { + Level.Scalar = false; + Level.Distance = NULL; + // direction should be accurate + } + else if (CurConstraint.isPoint()) { + Level.Scalar = false; + Level.Distance = NULL; + unsigned NewDirection = Dependence::DVEntry::NONE; + if (!isKnownPredicate(CmpInst::ICMP_NE, + CurConstraint.getY(), + CurConstraint.getX())) + // if X may be = Y + NewDirection |= Dependence::DVEntry::EQ; + if (!isKnownPredicate(CmpInst::ICMP_SLE, + CurConstraint.getY(), + CurConstraint.getX())) + // if Y may be > X + NewDirection |= Dependence::DVEntry::LT; + if (!isKnownPredicate(CmpInst::ICMP_SGE, + CurConstraint.getY(), + CurConstraint.getX())) + // if Y may be < X + NewDirection |= Dependence::DVEntry::GT; + Level.Direction &= NewDirection; + } + else + llvm_unreachable("constraint has unexpected kind"); +} + + +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +// For debugging purposes, dump a small bit vector to dbgs(). +static void dumpSmallBitVector(SmallBitVector &BV) { + dbgs() << "{"; + for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { + dbgs() << VI; + if (BV.find_next(VI) >= 0) + dbgs() << ' '; + } + dbgs() << "}\n"; +} +#endif + + +// depends - +// Returns NULL if there is no dependence. +// Otherwise, return a Dependence with as many details as possible. +// Corresponds to Section 3.1 in the paper +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +// +// Care is required to keep the code below up to date w.r.t. this routine. 
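As a rough sketch of how a client pass might drive this entry point (hypothetical surrounding code; getAnalysis plumbing and isConfused() are assumed from the accompanying header, not shown in this diff):

  DependenceAnalysis *DA = &getAnalysis<DependenceAnalysis>();
  if (Dependence *D = DA->depends(SrcInst, DstInst, true)) {
    if (D->isConfused()) {
      // analysis gave up; treat as an unknown dependence
    }
    delete D; // the caller owns the returned Dependence
  } else {
    // provably independent; the two accesses may be reordered
  }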
+Dependence *DependenceAnalysis::depends(const Instruction *Src, + const Instruction *Dst, + bool PossiblyLoopIndependent) { + if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || + (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) + // if both instructions don't reference memory, there's no dependence + return NULL; + + if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) + // can only analyze simple loads and stores, i.e., no calls, invokes, etc. + return new Dependence(Src, Dst); + + const Value *SrcPtr = getPointerOperand(Src); + const Value *DstPtr = getPointerOperand(Dst); + + switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { + case AliasAnalysis::MayAlias: + case AliasAnalysis::PartialAlias: + // cannot analyse objects if we don't understand their aliasing. + return new Dependence(Src, Dst); + case AliasAnalysis::NoAlias: + // If the objects noalias, they are distinct, accesses are independent. + return NULL; + case AliasAnalysis::MustAlias: + break; // The underlying objects alias; test accesses for dependence. + } + + const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); + const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); + if (!SrcGEP || !DstGEP) + return new Dependence(Src, Dst); // missing GEP, assume dependence + + if (SrcGEP->getPointerOperandType() != DstGEP->getPointerOperandType()) + return new Dependence(Src, Dst); // different types, assume dependence + + // establish loop nesting levels + establishNestingLevels(Src, Dst); + DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); + DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); + + FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); + ++TotalArrayPairs; + + // classify subscript pairs + unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); + SmallVector<Subscript, 4> Pair(Pairs); + for (unsigned SI = 0; SI < Pairs; ++SI) { + Pair[SI].Loops.resize(MaxLevels + 1); + Pair[SI].GroupLoops.resize(MaxLevels + 1); + Pair[SI].Group.resize(Pairs); + } + Pairs = 0; + for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), + SrcEnd = SrcGEP->idx_end(), + DstIdx = DstGEP->idx_begin(), + DstEnd = DstGEP->idx_end(); + SrcIdx != SrcEnd && DstIdx != DstEnd; + ++SrcIdx, ++DstIdx, ++Pairs) { + Pair[Pairs].Src = SE->getSCEV(*SrcIdx); + Pair[Pairs].Dst = SE->getSCEV(*DstIdx); + removeMatchingExtensions(&Pair[Pairs]); + Pair[Pairs].Classification = + classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), + Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), + Pair[Pairs].Loops); + Pair[Pairs].GroupLoops = Pair[Pairs].Loops; + Pair[Pairs].Group.set(Pairs); + DEBUG(dbgs() << " subscript " << Pairs << "\n"); + DEBUG(dbgs() << "\tsrc = " << *Pair[Pairs].Src << "\n"); + DEBUG(dbgs() << "\tdst = " << *Pair[Pairs].Dst << "\n"); + DEBUG(dbgs() << "\tclass = " << Pair[Pairs].Classification << "\n"); + DEBUG(dbgs() << "\tloops = "); + DEBUG(dumpSmallBitVector(Pair[Pairs].Loops)); + } + + SmallBitVector Separable(Pairs); + SmallBitVector Coupled(Pairs); + + // Partition subscripts into separable and minimally-coupled groups + // Algorithm in paper is algorithmically better; + // this may be faster in practice. Check someday. + // + // Here's an example of how it works. Consider this code: + // + // for (i = ...) { + // for (j = ...) { + // for (k = ...) { + // for (l = ...) { + // for (m = ...) { + // A[i][j][k][m] = ...; + // ... 
= A[0][j][l][i + j]; + // } + // } + // } + // } + // } + // + // There are 4 subscripts here: + // 0 [i] and [0] + // 1 [j] and [j] + // 2 [k] and [l] + // 3 [m] and [i + j] + // + // We've already classified each subscript pair as ZIV, SIV, etc., + // and collected all the loops mentioned by pair P in Pair[P].Loops. + // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops + // and set Pair[P].Group = {P}. + // + // Src Dst Classification Loops GroupLoops Group + // 0 [i] [0] SIV {1} {1} {0} + // 1 [j] [j] SIV {2} {2} {1} + // 2 [k] [l] RDIV {3,4} {3,4} {2} + // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} + // + // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. + // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. + // + // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. + // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. + // Next, 0 and 3. The intersection of their GroupLoops = {1}, not empty, + // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added + // to either Separable or Coupled). + // + // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. + // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, + // so Pair[3].Group = {0, 1, 3} and Done = false. + // + // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. + // Since Done remains true, we add 2 to the set of Separable pairs. + // + // Finally, we consider 3. There's nothing to compare it with, + // so Done remains true and we add it to the Coupled set. + // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. + // + // In the end, we've got 1 separable subscript and 1 coupled group. + for (unsigned SI = 0; SI < Pairs; ++SI) { + if (Pair[SI].Classification == Subscript::NonLinear) { + // ignore these, but collect loops for later + ++NonlinearSubscriptPairs; + collectCommonLoops(Pair[SI].Src, + LI->getLoopFor(Src->getParent()), + Pair[SI].Loops); + collectCommonLoops(Pair[SI].Dst, + LI->getLoopFor(Dst->getParent()), + Pair[SI].Loops); + Result.Consistent = false; + } + else if (Pair[SI].Classification == Subscript::ZIV) { + // always separable + Separable.set(SI); + } + else { + // SIV, RDIV, or MIV, so check for coupled group + bool Done = true; + for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { + SmallBitVector Intersection = Pair[SI].GroupLoops; + Intersection &= Pair[SJ].GroupLoops; + if (Intersection.any()) { + // accumulate set of all the loops in group + Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; + // accumulate set of all subscripts in group + Pair[SJ].Group |= Pair[SI].Group; + Done = false; + } + } + if (Done) { + if (Pair[SI].Group.count() == 1) { + Separable.set(SI); + ++SeparableSubscriptPairs; + } + else { + Coupled.set(SI); + ++CoupledSubscriptPairs; + } + } + } + } + + DEBUG(dbgs() << " Separable = "); + DEBUG(dumpSmallBitVector(Separable)); + DEBUG(dbgs() << " Coupled = "); + DEBUG(dumpSmallBitVector(Coupled)); + + Constraint NewConstraint; + NewConstraint.setAny(SE); + + // test separable subscripts + for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { + DEBUG(dbgs() << "testing subscript " << SI); + switch (Pair[SI].Classification) { + case Subscript::ZIV: + DEBUG(dbgs() << ", ZIV\n"); + if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) + return NULL; + break; + case Subscript::SIV: { + DEBUG(dbgs() << ", SIV\n"); + unsigned Level; + const SCEV *SplitIter = NULL; + if (testSIV(Pair[SI].Src,
Pair[SI].Dst, Level, + Result, NewConstraint, SplitIter)) + return NULL; + break; + } + case Subscript::RDIV: + DEBUG(dbgs() << ", RDIV\n"); + if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) + return NULL; + break; + case Subscript::MIV: + DEBUG(dbgs() << ", MIV\n"); + if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) + return NULL; + break; + default: + llvm_unreachable("subscript has unexpected classification"); + } + } + + if (Coupled.count()) { + // test coupled subscript groups + DEBUG(dbgs() << "starting on coupled subscripts\n"); + DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); + SmallVector<Constraint, 4> Constraints(MaxLevels + 1); + for (unsigned II = 0; II <= MaxLevels; ++II) + Constraints[II].setAny(SE); + for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { + DEBUG(dbgs() << "testing subscript group " << SI << " { "); + SmallBitVector Group(Pair[SI].Group); + SmallBitVector Sivs(Pairs); + SmallBitVector Mivs(Pairs); + SmallBitVector ConstrainedLevels(MaxLevels + 1); + for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { + DEBUG(dbgs() << SJ << " "); + if (Pair[SJ].Classification == Subscript::SIV) + Sivs.set(SJ); + else + Mivs.set(SJ); + } + DEBUG(dbgs() << "}\n"); + while (Sivs.any()) { + bool Changed = false; + for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { + DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); + // SJ is an SIV subscript that's part of the current coupled group + unsigned Level; + const SCEV *SplitIter = NULL; + DEBUG(dbgs() << "SIV\n"); + if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, + Result, NewConstraint, SplitIter)) + return NULL; + ConstrainedLevels.set(Level); + if (intersectConstraints(&Constraints[Level], &NewConstraint)) { + if (Constraints[Level].isEmpty()) { + ++DeltaIndependence; + return NULL; + } + Changed = true; + } + Sivs.reset(SJ); + } + if (Changed) { + // propagate, possibly creating new SIVs and ZIVs + DEBUG(dbgs() << " propagating\n"); + DEBUG(dbgs() << "\tMivs = "); + DEBUG(dumpSmallBitVector(Mivs)); + for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + // SJ is an MIV subscript that's part of the current coupled group + DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); + if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, + Constraints, Result.Consistent)) { + DEBUG(dbgs() << "\t Changed\n"); + ++DeltaPropagations; + Pair[SJ].Classification = + classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), + Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), + Pair[SJ].Loops); + switch (Pair[SJ].Classification) { + case Subscript::ZIV: + DEBUG(dbgs() << "ZIV\n"); + if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) + return NULL; + Mivs.reset(SJ); + break; + case Subscript::SIV: + Sivs.set(SJ); + Mivs.reset(SJ); + break; + case Subscript::RDIV: + case Subscript::MIV: + break; + default: + llvm_unreachable("bad subscript classification"); + } + } + } + } + } + + // test & propagate remaining RDIVs + for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + if (Pair[SJ].Classification == Subscript::RDIV) { + DEBUG(dbgs() << "RDIV test\n"); + if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) + return NULL; + // I don't yet understand how to propagate RDIV results + Mivs.reset(SJ); + } + } + + // test remaining MIVs + // This code is temporary. + // Better to somehow test all remaining subscripts simultaneously. 
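//
// For reference, the SmallBitVector traversal idiom used throughout this
// routine: find_first()/find_next() return the index of the next set bit,
// or -1 when none remain. A tiny self-contained sketch:
//
//   SmallBitVector BV(8);
//   BV.set(2);
//   BV.set(5);
//   for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI))
//     dbgs() << VI << ' ';  // prints "2 5"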
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + if (Pair[SJ].Classification == Subscript::MIV) { + DEBUG(dbgs() << "MIV test\n"); + if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) + return NULL; + } + else + llvm_unreachable("expected only MIV subscripts at this point"); + } + + // update Result.DV from constraint vector + DEBUG(dbgs() << " updating\n"); + for (int SJ = ConstrainedLevels.find_first(); + SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { + updateDirection(Result.DV[SJ - 1], Constraints[SJ]); + if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) + return NULL; + } + } + } + + // make sure Scalar flags are set correctly + SmallBitVector CompleteLoops(MaxLevels + 1); + for (unsigned SI = 0; SI < Pairs; ++SI) + CompleteLoops |= Pair[SI].Loops; + for (unsigned II = 1; II <= CommonLevels; ++II) + if (CompleteLoops[II]) + Result.DV[II - 1].Scalar = false; + + // make sure the LoopIndependent flag is set correctly + if (PossiblyLoopIndependent) { + for (unsigned II = 1; II <= CommonLevels; ++II) { + if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { + Result.LoopIndependent = false; + break; + } + } + } + + FullDependence *Final = new FullDependence(Result); + Result.DV = NULL; + return Final; +} + + + +//===----------------------------------------------------------------------===// +// getSplitIteration - +// Rather than spend rarely-used space recording the splitting iteration +// during the Weak-Crossing SIV test, we re-compute it on demand. +// The re-computation is basically a repeat of the entire dependence test, +// though simplified since we know that the dependence exists. +// It's tedious, since we must go through all propagations, etc. +// +// Care is required to keep this code up to date w.r.t. the code above. +// +// Generally, the dependence analyzer will be used to build +// a dependence graph for a function (basically a map from instructions +// to dependences). Looking for cycles in the graph shows us loops +// that cannot be trivially vectorized/parallelized. +// +// We can try to improve the situation by examining all the dependences +// that make up the cycle, looking for ones we can break. +// Sometimes, peeling the first or last iteration of a loop will break +// dependences, and we've got flags for those possibilities. +// Sometimes, splitting a loop at some other iteration will do the trick, +// and we've got a flag for that case. Rather than waste the space to +// record the exact iteration (since we rarely know), we provide +// a method that calculates the iteration. It's a drag that it must work +// from scratch, but wonderful in that it's possible. +// +// Here's an example: +// +// for (i = 0; i < 10; i++) +// A[i] = ... +// ... = A[11 - i] +// +// There's a loop-carried flow dependence from the store to the load, +// found by the weak-crossing SIV test. The dependence will have a flag, +// indicating that the dependence can be broken by splitting the loop. +// Calling getSplitIteration will return 5. +// Splitting the loop at that iteration, like so: +// +// for (i = 0; i <= 5; i++) +// A[i] = ... +// ... = A[11 - i] +// for (i = 6; i < 10; i++) +// A[i] = ... +// ... = A[11 - i] +// +// breaks the dependence and allows us to vectorize/parallelize +// both loops.
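//
// Checking the arithmetic of the example: iterations i and i' = 11 - i
// collide, so the crossing point is i = 11/2 = 5.5, and we split at
// floor(5.5) = 5. In the first piece, writes touch A[0..5] while reads
// touch A[6..11]; in the second, writes touch A[6..9] while reads touch
// A[2..5]. Neither piece reads what it writes, so each can be handled
// on its own.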
+const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, + unsigned SplitLevel) { + assert(Dep && "expected a pointer to a Dependence"); + assert(Dep->isSplitable(SplitLevel) && + "Dep should be splitable at SplitLevel"); + const Instruction *Src = Dep->getSrc(); + const Instruction *Dst = Dep->getDst(); + assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); + assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); + assert(isLoadOrStore(Src)); + assert(isLoadOrStore(Dst)); + const Value *SrcPtr = getPointerOperand(Src); + const Value *DstPtr = getPointerOperand(Dst); + assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == + AliasAnalysis::MustAlias); + const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); + const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); + assert(SrcGEP); + assert(DstGEP); + assert(SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()); + + // establish loop nesting levels + establishNestingLevels(Src, Dst); + + FullDependence Result(Src, Dst, false, CommonLevels); + + // classify subscript pairs + unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); + SmallVector<Subscript, 4> Pair(Pairs); + for (unsigned SI = 0; SI < Pairs; ++SI) { + Pair[SI].Loops.resize(MaxLevels + 1); + Pair[SI].GroupLoops.resize(MaxLevels + 1); + Pair[SI].Group.resize(Pairs); + } + Pairs = 0; + for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), + SrcEnd = SrcGEP->idx_end(), + DstIdx = DstGEP->idx_begin(), + DstEnd = DstGEP->idx_end(); + SrcIdx != SrcEnd && DstIdx != DstEnd; + ++SrcIdx, ++DstIdx, ++Pairs) { + Pair[Pairs].Src = SE->getSCEV(*SrcIdx); + Pair[Pairs].Dst = SE->getSCEV(*DstIdx); + Pair[Pairs].Classification = + classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), + Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), + Pair[Pairs].Loops); + Pair[Pairs].GroupLoops = Pair[Pairs].Loops; + Pair[Pairs].Group.set(Pairs); + } + + SmallBitVector Separable(Pairs); + SmallBitVector Coupled(Pairs); + + // partition subscripts into separable and minimally-coupled groups + for (unsigned SI = 0; SI < Pairs; ++SI) { + if (Pair[SI].Classification == Subscript::NonLinear) { + // ignore these, but collect loops for later + collectCommonLoops(Pair[SI].Src, + LI->getLoopFor(Src->getParent()), + Pair[SI].Loops); + collectCommonLoops(Pair[SI].Dst, + LI->getLoopFor(Dst->getParent()), + Pair[SI].Loops); + Result.Consistent = false; + } + else if (Pair[SI].Classification == Subscript::ZIV) + Separable.set(SI); + else { + // SIV, RDIV, or MIV, so check for coupled group + bool Done = true; + for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { + SmallBitVector Intersection = Pair[SI].GroupLoops; + Intersection &= Pair[SJ].GroupLoops; + if (Intersection.any()) { + // accumulate set of all the loops in group + Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; + // accumulate set of all subscripts in group + Pair[SJ].Group |= Pair[SI].Group; + Done = false; + } + } + if (Done) { + if (Pair[SI].Group.count() == 1) + Separable.set(SI); + else + Coupled.set(SI); + } + } + } + + Constraint NewConstraint; + NewConstraint.setAny(SE); + + // test separable subscripts + for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { + switch (Pair[SI].Classification) { + case Subscript::SIV: { + unsigned Level; + const SCEV *SplitIter = NULL; + (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, + Result, NewConstraint, SplitIter); + if (Level == SplitLevel) { + assert(SplitIter != NULL); + return SplitIter; + } + break; + } + case 
Subscript::ZIV: + case Subscript::RDIV: + case Subscript::MIV: + break; + default: + llvm_unreachable("subscript has unexpected classification"); + } + } + + if (Coupled.count()) { + // test coupled subscript groups + SmallVector<Constraint, 4> Constraints(MaxLevels + 1); + for (unsigned II = 0; II <= MaxLevels; ++II) + Constraints[II].setAny(SE); + for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { + SmallBitVector Group(Pair[SI].Group); + SmallBitVector Sivs(Pairs); + SmallBitVector Mivs(Pairs); + SmallBitVector ConstrainedLevels(MaxLevels + 1); + for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { + if (Pair[SJ].Classification == Subscript::SIV) + Sivs.set(SJ); + else + Mivs.set(SJ); + } + while (Sivs.any()) { + bool Changed = false; + for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { + // SJ is an SIV subscript that's part of the current coupled group + unsigned Level; + const SCEV *SplitIter = NULL; + (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, + Result, NewConstraint, SplitIter); + if (Level == SplitLevel && SplitIter) + return SplitIter; + ConstrainedLevels.set(Level); + if (intersectConstraints(&Constraints[Level], &NewConstraint)) + Changed = true; + Sivs.reset(SJ); + } + if (Changed) { + // propagate, possibly creating new SIVs and ZIVs + for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + // SJ is an MIV subscript that's part of the current coupled group + if (propagate(Pair[SJ].Src, Pair[SJ].Dst, + Pair[SJ].Loops, Constraints, Result.Consistent)) { + Pair[SJ].Classification = + classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), + Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), + Pair[SJ].Loops); + switch (Pair[SJ].Classification) { + case Subscript::ZIV: + Mivs.reset(SJ); + break; + case Subscript::SIV: + Sivs.set(SJ); + Mivs.reset(SJ); + break; + case Subscript::RDIV: + case Subscript::MIV: + break; + default: + llvm_unreachable("bad subscript classification"); + } + } + } + } + } + } + } + llvm_unreachable("somehow reached end of routine"); + return NULL; +} diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 5f51f775f14..95e58022ca1 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -243,7 +243,8 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { if (!TD) return false; - unsigned IntPtrWidth = TD->getPointerSizeInBits(); + unsigned AS = GEP.getPointerAddressSpace(); + unsigned IntPtrWidth = TD->getPointerSizeInBits(AS); assert(IntPtrWidth == Offset.getBitWidth()); for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); @@ -391,7 +392,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Track base/offset pairs when converted to a plain integer provided the // integer is large enough to represent the pointer. unsigned IntegerSize = I.getType()->getScalarSizeInBits(); - if (TD && IntegerSize >= TD->getPointerSizeInBits()) { + unsigned AS = I.getPointerAddressSpace(); + if (TD && IntegerSize >= TD->getPointerSizeInBits(AS)) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(I.getOperand(0)); if (BaseAndOffset.first) @@ -425,7 +427,8 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // modifications provided the integer is not too large. 
Value *Op = I.getOperand(0); unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - if (TD && IntegerSize <= TD->getPointerSizeInBits()) { + unsigned AS = I.getAddressSpace(); + if (TD && IntegerSize <= TD->getPointerSizeInBits(AS)) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op); if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; @@ -760,7 +763,8 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { if (!TD || !V->getType()->isPointerTy()) return 0; - unsigned IntPtrWidth = TD->getPointerSizeInBits(); + unsigned AS = cast<PointerType>(V->getType())->getAddressSpace(); + unsigned IntPtrWidth = TD->getPointerSizeInBits(AS); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -824,7 +828,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // size of the byval type by the target's pointer size. PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = TD->getPointerSizeInBits(); + unsigned AS = PTy->getAddressSpace(); + unsigned PointerSize = TD->getPointerSizeInBits(AS); // Ceiling division. unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index b3d62487fc1..8e326122fa5 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -666,7 +666,8 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// 'Offset' APInt must be the bitwidth of the target's pointer size. static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP, APInt &Offset) { - unsigned IntPtrWidth = TD.getPointerSizeInBits(); + unsigned AS = GEP->getPointerAddressSpace(); + unsigned IntPtrWidth = TD.getPointerSizeInBits(AS); assert(IntPtrWidth == Offset.getBitWidth()); gep_type_iterator GTI = gep_type_begin(GEP); @@ -696,12 +697,14 @@ static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP, /// accumulates the total constant offset applied in the returned constant. It /// returns 0 if V is not a pointer, and returns the constant '0' if there are /// no constant offsets applied. +/// FIXME: This function also exists in InlineCost.cpp. static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, Value *&V) { if (!V->getType()->isPointerTy()) return 0; - unsigned IntPtrWidth = TD.getPointerSizeInBits(); + unsigned AS = cast<PointerType>(V->getType())->getAddressSpace(); + unsigned IntPtrWidth = TD.getPointerSizeInBits(AS); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -1877,7 +1880,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) && - Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { + Q.TD->getPointerSizeInBits( + cast<PtrToIntInst>(LI)->getPointerAddressSpace()) == + DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 5e05f4c8ca1..5c2a49e767f 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -19,8 +19,8 @@ #include "llvm/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/DataLayout.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/TargetTransformInfo.h" using namespace llvm; @@ -1599,15 +1599,15 @@ static bool width_descending(Value *lhs, Value *rhs) { /// This does not depend on any SCEVExpander state but should be used in /// the same context that SCEVExpander is used. unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, - SmallVectorImpl<WeakVH> &DeadInsts, - const TargetLowering *TLI) { + SmallVectorImpl<WeakVH> &DeadInsts, + const ScalarTargetTransformInfo *STTI) { // Find integer phis in order of increasing width. SmallVector<PHINode*, 8> Phis; for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *Phi = dyn_cast<PHINode>(I); ++I) { Phis.push_back(Phi); } - if (TLI) + if (STTI) std::sort(Phis.begin(), Phis.end(), width_descending); unsigned NumElim = 0; @@ -1624,8 +1624,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; if (!OrigPhiRef) { OrigPhiRef = Phi; - if (Phi->getType()->isIntegerTy() && TLI - && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + if (Phi->getType()->isIntegerTy() && STTI && + STTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { // This phi can be freely truncated to the narrowest phi type. Map the // truncated expression to it so it will be reused for narrow types. const SCEV *TruncExpr = diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 951b442b874..1d7f0692cbe 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -40,7 +40,8 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; assert(isa<PointerType>(Ty) && "Expected a pointer type!"); - return TD ? TD->getPointerSizeInBits() : 0; + return TD ? + TD->getPointerSizeInBits(cast<PointerType>(Ty)->getAddressSpace()) : 0; } static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, @@ -1621,7 +1622,8 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, // Re-sign extend from the pointer size if needed to get overflow edge cases // right. - unsigned PtrSize = TD.getPointerSizeInBits(); + unsigned AS = GEP->getPointerAddressSpace(); + unsigned PtrSize = TD.getPointerSizeInBits(AS); if (PtrSize < 64) Offset = SignExtend64(Offset, PtrSize); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 86bc7aced1e..60e1741d694 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -916,7 +916,7 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { /// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind /// indicates what kind of attribute list this is: 0: function arg, 1: result, /// 2: function attr. 
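// A minimal sketch of the AttrBuilder interface this patch switches to
// (hedged: signatures as of the 3.2-era headers; Ctx stands in for
// whatever LLVMContext is at hand):
//
//   AttrBuilder B;
//   B.addAttribute(Attributes::NoUnwind);
//   B.addAttribute(Attributes::ReadOnly);
//   if (B.hasAttributes())
//     Attributes A = Attributes::get(Ctx, B);  // now requires a context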
-bool LLParser::ParseOptionalAttrs(Attributes::Builder &B, unsigned AttrKind) { +bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) { LocTy AttrLoc = Lex.getLoc(); bool HaveError = false; @@ -1435,7 +1435,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList, // Parse the argument. LocTy ArgLoc; Type *ArgTy = 0; - Attributes::Builder ArgAttrs; + AttrBuilder ArgAttrs; Value *V; if (ParseType(ArgTy, ArgLoc)) return true; @@ -1443,7 +1443,8 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList, // Otherwise, handle normal operands. if (ParseOptionalAttrs(ArgAttrs, 0) || ParseValue(ArgTy, V, PFS)) return true; - ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(ArgAttrs))); + ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(V->getContext(), + ArgAttrs))); } Lex.Lex(); // Lex the ')'. @@ -1475,7 +1476,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, } else { LocTy TypeLoc = Lex.getLoc(); Type *ArgTy = 0; - Attributes::Builder Attrs; + AttrBuilder Attrs; std::string Name; if (ParseType(ArgTy) || @@ -1492,7 +1493,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, if (!FunctionType::isValidArgumentType(ArgTy)) return Error(TypeLoc, "invalid type for function argument"); - ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attributes::get(Attrs), Name)); + ArgList.push_back(ArgInfo(TypeLoc, ArgTy, + Attributes::get(ArgTy->getContext(), + Attrs), Name)); while (EatIfPresent(lltok::comma)) { // Handle ... at end of arg list. @@ -1518,7 +1521,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, if (!ArgTy->isFirstClassType()) return Error(TypeLoc, "invalid type for function argument"); - ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attributes::get(Attrs), Name)); + ArgList.push_back(ArgInfo(TypeLoc, ArgTy, + Attributes::get(ArgTy->getContext(), Attrs), + Name)); } } @@ -1542,7 +1547,7 @@ bool LLParser::ParseFunctionType(Type *&Result) { for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { if (!ArgList[i].Name.empty()) return Error(ArgList[i].Loc, "argument name invalid in function type"); - if (ArgList[i].Attrs) + if (ArgList[i].Attrs.hasAttributes()) return Error(ArgList[i].Loc, "argument attributes invalid in function type"); } @@ -2672,7 +2677,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { unsigned Linkage; unsigned Visibility; - Attributes::Builder RetAttrs; + AttrBuilder RetAttrs; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc = Lex.getLoc(); @@ -2736,7 +2741,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { SmallVector<ArgInfo, 8> ArgList; bool isVarArg; - Attributes::Builder FuncAttrs; + AttrBuilder FuncAttrs; std::string Section; unsigned Alignment; std::string GC; @@ -2766,7 +2771,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { SmallVector<AttributeWithIndex, 8> Attrs; if (RetAttrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs))); + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::ReturnIndex, + Attributes::get(RetType->getContext(), + RetAttrs))); for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { ParamTypeList.push_back(ArgList[i].Ty); @@ -2775,7 +2783,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { } if (FuncAttrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FuncAttrs))); + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::FunctionIndex, + 
Attributes::get(RetType->getContext(), + FuncAttrs))); AttrListPtr PAL = AttrListPtr::get(Attrs); @@ -2795,6 +2806,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { ForwardRefVals.find(FunctionName); if (FRVI != ForwardRefVals.end()) { Fn = M->getFunction(FunctionName); + if (!Fn) + return Error(FRVI->second.second, "invalid forward reference to " + "function as global value!"); if (Fn->getType() != PFT) return Error(FRVI->second.second, "invalid forward reference to " "function '" + FunctionName + "' with wrong type!"); @@ -3248,7 +3262,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) { /// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { LocTy CallLoc = Lex.getLoc(); - Attributes::Builder RetAttrs, FnAttrs; + AttrBuilder RetAttrs, FnAttrs; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3294,7 +3308,10 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { // Set up the Attributes for the function. SmallVector<AttributeWithIndex, 8> Attrs; if (RetAttrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs))); + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::ReturnIndex, + Attributes::get(Callee->getContext(), + RetAttrs))); SmallVector<Value*, 8> Args; @@ -3322,7 +3339,10 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { return Error(CallLoc, "not enough parameters specified for call"); if (FnAttrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FnAttrs))); + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::FunctionIndex, + Attributes::get(Callee->getContext(), + FnAttrs))); // Finish off the Attributes and check them AttrListPtr PAL = AttrListPtr::get(Attrs); @@ -3647,7 +3667,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { /// ParameterList OptionalAttrs bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, bool isTail) { - Attributes::Builder RetAttrs, FnAttrs; + AttrBuilder RetAttrs, FnAttrs; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3690,7 +3710,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, // Set up the Attributes for the function. 
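// (AttrListPtr::ReturnIndex and AttrListPtr::FunctionIndex, used in the
// hunks below, are the named replacements for the magic indices 0 and ~0
// that the old code passed to AttributeWithIndex::get.)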
SmallVector<AttributeWithIndex, 8> Attrs; if (RetAttrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs))); + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::ReturnIndex, + Attributes::get(Callee->getContext(), + RetAttrs))); SmallVector<Value*, 8> Args; @@ -3718,7 +3741,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, return Error(CallLoc, "not enough parameters specified for call"); if (FnAttrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FnAttrs))); + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::FunctionIndex, + Attributes::get(Callee->getContext(), + FnAttrs))); // Finish off the Attributes and check them AttrListPtr PAL = AttrListPtr::get(Attrs); diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 671eaf64291..c6bbdb27aee 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -175,7 +175,7 @@ namespace llvm { bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalAddrSpace(unsigned &AddrSpace); - bool ParseOptionalAttrs(Attributes::Builder &Attrs, unsigned AttrKind); + bool ParseOptionalAttrs(AttrBuilder &Attrs, unsigned AttrKind); bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage); bool ParseOptionalLinkage(unsigned &Linkage) { bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 8860ef065c6..279343c48c6 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -211,7 +211,6 @@ namespace { } /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. - //static inline bool classof(const ConstantPlaceHolder *) { return true; } static bool classof(const Value *V) { return isa<ConstantExpr>(V) && cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1; @@ -477,14 +476,15 @@ bool BitcodeReader::ParseAttributeBlock() { for (unsigned i = 0, e = Record.size(); i != e; i += 2) { Attributes ReconstitutedAttr = - Attributes::decodeLLVMAttributesForBitcode(Record[i+1]); + Attributes::decodeLLVMAttributesForBitcode(Context, Record[i+1]); Record[i+1] = ReconstitutedAttr.Raw(); } for (unsigned i = 0, e = Record.size(); i != e; i += 2) { - if (Attributes(Record[i+1]).hasAttributes()) + AttrBuilder B(Record[i+1]); + if (B.hasAttributes()) Attrs.push_back(AttributeWithIndex::get(Record[i], - Attributes(Record[i+1]))); + Attributes::get(Context, B))); } MAttributes.push_back(AttrListPtr::get(Attrs)); @@ -891,9 +891,9 @@ bool BitcodeReader::ParseMetadata() { } } -/// DecodeSignRotatedValue - Decode a signed value stored with the sign bit in +/// decodeSignRotatedValue - Decode a signed value stored with the sign bit in /// the LSB for dense VBR encoding. 
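//
// The writer-side counterpart (emitSignedInt64 in BitcodeWriter.cpp, later
// in this patch) maps V >= 0 to V << 1 and V < 0 to (-V << 1) | 1, so small
// magnitudes stay small under VBR. Worked values:
//
//   encode( 0) == 0   decode(0) ==  0
//   encode( 1) == 2   decode(2) ==  1
//   encode(-1) == 3   decode(3) == -1
//   encode(-2) == 5   decode(5) == -2
//
// INT64_MIN is the one special case: negating it overflows, so it encodes
// to 1, which the decoder handles explicitly.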
-static uint64_t DecodeSignRotatedValue(uint64_t V) { +uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { if ((V & 1) == 0) return V >> 1; if (V != 1) @@ -943,7 +943,7 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() { static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) { SmallVector<uint64_t, 8> Words(Vals.size()); std::transform(Vals.begin(), Vals.end(), Words.begin(), - DecodeSignRotatedValue); + BitcodeReader::decodeSignRotatedValue); return APInt(TypeBits, Words); } @@ -997,7 +997,7 @@ bool BitcodeReader::ParseConstants() { case bitc::CST_CODE_INTEGER: // INTEGER: [intval] if (!CurTy->isIntegerTy() || Record.empty()) return Error("Invalid CST_INTEGER record"); - V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0])); + V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0])); break; case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] if (!CurTy->isIntegerTy() || Record.empty()) @@ -1524,13 +1524,22 @@ bool BitcodeReader::ParseModule(bool Resume) { // Read a record. switch (Stream.ReadRecord(Code, Record)) { default: break; // Default behavior, ignore unknown content. - case bitc::MODULE_CODE_VERSION: // VERSION: [version#] + case bitc::MODULE_CODE_VERSION: { // VERSION: [version#] if (Record.size() < 1) return Error("Malformed MODULE_CODE_VERSION"); - // Only version #0 is supported so far. - if (Record[0] != 0) - return Error("Unknown bitstream version!"); + // Only version #0 and #1 are supported so far. + unsigned module_version = Record[0]; + switch (module_version) { + default: return Error("Unknown bitstream version!"); + case 0: + UseRelativeIDs = false; + break; + case 1: + UseRelativeIDs = true; + break; + } break; + } case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) @@ -1797,13 +1806,6 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) { // Read a record. switch (Stream.ReadRecord(Code, Record)) { default: break; // Default behavior, ignore unknown content. - case bitc::MODULE_CODE_VERSION: // VERSION: [version#] - if (Record.size() < 1) - return Error("Malformed MODULE_CODE_VERSION"); - // Only version #0 is supported so far. 
- if (Record[0] != 0) - return Error("Unknown bitstream version!"); - break; case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) @@ -2016,7 +2018,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || - getValue(Record, OpNum, LHS->getType(), RHS) || + popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || OpNum+1 > Record.size()) return Error("Invalid BINOP record"); @@ -2131,8 +2133,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || - getValue(Record, OpNum, TrueVal->getType(), FalseVal) || - getValue(Record, OpNum, Type::getInt1Ty(Context), Cond)) + popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || + popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond)) return Error("Invalid SELECT record"); I = SelectInst::Create(Cond, TrueVal, FalseVal); @@ -2146,7 +2148,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || - getValue(Record, OpNum, TrueVal->getType(), FalseVal) || + popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || getValueTypePair(Record, OpNum, NextValueNo, Cond)) return Error("Invalid SELECT record"); @@ -2171,7 +2173,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Vec, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || - getValue(Record, OpNum, Type::getInt32Ty(Context), Idx)) + popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx)) return Error("Invalid EXTRACTELT record"); I = ExtractElementInst::Create(Vec, Idx); InstructionList.push_back(I); @@ -2182,9 +2184,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Vec, *Elt, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast<VectorType>(Vec->getType())->getElementType(), Elt) || - getValue(Record, OpNum, Type::getInt32Ty(Context), Idx)) + popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx)) return Error("Invalid INSERTELT record"); I = InsertElementInst::Create(Vec, Elt, Idx); InstructionList.push_back(I); @@ -2195,7 +2197,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Vec1, *Vec2, *Mask; if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) || - getValue(Record, OpNum, Vec1->getType(), Vec2)) + popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2)) return Error("Invalid SHUFFLEVEC record"); if (getValueTypePair(Record, OpNum, NextValueNo, Mask)) @@ -2215,7 +2217,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || - getValue(Record, OpNum, LHS->getType(), RHS) || + popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || OpNum+1 != Record.size()) return Error("Invalid CMP record"); @@ -2260,7 +2262,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } else { BasicBlock *FalseDest = getBasicBlock(Record[1]); - Value *Cond = getFnValueByID(Record[2], Type::getInt1Ty(Context)); + Value *Cond = getValue(Record, 2, NextValueNo, + Type::getInt1Ty(Context)); if (FalseDest == 0 || Cond == 0) return Error("Invalid BR record"); I = 
BranchInst::Create(TrueDest, FalseDest, Cond); @@ -2276,7 +2279,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Type *OpTy = getTypeByID(Record[1]); unsigned ValueBitWidth = cast<IntegerType>(OpTy)->getBitWidth(); - Value *Cond = getFnValueByID(Record[2], OpTy); + Value *Cond = getValue(Record, 2, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[3]); if (OpTy == 0 || Cond == 0 || Default == 0) return Error("Invalid SWITCH record"); @@ -2331,7 +2334,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (Record.size() < 3 || (Record.size() & 1) == 0) return Error("Invalid SWITCH record"); Type *OpTy = getTypeByID(Record[0]); - Value *Cond = getFnValueByID(Record[1], OpTy); + Value *Cond = getValue(Record, 1, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[2]); if (OpTy == 0 || Cond == 0 || Default == 0) return Error("Invalid SWITCH record"); @@ -2355,7 +2358,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (Record.size() < 2) return Error("Invalid INDIRECTBR record"); Type *OpTy = getTypeByID(Record[0]); - Value *Address = getFnValueByID(Record[1], OpTy); + Value *Address = getValue(Record, 1, NextValueNo, OpTy); if (OpTy == 0 || Address == 0) return Error("Invalid INDIRECTBR record"); unsigned NumDests = Record.size()-2; @@ -2397,7 +2400,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { SmallVector<Value*, 16> Ops; for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { - Ops.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i))); + Ops.push_back(getValue(Record, OpNum, NextValueNo, + FTy->getParamType(i))); if (Ops.back() == 0) return Error("Invalid INVOKE record"); } @@ -2444,7 +2448,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.push_back(PN); for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { - Value *V = getFnValueByID(Record[1+i], Ty); + Value *V; + // With the new function encoding, it is possible that operands have + // negative IDs (for forward references). Use a signed VBR + // representation to keep the encoding small. 
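// Worked example of the relative scheme, assuming instruction number 42:
// an operand already defined as value 40 is written as 42 - 40 = 2; a PHI
// incoming value that forward-references value 45 is written as
// 42 - 45 = -3, which sign-rotates to 7 for the VBR. The reader reverses
// both steps:
//
//   decodeSignRotatedValue(7) == -3
//   ValNo = InstNum - (-3) == 45  // forward reference recovered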
+ if (UseRelativeIDs) + V = getValueSigned(Record, 1+i, NextValueNo, Ty); + else + V = getValue(Record, 1+i, NextValueNo, Ty); BasicBlock *BB = getBasicBlock(Record[2+i]); if (!V || !BB) return Error("Invalid PHI record"); PN->addIncoming(V, BB); @@ -2542,7 +2553,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast<PointerType>(Ptr->getType())->getElementType(), Val) || OpNum+2 != Record.size()) return Error("Invalid STORE record"); @@ -2556,7 +2567,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast<PointerType>(Ptr->getType())->getElementType(), Val) || OpNum+4 != Record.size()) return Error("Invalid STOREATOMIC record"); @@ -2579,9 +2590,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Ptr, *Cmp, *New; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast<PointerType>(Ptr->getType())->getElementType(), Cmp) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast<PointerType>(Ptr->getType())->getElementType(), New) || OpNum+3 != Record.size()) return Error("Invalid CMPXCHG record"); @@ -2599,7 +2610,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Ptr, *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast<PointerType>(Ptr->getType())->getElementType(), Val) || OpNum+4 != Record.size()) return Error("Invalid ATOMICRMW record"); @@ -2653,7 +2664,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (FTy->getParamType(i)->isLabelTy()) Args.push_back(getBasicBlock(Record[OpNum])); else - Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i))); + Args.push_back(getValue(Record, OpNum, NextValueNo, + FTy->getParamType(i))); if (Args.back() == 0) return Error("Invalid CALL record"); } @@ -2682,7 +2694,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (Record.size() < 3) return Error("Invalid VAARG record"); Type *OpTy = getTypeByID(Record[0]); - Value *Op = getFnValueByID(Record[1], OpTy); + Value *Op = getValue(Record, 1, NextValueNo, OpTy); Type *ResTy = getTypeByID(Record[2]); if (!OpTy || !Op || !ResTy) return Error("Invalid VAARG record"); diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index e7c4e94f785..3d5c0eb4def 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -179,18 +179,27 @@ class BitcodeReader : public GVMaterializer { typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy; DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs; + /// UseRelativeIDs - Indicates that we are using a new encoding for + /// instruction operands where most operands in the current + /// FUNCTION_BLOCK are encoded relative to the instruction number, + /// for a more compact encoding. Some instruction operands are not + /// relative to the instruction ID: basic block numbers, and types. + /// Once the old style function blocks have been phased out, we would + /// not need this flag. 
+ bool UseRelativeIDs; + public: explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false), LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false), ErrorString(0), ValueList(C), MDValueList(C), - SeenFirstFunctionBody(false) { + SeenFirstFunctionBody(false), UseRelativeIDs(false) { } explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C) : Context(C), TheModule(0), Buffer(0), BufferOwned(false), LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), ErrorString(0), ValueList(C), MDValueList(C), - SeenFirstFunctionBody(false) { + SeenFirstFunctionBody(false), UseRelativeIDs(false) { } ~BitcodeReader() { FreeState(); @@ -223,6 +232,9 @@ public: /// @brief Cheap mechanism to just extract module triple /// @returns true if an error occurred. bool ParseTriple(std::string &Triple); + + static uint64_t decodeSignRotatedValue(uint64_t V); + private: Type *getTypeByID(unsigned ID); Value *getFnValueByID(unsigned ID, Type *Ty) { @@ -247,6 +259,9 @@ private: unsigned InstNum, Value *&ResVal) { if (Slot == Record.size()) return true; unsigned ValNo = (unsigned)Record[Slot++]; + // Adjust the ValNo, if it was encoded relative to the InstNum. + if (UseRelativeIDs) + ValNo = InstNum - ValNo; if (ValNo < InstNum) { // If this is not a forward reference, just return the value we already // have. @@ -255,20 +270,54 @@ private: } else if (Slot == Record.size()) { return true; } - + unsigned TypeNo = (unsigned)Record[Slot++]; ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo)); return ResVal == 0; } - bool getValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot, - Type *Ty, Value *&ResVal) { - if (Slot == Record.size()) return true; - unsigned ValNo = (unsigned)Record[Slot++]; - ResVal = getFnValueByID(ValNo, Ty); + + /// popValue - Read a value out of the specified record from slot 'Slot'. + /// Increment Slot past the number of slots used by the value in the record. + /// Return true if there is an error. + bool popValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot, + unsigned InstNum, Type *Ty, Value *&ResVal) { + if (getValue(Record, Slot, InstNum, Ty, ResVal)) + return true; + // All values currently take a single record slot. + ++Slot; + return false; + } + + /// getValue -- Like popValue, but does not increment the Slot number. + bool getValue(SmallVector<uint64_t, 64> &Record, unsigned Slot, + unsigned InstNum, Type *Ty, Value *&ResVal) { + ResVal = getValue(Record, Slot, InstNum, Ty); return ResVal == 0; } - + /// getValue -- Version of getValue that returns ResVal directly, + /// or 0 if there is an error. + Value *getValue(SmallVector<uint64_t, 64> &Record, unsigned Slot, + unsigned InstNum, Type *Ty) { + if (Slot == Record.size()) return 0; + unsigned ValNo = (unsigned)Record[Slot]; + // Adjust the ValNo, if it was encoded relative to the InstNum. + if (UseRelativeIDs) + ValNo = InstNum - ValNo; + return getFnValueByID(ValNo, Ty); + } + + /// getValueSigned -- Like getValue, but decodes signed VBRs. + Value *getValueSigned(SmallVector<uint64_t, 64> &Record, unsigned Slot, + unsigned InstNum, Type *Ty) { + if (Slot == Record.size()) return 0; + unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]); + // Adjust the ValNo, if it was encoded relative to the InstNum. 
+ if (UseRelativeIDs) + ValNo = InstNum - ValNo; + return getFnValueByID(ValNo, Ty); + } + bool ParseModule(bool Resume); bool ParseAttributeBlock(); bool ParseTypeTable(); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index b3f1bb13a9f..60c657ae6dd 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -41,8 +41,6 @@ EnablePreserveUseListOrdering("enable-bc-uselist-preserve", /// These are manifest constants used by the bitcode writer. They do not need to /// be kept in sync with the reader, but need to be consistent within this file. enum { - CurVersion = 0, - // VALUE_SYMTAB_BLOCK abbrev id's. VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV, VST_ENTRY_7_ABBREV, @@ -722,16 +720,20 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) { Stream.ExitBlock(); } +static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V) { + if ((int64_t)V >= 0) + Vals.push_back(V << 1); + else + Vals.push_back((-V << 1) | 1); +} + static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals, unsigned &Code, unsigned &AbbrevToUse, const APInt &Val, bool EmitSizeForWideNumbers = false ) { if (Val.getBitWidth() <= 64) { uint64_t V = Val.getSExtValue(); - if ((int64_t)V >= 0) - Vals.push_back(V << 1); - else - Vals.push_back((-V << 1) | 1); + emitSignedInt64(Vals, V); Code = bitc::CST_CODE_INTEGER; AbbrevToUse = CONSTANTS_INTEGER_ABBREV; } else { @@ -747,11 +749,7 @@ static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals, const uint64_t *RawWords = Val.getRawData(); for (unsigned i = 0; i != NWords; ++i) { - int64_t V = RawWords[i]; - if (V >= 0) - Vals.push_back(V << 1); - else - Vals.push_back((-V << 1) | 1); + emitSignedInt64(Vals, RawWords[i]); } Code = bitc::CST_CODE_WIDE_INTEGER; } @@ -1025,12 +1023,13 @@ static void WriteModuleConstants(const ValueEnumerator &VE, /// /// This function adds V's value ID to Vals. If the value ID is higher than the /// instruction ID, then it is a forward reference, and it also includes the -/// type ID. +/// type ID. The value ID that is written is encoded relative to the InstID. static bool PushValueAndType(const Value *V, unsigned InstID, SmallVector<unsigned, 64> &Vals, ValueEnumerator &VE) { unsigned ValID = VE.getValueID(V); - Vals.push_back(ValID); + // Make encoding relative to the InstID. + Vals.push_back(InstID - ValID); if (ValID >= InstID) { Vals.push_back(VE.getTypeID(V->getType())); return true; @@ -1038,6 +1037,30 @@ static bool PushValueAndType(const Value *V, unsigned InstID, return false; } +/// pushValue - Like PushValueAndType, but where the type of the value is +/// omitted (perhaps it was already encoded in an earlier operand). +static void pushValue(const Value *V, unsigned InstID, + SmallVector<unsigned, 64> &Vals, + ValueEnumerator &VE) { + unsigned ValID = VE.getValueID(V); + Vals.push_back(InstID - ValID); +} + +static void pushValue64(const Value *V, unsigned InstID, + SmallVector<uint64_t, 128> &Vals, + ValueEnumerator &VE) { + uint64_t ValID = VE.getValueID(V); + Vals.push_back(InstID - ValID); +} + +static void pushValueSigned(const Value *V, unsigned InstID, + SmallVector<uint64_t, 128> &Vals, + ValueEnumerator &VE) { + unsigned ValID = VE.getValueID(V); + int64_t diff = ((int32_t)InstID - (int32_t)ValID); + emitSignedInt64(Vals, diff); +} + /// WriteInstruction - Emit an instruction to the specified stream. 
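//
// A note on the relative encoding used by the helpers above: the
// subtraction InstID - ValID is unsigned, so a forward reference
// (ValID >= InstID) wraps around. That is harmless because the reader
// computes InstNum - ValNo with the same modular arithmetic; e.g. with
// 32-bit operand records, InstID 10 and ValID 12 encode as 2^32 - 2,
// and 10 - (2^32 - 2) mod 2^32 recovers 12.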
static void WriteInstruction(const Instruction &I, unsigned InstID, ValueEnumerator &VE, BitstreamWriter &Stream, @@ -1058,7 +1081,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Code = bitc::FUNC_CODE_INST_BINOP; if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) AbbrevToUse = FUNCTION_INST_BINOP_ABBREV; - Vals.push_back(VE.getValueID(I.getOperand(1))); + pushValue(I.getOperand(1), InstID, Vals, VE); Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode())); uint64_t Flags = GetOptimizationFlags(&I); if (Flags != 0) { @@ -1096,32 +1119,32 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::Select: Code = bitc::FUNC_CODE_INST_VSELECT; PushValueAndType(I.getOperand(1), InstID, Vals, VE); - Vals.push_back(VE.getValueID(I.getOperand(2))); + pushValue(I.getOperand(2), InstID, Vals, VE); PushValueAndType(I.getOperand(0), InstID, Vals, VE); break; case Instruction::ExtractElement: Code = bitc::FUNC_CODE_INST_EXTRACTELT; PushValueAndType(I.getOperand(0), InstID, Vals, VE); - Vals.push_back(VE.getValueID(I.getOperand(1))); + pushValue(I.getOperand(1), InstID, Vals, VE); break; case Instruction::InsertElement: Code = bitc::FUNC_CODE_INST_INSERTELT; PushValueAndType(I.getOperand(0), InstID, Vals, VE); - Vals.push_back(VE.getValueID(I.getOperand(1))); - Vals.push_back(VE.getValueID(I.getOperand(2))); + pushValue(I.getOperand(1), InstID, Vals, VE); + pushValue(I.getOperand(2), InstID, Vals, VE); break; case Instruction::ShuffleVector: Code = bitc::FUNC_CODE_INST_SHUFFLEVEC; PushValueAndType(I.getOperand(0), InstID, Vals, VE); - Vals.push_back(VE.getValueID(I.getOperand(1))); - Vals.push_back(VE.getValueID(I.getOperand(2))); + pushValue(I.getOperand(1), InstID, Vals, VE); + pushValue(I.getOperand(2), InstID, Vals, VE); break; case Instruction::ICmp: case Instruction::FCmp: // compare returning Int1Ty or vector of Int1Ty Code = bitc::FUNC_CODE_INST_CMP2; PushValueAndType(I.getOperand(0), InstID, Vals, VE); - Vals.push_back(VE.getValueID(I.getOperand(1))); + pushValue(I.getOperand(1), InstID, Vals, VE); Vals.push_back(cast<CmpInst>(I).getPredicate()); break; @@ -1147,7 +1170,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.push_back(VE.getValueID(II.getSuccessor(0))); if (II.isConditional()) { Vals.push_back(VE.getValueID(II.getSuccessor(1))); - Vals.push_back(VE.getValueID(II.getCondition())); + pushValue(II.getCondition(), InstID, Vals, VE); } } break; @@ -1164,7 +1187,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals64.push_back(SwitchRecordHeader); Vals64.push_back(VE.getTypeID(SI.getCondition()->getType())); - Vals64.push_back(VE.getValueID(SI.getCondition())); + pushValue64(SI.getCondition(), InstID, Vals64, VE); Vals64.push_back(VE.getValueID(SI.getDefaultDest())); Vals64.push_back(SI.getNumCases()); for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); @@ -1215,7 +1238,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::IndirectBr: Code = bitc::FUNC_CODE_INST_INDIRECTBR; Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) + // Encode the address operand as relative, but not the basic blocks. 
+ pushValue(I.getOperand(0), InstID, Vals, VE); + for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) Vals.push_back(VE.getValueID(I.getOperand(i))); break; @@ -1234,7 +1259,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - Vals.push_back(VE.getValueID(I.getOperand(i))); // fixed param. + pushValue(I.getOperand(i), InstID, Vals, VE); // fixed param. // Emit type/value pairs for varargs params. if (FTy->isVarArg()) { @@ -1256,12 +1281,19 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::PHI: { const PHINode &PN = cast<PHINode>(I); Code = bitc::FUNC_CODE_INST_PHI; - Vals.push_back(VE.getTypeID(PN.getType())); + // With the newer instruction encoding, forward references could give + // negative valued IDs. This is most common for PHIs, so we use + // signed VBRs. + SmallVector<uint64_t, 128> Vals64; + Vals64.push_back(VE.getTypeID(PN.getType())); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - Vals.push_back(VE.getValueID(PN.getIncomingValue(i))); - Vals.push_back(VE.getValueID(PN.getIncomingBlock(i))); + pushValueSigned(PN.getIncomingValue(i), InstID, Vals64, VE); + Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i))); } - break; + // Emit a Vals64 vector and exit. + Stream.EmitRecord(Code, Vals64, AbbrevToUse); + Vals64.clear(); + return; } case Instruction::LandingPad: { @@ -1311,7 +1343,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, else Code = bitc::FUNC_CODE_INST_STORE; PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr - Vals.push_back(VE.getValueID(I.getOperand(0))); // val. + pushValue(I.getOperand(0), InstID, Vals, VE); // val. Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1); Vals.push_back(cast<StoreInst>(I).isVolatile()); if (cast<StoreInst>(I).isAtomic()) { @@ -1322,8 +1354,8 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::AtomicCmpXchg: Code = bitc::FUNC_CODE_INST_CMPXCHG; PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr - Vals.push_back(VE.getValueID(I.getOperand(1))); // cmp. - Vals.push_back(VE.getValueID(I.getOperand(2))); // newval. + pushValue(I.getOperand(1), InstID, Vals, VE); // cmp. + pushValue(I.getOperand(2), InstID, Vals, VE); // newval. Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile()); Vals.push_back(GetEncodedOrdering( cast<AtomicCmpXchgInst>(I).getOrdering())); @@ -1333,7 +1365,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::AtomicRMW: Code = bitc::FUNC_CODE_INST_ATOMICRMW; PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr - Vals.push_back(VE.getValueID(I.getOperand(1))); // val. + pushValue(I.getOperand(1), InstID, Vals, VE); // val. Vals.push_back(GetEncodedRMWOperation( cast<AtomicRMWInst>(I).getOperation())); Vals.push_back(cast<AtomicRMWInst>(I).isVolatile()); @@ -1358,8 +1390,13 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee // Emit value #'s for the fixed parameters. - for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - Vals.push_back(VE.getValueID(CI.getArgOperand(i))); // fixed param. + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { + // Check for labels (can happen with asm labels). 
+ if (FTy->getParamType(i)->isLabelTy()) + Vals.push_back(VE.getValueID(CI.getArgOperand(i))); + else + pushValue(CI.getArgOperand(i), InstID, Vals, VE); // fixed param. + } // Emit type/value pairs for varargs params. if (FTy->isVarArg()) { @@ -1372,7 +1409,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::VAArg: Code = bitc::FUNC_CODE_INST_VAARG; Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty - Vals.push_back(VE.getValueID(I.getOperand(0))); // valist. + pushValue(I.getOperand(0), InstID, Vals, VE); // valist. Vals.push_back(VE.getTypeID(I.getType())); // restype. break; } @@ -1514,8 +1551,8 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, // Emit blockinfo, which defines the standard abbreviations etc. static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { // We only want to emit block info records for blocks that have multiple - // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. Other - // blocks can defined their abbrevs inline. + // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. + // Other blocks can define their abbrevs inline. Stream.EnterBlockInfoBlock(2); { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings. @@ -1773,12 +1810,10 @@ static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE, static void WriteModule(const Module *M, BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); - // Emit the version number if it is non-zero. - if (CurVersion) { - SmallVector<unsigned, 1> Vals; - Vals.push_back(CurVersion); - Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); - } + SmallVector<unsigned, 1> Vals; + unsigned CurVersion = 1; + Vals.push_back(CurVersion); + Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); // Analyze the module, enumerating globals, functions, etc. ValueEnumerator VE(M); diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 205480a4692..7a1c049d522 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -635,7 +635,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( --R; const unsigned NewSuperReg = Order[R]; // Don't consider non-allocatable registers - if (!RegClassInfo.isAllocatable(NewSuperReg)) continue; + if (!MRI.isAllocatable(NewSuperReg)) continue; // Don't replace a register with itself. if (NewSuperReg == SuperReg) continue; @@ -818,7 +818,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!RegClassInfo.isAllocatable(AntiDepReg)) { + if (!MRI.isAllocatable(AntiDepReg)) { // Don't break anti-dependencies on non-allocatable registers. DEBUG(dbgs() << " (non-allocatable)\n"); continue; diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 32ad34a76d6..7cde136c5ef 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -29,6 +29,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg); std::pair<unsigned, unsigned> HintPair = VRM.getRegInfo().getRegAllocationHint(VirtReg); + const MachineRegisterInfo &MRI = VRM.getRegInfo(); // HintPair.second is a register, phys or virt. 
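The `WriteModule` hunk above also bumps `MODULE_CODE_VERSION` to an unconditional 1, presumably so readers can tell the new relative encoding from the old absolute one. Separately, the `AggressiveAntiDepBreaker` and `AllocationOrder` hunks begin a migration that recurs throughout the rest of this diff (later in `CriticalAntiDepBreaker`, `LiveVariables`, `MachineCSE`, `MachineCopyPropagation`): per-pass `BitVector` snapshots from `TRI->getReservedRegs(MF)`/`getAllocatableSet(MF)` are retired in favor of querying `MachineRegisterInfo` directly. A rough standalone sketch of the pattern, one frozen authority instead of N private copies (names are illustrative, not LLVM's):

    #include <cassert>
    #include <utility>
    #include <vector>

    // Illustrative only: a single frozen authority for reserved/allocatable
    // registers, replacing per-pass BitVector snapshots. Not LLVM's API.
    class RegInfoSketch {
      std::vector<bool> Reserved;    // indexed by physical register number
      std::vector<bool> Allocatable;
      bool Frozen;
    public:
      RegInfoSketch() : Frozen(false) {}
      void freeze(std::vector<bool> R, std::vector<bool> A) {
        Reserved = std::move(R);
        Allocatable = std::move(A);
        Frozen = true;
      }
      bool isReserved(unsigned Reg) const {
        assert(Frozen && "querying reserved regs before they are frozen");
        return Reserved[Reg];
      }
      bool isAllocatable(unsigned Reg) const {
        assert(Frozen && "querying allocatable regs before they are frozen");
        return Allocatable[Reg];
      }
    };

    int main() {
      RegInfoSketch RI;
      RI.freeze(std::vector<bool>(8, false), std::vector<bool>(8, true));
      return (!RI.isReserved(3) && RI.isAllocatable(3)) ? 0 : 1;
    }

The `MachineRegisterInfo::freezeReservedRegs` hunk further down adds the corresponding sanity assert on the frozen vector's size.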
Hint = HintPair.second; @@ -52,7 +53,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, unsigned *P = new unsigned[Order.size()]; Begin = P; for (unsigned i = 0; i != Order.size(); ++i) - if (!RCI.isReserved(Order[i])) + if (!MRI.isReserved(Order[i])) *P++ = Order[i]; End = P; @@ -69,7 +70,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, // The hint must be a valid physreg for allocation. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || RCI.isReserved(Hint))) + !RC->contains(Hint) || MRI.isReserved(Hint))) Hint = 0; } diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 09e30eba579..5162ad762e7 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -314,8 +314,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // the return. Ignore noalias because it doesn't affect the call sequence. const Function *F = ExitBB->getParent(); Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); - if (Attributes::Builder(CalleeRetAttr ^ CallerRetAttr) - .removeAttribute(Attributes::NoAlias).hasAttributes()) + if (AttrBuilder(CalleeRetAttr).removeAttribute(Attributes::NoAlias) != + AttrBuilder(CallerRetAttr).removeAttribute(Attributes::NoAlias)) return false; // It's not safe to eliminate the sign / zero extension of the return value. @@ -356,7 +356,7 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); - if (Attributes::Builder(CallerRetAttr) + if (AttrBuilder(CallerRetAttr) .removeAttribute(Attributes::NoAlias).hasAttributes()) return false; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d74a70362a2..4de98da655b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -385,7 +385,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer - unsigned PtrSize = TD->getPointerSizeInBits()/8; + unsigned AS = GV->getType()->getAddressSpace(); + unsigned PtrSize = TD->getPointerSizeInBits(AS)/8; OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize, 0); OutStreamer.EmitIntValue(0, PtrSize, 0); @@ -1299,7 +1300,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { // Emit the function pointers in the target-specific order const DataLayout *TD = TM.getDataLayout(); - unsigned Align = Log2_32(TD->getPointerPrefAlignment()); + unsigned Align = Log2_32(TD->getPointerPrefAlignment(0)); std::stable_sort(Structors.begin(), Structors.end(), priority_order); for (unsigned i = 0, e = Structors.size(); i != e; ++i) { const MCSection *OutputSection = @@ -1480,8 +1481,9 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { if (Offset == 0) return Base; + unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace(); // Truncate/sext the offset to the pointer size. 
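This hunk and several around it thread an address-space argument through `getPointerSizeInBits`/`getPointerSize`, since a target may size pointers differently per address space; the constant-offset path then truncates or sign-extends the offset to the width of the pointer's own address space. A self-contained sketch of both halves, assuming a made-up `LayoutSketch` table rather than LLVM's `DataLayout`:

    #include <cstdint>
    #include <map>

    // Illustrative layout table: pointer width varies by address space.
    // LLVM's DataLayout carries far more state than this.
    struct LayoutSketch {
      std::map<unsigned, unsigned> PtrBits; // address space -> width in bits
      unsigned getPointerSizeInBits(unsigned AS) const {
        std::map<unsigned, unsigned>::const_iterator I = PtrBits.find(AS);
        // Fall back to address space 0 when AS has no explicit entry.
        return I != PtrBits.end() ? I->second : PtrBits.find(0)->second;
      }
    };

    // Sign-extend the low Bits of V to a full 64-bit value, as the offset
    // fix-up above does for pointers narrower than 64 bits.
    static int64_t signExtend64(int64_t V, unsigned Bits) {
      uint64_t U = uint64_t(V) << (64 - Bits);
      return int64_t(U) >> (64 - Bits); // arithmetic shift on two's complement
    }

    int main() {
      LayoutSketch L;
      L.PtrBits[0] = 64;
      L.PtrBits[1] = 32; // e.g. a narrow GPU address space
      unsigned W = L.getPointerSizeInBits(1);
      return signExtend64(0xFFFFFFFFLL, W) == -1 ? 0 : 1; // 32-bit -1
    }

The many `getPointerSize(0)` call sites in the DWARF emitters below simply pin the old behavior to address space 0 rather than doing anything address-space-aware.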
- unsigned Width = TD.getPointerSizeInBits(); + unsigned Width = TD.getPointerSizeInBits(AS); if (Width < 64) Offset = SignExtend64(Offset, Width); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index d94e1fe61bf..6c17af2e8c8 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -112,7 +112,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { switch (Encoding & 0x07) { default: llvm_unreachable("Invalid encoded value."); - case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(); + case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(0); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: return 4; case dwarf::DW_EH_PE_udata8: return 8; diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 4d73b3c2226..73e18cd817b 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -200,7 +200,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; case dwarf::DW_FORM_addr: - Size = Asm->getDataLayout().getPointerSize(); break; + Size = Asm->getDataLayout().getPointerSize(0); break; default: llvm_unreachable("DIE Value form not supported yet"); } Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/); @@ -222,7 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_data8: return sizeof(int64_t); case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); - case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); + case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(0); default: llvm_unreachable("DIE Value form not supported yet"); } } @@ -249,7 +249,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getDataLayout().getPointerSize(); + return AP->getDataLayout().getPointerSize(0); } #ifndef NDEBUG @@ -273,7 +273,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getDataLayout().getPointerSize(); + return AP->getDataLayout().getPointerSize(0); } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index f93ea1b045b..28a96f3b2b6 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -214,9 +214,6 @@ namespace llvm { /// virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *) { return true; } - #ifndef NDEBUG virtual void print(raw_ostream &O) = 0; void dump(); @@ -257,7 +254,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. - static bool classof(const DIEInteger *) { return true; } static bool classof(const DIEValue *I) { return I->getType() == isInteger; } #ifndef NDEBUG @@ -286,7 +282,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. 
- static bool classof(const DIELabel *) { return true; } static bool classof(const DIEValue *L) { return L->getType() == isLabel; } #ifndef NDEBUG @@ -313,7 +308,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. - static bool classof(const DIEDelta *) { return true; } static bool classof(const DIEValue *D) { return D->getType() == isDelta; } #ifndef NDEBUG @@ -343,7 +337,6 @@ namespace llvm { } // Implement isa/cast/dyncast. - static bool classof(const DIEEntry *) { return true; } static bool classof(const DIEValue *E) { return E->getType() == isEntry; } #ifndef NDEBUG @@ -383,7 +376,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. - static bool classof(const DIEBlock *) { return true; } static bool classof(const DIEValue *E) { return E->getType() == isBlock; } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 6acf19ee8c4..df162e07a88 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -384,7 +384,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, // DW_AT_ranges appropriately. TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); + * Asm->getDataLayout().getPointerSize(0)); for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -450,7 +450,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, // DW_AT_ranges appropriately. TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); + * Asm->getDataLayout().getPointerSize(0)); for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -1765,7 +1765,7 @@ void DwarfDebug::emitDebugInfo() { Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"), DwarfAbbrevSectionSym); Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0)); emitDIE(Die); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); @@ -1811,14 +1811,14 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(0); Asm->OutStreamer.AddComment("Op size"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0) + 1); Asm->OutStreamer.AddComment("DW_LNE_set_address"); Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->OutStreamer.AddComment("Section end label"); Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd), - Asm->getDataLayout().getPointerSize(), + Asm->getDataLayout().getPointerSize(0), 0/*AddrSpace*/); // Mark end of matrix. @@ -2047,7 +2047,7 @@ void DwarfDebug::emitDebugLoc() { // Start the dwarf loc section. 
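The `DIE.h` hunks delete the always-true `classof(const DIEInteger *)`-style overloads and keep only the discriminating `classof(const DIEValue *)`; by this point `isa<>` no longer needs the identity overload, so it was dead weight. A freestanding sketch of the surviving pattern (sketch types, not the real DIE hierarchy):

    // Sketch types, not the real DIE hierarchy: a Kind tag plus a single
    // classof(BaseClass*) is all isa<>/dyn_cast<> need.
    struct ValueSketch {
      enum Kind { isInteger, isLabel };
      explicit ValueSketch(Kind K) : Ty(K) {}
      Kind getType() const { return Ty; }
    private:
      Kind Ty;
    };

    struct IntegerSketch : ValueSketch {
      IntegerSketch() : ValueSketch(isInteger) {}
      static bool classof(const ValueSketch *V) {
        return V->getType() == isInteger;
      }
    };

    // Roughly what llvm::isa<> reduces to for this hierarchy.
    template <typename To, typename From> bool isaSketch(const From *V) {
      return To::classof(V);
    }

    int main() {
      IntegerSketch I;
      const ValueSketch *V = &I;
      return isaSketch<IntegerSketch>(V) ? 0 : 1;
    }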
Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); - unsigned char Size = Asm->getDataLayout().getPointerSize(); + unsigned char Size = Asm->getDataLayout().getPointerSize(0); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); unsigned index = 1; for (SmallVector<DotDebugLocEntry, 4>::iterator @@ -2144,7 +2144,7 @@ void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); - unsigned char Size = Asm->getDataLayout().getPointerSize(); + unsigned char Size = Asm->getDataLayout().getPointerSize(0); for (SmallVector<const MCSymbol *, 8>::iterator I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); I != E; ++I) { @@ -2202,7 +2202,7 @@ void DwarfDebug::emitDebugInlineInfo() { Asm->OutStreamer.AddComment("Dwarf Version"); Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0)); for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(), E = InlinedSPNodes.end(); I != E; ++I) { @@ -2233,7 +2233,7 @@ void DwarfDebug::emitDebugInlineInfo() { if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc"); Asm->OutStreamer.EmitSymbolValue(LI->first, - Asm->getDataLayout().getPointerSize(),0); + Asm->getDataLayout().getPointerSize(0),0); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 08fb6b3f52c..31d07141a1d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -417,7 +417,7 @@ void DwarfException::EmitExceptionTable() { // that we're omitting that bit. TTypeEncoding = dwarf::DW_EH_PE_omit; // dwarf::DW_EH_PE_absptr - TypeFormatSize = Asm->getDataLayout().getPointerSize(); + TypeFormatSize = Asm->getDataLayout().getPointerSize(0); } else { // Okay, we have actual filters or typeinfos to emit. As such, we need to // pick a type encoding for them. We're about to emit a list of pointers to diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index f7c011968c2..d0e27d1d04d 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -91,7 +91,7 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) { /// either condition is detected in a function which uses the GC. 
/// void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { - unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(0); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(getModule(), AP, "code_end"); diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index bc5258ef7d8..dee339a4586 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -164,7 +164,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { continue; float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && LIS.isAllocatable(hint)) + if (hweight > bestPhys && mri.isAllocatable(hint)) bestPhys = hweight, hintPhys = hint; } else { if (hweight > bestVirt) diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index cdae33c2441..22b91409240 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -50,7 +50,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, if (MinAlign > (int)Align) Align = MinAlign; MF.getFrameInfo()->ensureMaxAlignment(Align); - TM.getTargetLowering()->HandleByVal(this, Size); + TM.getTargetLowering()->HandleByVal(this, Size, Align); unsigned Offset = AllocateStack(Size, Align); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index a9de1c7490f..377b4712bea 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -527,7 +527,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!RegClassInfo.isAllocatable(AntiDepReg)) + if (!MRI.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; else if (KeepRegs.test(AntiDepReg)) diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index b4394e8d56e..8964269dde5 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -33,7 +33,6 @@ namespace { const MachineRegisterInfo *MRI; const TargetInstrInfo *TII; BitVector LivePhysRegs; - BitVector ReservedRegs; public: static char ID; // Pass identification, replacement for typeid @@ -70,7 +69,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // Don't delete live physreg defs, or any reserved register defs. - if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg)) + if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) return false; } else { if (!MRI->use_nodbg_empty(Reg)) @@ -90,9 +89,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); - // Treat reserved registers as always live. - ReservedRegs = TRI->getReservedRegs(MF); - // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will // be cleaned up. 
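In the `CallingConvLower` hunk above, `HandleByVal` now receives the computed `Align` alongside `Size`, so a target can reserve a correctly aligned stack slot instead of re-deriving the alignment itself. A small sketch of why both numbers are needed when laying out the slot (hypothetical helper names):

    #include <cstdint>

    // Sketch: reserving stack for a byval argument needs both size and
    // alignment; pad up to the alignment first, then bump by the size.
    static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    static uint64_t allocateStack(uint64_t &Offset, uint64_t Size,
                                  uint64_t Align) {
      Offset = alignTo(Offset, Align); // pad to the required alignment
      uint64_t Slot = Offset;
      Offset += Size;
      return Slot;
    }

    int main() {
      uint64_t Offset = 4;
      // A 12-byte byval argument that must be 8-byte aligned lands at 8.
      return allocateStack(Offset, 12, 8) == 8 ? 0 : 1;
    }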
@@ -101,7 +97,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock *MBB = &*I; // Start out assuming that reserved registers are live out of this block. - LivePhysRegs = ReservedRegs; + LivePhysRegs = MRI->getReservedRegs(); // Also add any explicit live-out physregs for this block. if (!MBB->empty() && MBB->back().isReturn()) diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 141f8edc839..65bc4af99e2 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -110,8 +110,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { DomTree = &getAnalysis<MachineDominatorTree>(); if (!LRCalc) LRCalc = new LiveRangeCalc(); - AllocatableRegs = TRI->getAllocatableSet(fn); - ReservedRegs = TRI->getReservedRegs(fn); // Allocate space for all virtual registers. VirtRegIntervals.resize(MRI->getNumVirtRegs()); @@ -542,11 +540,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { // Ignore uses of reserved registers. We only track defs of those. for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { unsigned Root = *Roots; - if (!isReserved(Root) && !MRI->reg_empty(Root)) + if (!MRI->isReserved(Root) && !MRI->reg_empty(Root)) LRCalc->extendToUses(LI, Root); for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { unsigned Reg = *Supers; - if (!isReserved(Reg) && !MRI->reg_empty(Reg)) + if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg)) LRCalc->extendToUses(LI, Reg); } } @@ -761,38 +759,41 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, LI->removeRange(Kill, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); - // Find all blocks that are reachable from MBB without leaving VNI's live - // range. - for (df_iterator<MachineBasicBlock*> - I = df_begin(KillMBB), E = df_end(KillMBB); I != E;) { - MachineBasicBlock *MBB = *I; - // KillMBB itself was already handled. - if (MBB == KillMBB) { - ++I; - continue; - } + // Find all blocks that are reachable from KillMBB without leaving VNI's live + // range. It is possible that KillMBB itself is reachable, so start a DFS + // from each successor. + typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy; + VisitedTy Visited; + for (MachineBasicBlock::succ_iterator + SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end(); + SuccI != SuccE; ++SuccI) { + for (df_ext_iterator<MachineBasicBlock*, VisitedTy> + I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited); + I != E;) { + MachineBasicBlock *MBB = *I; + + // Check if VNI is live in to MBB. + tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); + LiveRangeQuery LRQ(*LI, MBBStart); + if (LRQ.valueIn() != VNI) { + // This block isn't part of the VNI live range. Prune the search. + I.skipChildren(); + continue; + } - // Check if VNI is live in to MBB. - tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); - LiveRangeQuery LRQ(*LI, MBBStart); - if (LRQ.valueIn() != VNI) { - // This block isn't part of the VNI live range. Prune the search. - I.skipChildren(); - continue; - } + // Prune the search if VNI is killed in MBB. + if (LRQ.endPoint() < MBBEnd) { + LI->removeRange(MBBStart, LRQ.endPoint()); + if (EndPoints) EndPoints->push_back(LRQ.endPoint()); + I.skipChildren(); + continue; + } - // Prune the search if VNI is killed in MBB. 
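The `pruneValue` rewrite above replaces a single DFS rooted at `KillMBB` with one `df_ext_iterator` walk per successor, all sharing an external visited set; as the new comment says, `KillMBB` itself may be reachable around a loop, and rooting the search at the successors lets it be processed like any other block instead of being skipped as the root. A standalone sketch of that shape (the real code additionally prunes subtrees with `skipChildren()` when the value is not live in or is killed):

    #include <set>
    #include <vector>

    // Sketch: depth-first search rooted at each successor of Start, sharing
    // one visited set, so Start itself can be re-reached through a cycle and
    // processed once. Mirrors df_ext_iterator's externally supplied set.
    struct Block {
      int ID;
      std::vector<Block*> Succs;
    };

    static void visit(Block *B, std::set<Block*> &Visited,
                      std::vector<int> &Order) {
      if (!Visited.insert(B).second)
        return;                  // already seen via another successor
      Order.push_back(B->ID);    // process B (prune/extend a range here)
      for (size_t i = 0; i != B->Succs.size(); ++i)
        visit(B->Succs[i], Visited, Order);
    }

    int main() {
      Block A = {0, {}}, B = {1, {}}, C = {2, {}};
      A.Succs.push_back(&B);
      B.Succs.push_back(&C);
      C.Succs.push_back(&A);     // loop back to the start block
      std::set<Block*> Visited;
      std::vector<int> Order;
      for (size_t i = 0; i != A.Succs.size(); ++i)
        visit(A.Succs[i], Visited, Order); // start from successors, not A
      // A (ID 0) is reached through the cycle B -> C -> A exactly once.
      return Order.size() == 3 ? 0 : 1;
    }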
- if (LRQ.endPoint() < MBBEnd) { - LI->removeRange(MBBStart, LRQ.endPoint()); - if (EndPoints) EndPoints->push_back(LRQ.endPoint()); - I.skipChildren(); - continue; + // VNI is live through MBB. + LI->removeRange(MBBStart, MBBEnd); + if (EndPoints) EndPoints->push_back(MBBEnd); + ++I; } - - // VNI is live through MBB. - LI->removeRange(MBBStart, MBBEnd); - if (EndPoints) EndPoints->push_back(MBBEnd); - ++I; } } @@ -1007,246 +1008,252 @@ private: LiveIntervals& LIS; const MachineRegisterInfo& MRI; const TargetRegisterInfo& TRI; + SlotIndex OldIdx; SlotIndex NewIdx; - - typedef std::pair<LiveInterval*, LiveRange*> IntRangePair; - typedef DenseSet<IntRangePair> RangeSet; - - struct RegRanges { - LiveRange* Use; - LiveRange* EC; - LiveRange* Dead; - LiveRange* Def; - RegRanges() : Use(0), EC(0), Dead(0), Def(0) {} - }; - typedef DenseMap<unsigned, RegRanges> BundleRanges; + SmallPtrSet<LiveInterval*, 8> Updated; + bool UpdateFlags; public: HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI, - const TargetRegisterInfo& TRI, SlotIndex NewIdx) - : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {} - - // Update intervals for all operands of MI from OldIdx to NewIdx. - // This assumes that MI used to be at OldIdx, and now resides at - // NewIdx. - void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) { - assert(NewIdx != OldIdx && "No-op move? That's a bit strange."); - - // Collect the operands. - RangeSet Entering, Internal, Exiting; - bool hasRegMaskOp = false; - collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); - - // To keep the LiveRanges valid within an interval, move the ranges closest - // to the destination first. This prevents ranges from overlapping, to that - // APIs like removeRange still work. - if (NewIdx < OldIdx) { - moveAllEnteringFrom(OldIdx, Entering); - moveAllInternalFrom(OldIdx, Internal); - moveAllExitingFrom(OldIdx, Exiting); - } - else { - moveAllExitingFrom(OldIdx, Exiting); - moveAllInternalFrom(OldIdx, Internal); - moveAllEnteringFrom(OldIdx, Entering); - } - - if (hasRegMaskOp) - updateRegMaskSlots(OldIdx); - -#ifndef NDEBUG - LIValidator validator; - validator = std::for_each(Entering.begin(), Entering.end(), validator); - validator = std::for_each(Internal.begin(), Internal.end(), validator); - validator = std::for_each(Exiting.begin(), Exiting.end(), validator); - assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness."); -#endif - + const TargetRegisterInfo& TRI, + SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags) + : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx), + UpdateFlags(UpdateFlags) {} + + // FIXME: UpdateFlags is a workaround that creates live intervals for all + // physregs, even those that aren't needed for regalloc, in order to update + // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill + // flags, and postRA passes will use a live register utility instead. + LiveInterval *getRegUnitLI(unsigned Unit) { + if (UpdateFlags) + return &LIS.getRegUnit(Unit); + return LIS.getCachedRegUnit(Unit); } - // Update intervals for all operands of MI to refer to BundleStart's - // SlotIndex. - void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) { - if (MI == BundleStart) - return; // Bundling instr with itself - nothing to do. 
- - SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI); - assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI && - "SlotIndex <-> Instruction mapping broken for MI"); - - // Collect all ranges already in the bundle. - MachineBasicBlock::instr_iterator BII(BundleStart); - RangeSet Entering, Internal, Exiting; - bool hasRegMaskOp = false; - collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); - assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); - for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) { - if (&*BII == MI) + /// Update all live ranges touched by MI, assuming a move from OldIdx to + /// NewIdx. + void updateAllRanges(MachineInstr *MI) { + DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI); + bool hasRegMask = false; + for (MIOperands MO(MI); MO.isValid(); ++MO) { + if (MO->isRegMask()) + hasRegMask = true; + if (!MO->isReg()) continue; - collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); - assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); - } - - BundleRanges BR = createBundleRanges(Entering, Internal, Exiting); - - Entering.clear(); - Internal.clear(); - Exiting.clear(); - collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); - assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); - - DEBUG(dbgs() << "Entering: " << Entering.size() << "\n"); - DEBUG(dbgs() << "Internal: " << Internal.size() << "\n"); - DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n"); - - moveAllEnteringFromInto(OldIdx, Entering, BR); - moveAllInternalFromInto(OldIdx, Internal, BR); - moveAllExitingFromInto(OldIdx, Exiting, BR); + // Aggressively clear all kill flags. + // They are reinserted by VirtRegRewriter. + if (MO->isUse()) + MO->setIsKill(false); + unsigned Reg = MO->getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + updateRange(LIS.getInterval(Reg)); + continue; + } -#ifndef NDEBUG - LIValidator validator; - validator = std::for_each(Entering.begin(), Entering.end(), validator); - validator = std::for_each(Internal.begin(), Internal.end(), validator); - validator = std::for_each(Exiting.begin(), Exiting.end(), validator); - assert(validator.rangesOk() && "moveAllOperandsInto broke liveness."); -#endif + // For physregs, only update the regunits that actually have a + // precomputed live range. + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + if (LiveInterval *LI = getRegUnitLI(*Units)) + updateRange(*LI); + } + if (hasRegMask) + updateRegMaskSlots(); } private: + /// Update a single live range, assuming an instruction has been moved from + /// OldIdx to NewIdx. + void updateRange(LiveInterval &LI) { + if (!Updated.insert(&LI)) + return; + DEBUG({ + dbgs() << " "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + dbgs() << PrintReg(LI.reg); + else + dbgs() << PrintRegUnit(LI.reg, &TRI); + dbgs() << ":\t" << LI << '\n'; + }); + if (SlotIndex::isEarlierInstr(OldIdx, NewIdx)) + handleMoveDown(LI); + else + handleMoveUp(LI); + DEBUG(dbgs() << " -->\t" << LI << '\n'); + LI.verify(); + } + + /// Update LI to reflect an instruction has been moved downwards from OldIdx + /// to NewIdx. + /// + /// 1. Live def at OldIdx: + /// Move def to NewIdx, assert endpoint after NewIdx. + /// + /// 2. Live def at OldIdx, killed at NewIdx: + /// Change to dead def at NewIdx. + /// (Happens when bundling def+kill together). + /// + /// 3. 
Dead def at OldIdx: + /// Move def to NewIdx, possibly across another live value. + /// + /// 4. Def at OldIdx AND at NewIdx: + /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx. + /// (Happens when bundling multiple defs together). + /// + /// 5. Value read at OldIdx, killed before NewIdx: + /// Extend kill to NewIdx. + /// + void handleMoveDown(LiveInterval &LI) { + // First look for a kill at OldIdx. + LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); + LiveInterval::iterator E = LI.end(); + // Is LI even live at OldIdx? + if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + return; -#ifndef NDEBUG - class LIValidator { - private: - DenseSet<const LiveInterval*> Checked, Bogus; - public: - void operator()(const IntRangePair& P) { - const LiveInterval* LI = P.first; - if (Checked.count(LI)) + // Handle a live-in value. + if (!SlotIndex::isSameInstr(I->start, OldIdx)) { + bool isKill = SlotIndex::isSameInstr(OldIdx, I->end); + // If the live-in value already extends to NewIdx, there is nothing to do. + if (!SlotIndex::isEarlierInstr(I->end, NewIdx)) return; - Checked.insert(LI); - if (LI->empty()) + // Aggressively remove all kill flags from the old kill point. + // Kill flags shouldn't be used while live intervals exist, they will be + // reinserted by VirtRegRewriter. + if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end)) + for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO) + if (MO->isReg() && MO->isUse()) + MO->setIsKill(false); + // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by + // overlapping ranges. Case 5 above. + I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); + // If this was a kill, there may also be a def. Otherwise we're done. + if (!isKill) return; - SlotIndex LastEnd = LI->begin()->start; - for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end(); - LRI != LRE; ++LRI) { - const LiveRange& LR = *LRI; - if (LastEnd > LR.start || LR.start >= LR.end) - Bogus.insert(LI); - LastEnd = LR.end; - } - } - - bool rangesOk() const { - return Bogus.empty(); - } - }; -#endif - - // Collect IntRangePairs for all operands of MI that may need fixing. - // Treat's MI's index as OldIdx (regardless of what it is in SlotIndexes' - // maps). - void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal, - RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) { - hasRegMaskOp = false; - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - const MachineOperand& MO = *MOI; - - if (MO.isRegMask()) { - hasRegMaskOp = true; - continue; - } - - if (!MO.isReg() || MO.getReg() == 0) - continue; - - unsigned Reg = MO.getReg(); - - // Don't track uses of reserved registers - they're not accurate. - // Reserved register live ranges look like a set of dead defs. - bool Resv = - TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg); - - // Collect ranges for register units. These live ranges are computed on - // demand, so just skip any that haven't been computed yet. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) - if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) - collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx, Resv); - } else { - // Collect ranges for individual virtual registers. 
- collectRanges(MO, &LIS.getInterval(Reg), - Entering, Internal, Exiting, OldIdx); - } + ++I; } - } - void collectRanges(const MachineOperand &MO, LiveInterval *LI, - RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting, - SlotIndex OldIdx, bool IgnoreReads = false) { - if (!IgnoreReads && MO.readsReg()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx); - if (LR != 0) - Entering.insert(std::make_pair(LI, LR)); + // Check for a def at OldIdx. + if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start)) + return; + // We have a def at OldIdx. + VNInfo *DefVNI = I->valno; + assert(DefVNI->def == I->start && "Inconsistent def"); + DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); + // If the defined value extends beyond NewIdx, just move the def down. + // This is case 1 above. + if (SlotIndex::isEarlierInstr(NewIdx, I->end)) { + I->start = DefVNI->def; + return; } - if (MO.isDef()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); - assert(LR != 0 && "No live range for def?"); - if (LR->end > OldIdx.getDeadSlot()) - Exiting.insert(std::make_pair(LI, LR)); - else - Internal.insert(std::make_pair(LI, LR)); + // The remaining possibilities are now: + // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx). + // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot(). + // In either case, it is possible that there is an existing def at NewIdx. + assert((I->end == OldIdx.getDeadSlot() || + SlotIndex::isSameInstr(I->end, NewIdx)) && + "Cannot move def below kill"); + LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot()); + if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) { + // There is an existing def at NewIdx, case 4 above. The def at OldIdx is + // coalesced into that value. + assert(NewI->valno != DefVNI && "Multiple defs of value?"); + LI.removeValNo(DefVNI); + return; } + // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx. + // If the def at OldIdx was dead, we allow it to be moved across other LI + // values. The new range should be placed immediately before NewI, move any + // intermediate ranges up. + assert(NewI != I && "Inconsistent iterators"); + std::copy(llvm::next(I), NewI, I); + *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } - BundleRanges createBundleRanges(RangeSet& Entering, - RangeSet& Internal, - RangeSet& Exiting) { - BundleRanges BR; + /// Update LI to reflect an instruction has been moved upwards from OldIdx + /// to NewIdx. + /// + /// 1. Live def at OldIdx: + /// Hoist def to NewIdx. + /// + /// 2. Dead def at OldIdx: + /// Hoist def+end to NewIdx, possibly move across other values. + /// + /// 3. Dead def at OldIdx AND existing def at NewIdx: + /// Remove value defined at OldIdx, coalescing it with existing value. + /// + /// 4. Live def at OldIdx AND existing def at NewIdx: + /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx. + /// (Happens when bundling multiple defs together). + /// + /// 5. Value killed at OldIdx: + /// Hoist kill to NewIdx, then scan for last kill between NewIdx and + /// OldIdx. + /// + void handleMoveUp(LiveInterval &LI) { + // First look for a kill at OldIdx. + LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); + LiveInterval::iterator E = LI.end(); + // Is LI even live at OldIdx? 
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + return; - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) { - LiveInterval* LI = EI->first; - LiveRange* LR = EI->second; - BR[LI->reg].Use = LR; + // Handle a live-in value. + if (!SlotIndex::isSameInstr(I->start, OldIdx)) { + // If the live-in value isn't killed here, there is nothing to do. + if (!SlotIndex::isSameInstr(OldIdx, I->end)) + return; + // Adjust I->end to end at NewIdx. If we are hoisting a kill above + // another use, we need to search for that use. Case 5 above. + I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); + ++I; + // If OldIdx also defines a value, there couldn't have been another use. + if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { + // No def, search for the new kill. + // This can never be an early clobber kill since there is no def. + llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot(); + return; + } } - for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); - II != IE; ++II) { - LiveInterval* LI = II->first; - LiveRange* LR = II->second; - if (LR->end.isDead()) { - BR[LI->reg].Dead = LR; - } else { - BR[LI->reg].EC = LR; + // Now deal with the def at OldIdx. + assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?"); + VNInfo *DefVNI = I->valno; + assert(DefVNI->def == I->start && "Inconsistent def"); + DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); + + // Check for an existing def at NewIdx. + LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot()); + if (SlotIndex::isSameInstr(NewI->start, NewIdx)) { + assert(NewI->valno != DefVNI && "Same value defined more than once?"); + // There is an existing def at NewIdx. + if (I->end.isDead()) { + // Case 3: Remove the dead def at OldIdx. + LI.removeValNo(DefVNI); + return; } + // Case 4: Replace def at NewIdx with live def at OldIdx. + I->start = DefVNI->def; + LI.removeValNo(NewI->valno); + return; } - for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); - EI != EE; ++EI) { - LiveInterval* LI = EI->first; - LiveRange* LR = EI->second; - BR[LI->reg].Def = LR; + // There is no existing def at NewIdx. Hoist DefVNI. + if (!I->end.isDead()) { + // Leave the end point of a live def. + I->start = DefVNI->def; + return; } - return BR; - } - - void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) { - MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx); - if (!OldKillMI->killsRegister(reg)) - return; // Bail out if we don't have kill flags on the old register. - MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx); - assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill."); - assert(!NewKillMI->killsRegister(reg) && - "New kill instr is already a kill."); - OldKillMI->clearRegisterKills(reg, &TRI); - NewKillMI->addRegisterKilled(reg, &TRI); + // DefVNI is a dead def. It may have been moved across other values in LI, + // so move I up to NewI. Slide [NewI;I) down one position. + std::copy_backward(NewI, I, llvm::next(I)); + *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } - void updateRegMaskSlots(SlotIndex OldIdx) { + void updateRegMaskSlots() { SmallVectorImpl<SlotIndex>::iterator RI = std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), OldIdx); @@ -1257,7 +1264,7 @@ private: } // Return the last use of reg between NewIdx and OldIdx. 
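A detail worth noticing in both `handleMoveDown` and `handleMoveUp` above: when a dead def moves across other segments, the code slides the intermediate ranges with `std::copy`/`std::copy_backward` and overwrites a single slot, keeping the segment vector sorted without an erase-plus-insert. The same idiom in miniature, on plain integers:

    #include <algorithm>
    #include <vector>

    // Sketch: relocate the element at index From to index To in a sorted
    // vector by sliding the in-between elements one slot, as handleMoveUp
    // does with std::copy_backward (and handleMoveDown with std::copy).
    static void moveUp(std::vector<int> &V, size_t From, size_t To,
                       int NewVal) {
      // Slide [To, From) one slot toward the back, then write NewVal at To.
      std::copy_backward(V.begin() + To, V.begin() + From,
                         V.begin() + From + 1);
      V[To] = NewVal;
    }

    int main() {
      std::vector<int> V;
      V.push_back(10); V.push_back(20); V.push_back(30); V.push_back(40);
      moveUp(V, 3, 1, 15); // hoist the last entry up to index 1 as 15
      // V is now {10, 15, 20, 30}: still sorted, nothing erased/inserted.
      return (V[1] == 15 && V[3] == 30) ? 0 : 1;
    }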
- SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) { + SlotIndex findLastUseBefore(unsigned Reg) { SlotIndex LastUse = NewIdx; if (TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -1291,233 +1298,26 @@ private: } return LastUse; } - - void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - bool LiveThrough = LR->end > OldIdx.getRegSlot(); - if (LiveThrough) - return; - SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); - if (LastUse != NewIdx) - moveKillFlags(LI->reg, NewIdx, LastUse); - LR->end = LastUse.getRegSlot(LR->end.isEarlyClobber()); - } - - void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - // Extend the LiveRange if NewIdx is past the end. - if (NewIdx > LR->end) { - // Move kill flags if OldIdx was not originally the end - // (otherwise LR->end points to an invalid slot). - if (LR->end.getRegSlot() != OldIdx.getRegSlot()) { - assert(LR->end > OldIdx && "LiveRange does not cover original slot"); - moveKillFlags(LI->reg, LR->end, NewIdx); - } - LR->end = NewIdx.getRegSlot(LR->end.isEarlyClobber()); - } - } - - void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) { - bool GoingUp = NewIdx < OldIdx; - - if (GoingUp) { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringUpFrom(OldIdx, *EI); - } else { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringDownFrom(OldIdx, *EI); - } - } - - void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && - LR->end <= OldIdx.getDeadSlot() && - "Range should be internal to OldIdx."); - LiveRange Tmp(*LR); - Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber()); - Tmp.valno->def = Tmp.start; - Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot(); - LI->removeRange(*LR); - LI->addRange(Tmp); - } - - void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) { - for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); - II != IE; ++II) - moveInternalFrom(OldIdx, *II); - } - - void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveRange* LR = P.second; - assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && - "Range should start in OldIdx."); - assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx."); - SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber()); - LR->start = NewStart; - LR->valno->def = NewStart; - } - - void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) { - for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); - EI != EE; ++EI) - moveExitingFrom(OldIdx, *EI); - } - - void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - bool LiveThrough = LR->end > OldIdx.getRegSlot(); - if (LiveThrough) { - assert((LR->start < NewIdx || BR[LI->reg].Def == LR) && - "Def in bundle should be def range."); - assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && - "If bundle has use for this reg it should be LR."); - BR[LI->reg].Use = LR; - return; - } - - SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); - moveKillFlags(LI->reg, OldIdx, LastUse); - - if (LR->start < NewIdx) { - // Becoming a new entering range. 
- assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 && - "Bundle shouldn't be re-defining reg mid-range."); - assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && - "Bundle shouldn't have different use range for same reg."); - LR->end = LastUse.getRegSlot(); - BR[LI->reg].Use = LR; - } else { - // Becoming a new Dead-def. - assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) && - "Live range starting at unexpected slot."); - assert(BR[LI->reg].Def == LR && "Reg should have def range."); - assert(BR[LI->reg].Dead == 0 && - "Can't have def and dead def of same reg in a bundle."); - LR->end = LastUse.getDeadSlot(); - BR[LI->reg].Dead = BR[LI->reg].Def; - BR[LI->reg].Def = 0; - } - } - - void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - if (NewIdx > LR->end) { - // Range extended to bundle. Add to bundle uses. - // Note: Currently adds kill flags to bundle start. - assert(BR[LI->reg].Use == 0 && - "Bundle already has use range for reg."); - moveKillFlags(LI->reg, LR->end, NewIdx); - LR->end = NewIdx.getRegSlot(); - BR[LI->reg].Use = LR; - } else { - assert(BR[LI->reg].Use != 0 && - "Bundle should already have a use range for reg."); - } - } - - void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering, - BundleRanges& BR) { - bool GoingUp = NewIdx < OldIdx; - - if (GoingUp) { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringUpFromInto(OldIdx, *EI, BR); - } else { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringDownFromInto(OldIdx, *EI, BR); - } - } - - void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - // TODO: Sane rules for moving ranges into bundles. - } - - void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal, - BundleRanges& BR) { - for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); - II != IE; ++II) - moveInternalFromInto(OldIdx, *II, BR); - } - - void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - - assert(LR->start.isRegister() && - "Don't know how to merge exiting ECs into bundles yet."); - - if (LR->end > NewIdx.getDeadSlot()) { - // This range is becoming an exiting range on the bundle. - // If there was an old dead-def of this reg, delete it. - if (BR[LI->reg].Dead != 0) { - LI->removeRange(*BR[LI->reg].Dead); - BR[LI->reg].Dead = 0; - } - assert(BR[LI->reg].Def == 0 && - "Can't have two defs for the same variable exiting a bundle."); - LR->start = NewIdx.getRegSlot(); - LR->valno->def = LR->start; - BR[LI->reg].Def = LR; - } else { - // This range is becoming internal to the bundle. - assert(LR->end == NewIdx.getRegSlot() && - "Can't bundle def whose kill is before the bundle"); - if (BR[LI->reg].Dead || BR[LI->reg].Def) { - // Already have a def for this. Just delete range. - LI->removeRange(*LR); - } else { - // Make range dead, record. - LR->end = NewIdx.getDeadSlot(); - BR[LI->reg].Dead = LR; - assert(BR[LI->reg].Use == LR && - "Range becoming dead should currently be use."); - } - // In both cases the range is no longer a use on the bundle. 
- BR[LI->reg].Use = 0; - } - } - - void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting, - BundleRanges& BR) { - for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); - EI != EE; ++EI) - moveExitingFromInto(OldIdx, *EI, BR); - } - }; -void LiveIntervals::handleMove(MachineInstr* MI) { +void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) { + assert(!MI->isBundled() && "Can't handle bundled instructions yet."); SlotIndex OldIndex = Indexes->getInstructionIndex(MI); Indexes->removeMachineInstrFromMaps(MI); - SlotIndex NewIndex = MI->isInsideBundle() ? - Indexes->getInstructionIndex(MI) : - Indexes->insertMachineInstrInMaps(MI); + SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI); assert(getMBBStartIdx(MI->getParent()) <= OldIndex && OldIndex < getMBBEndIdx(MI->getParent()) && "Cannot handle moves across basic block boundaries."); - assert(!MI->isBundled() && "Can't handle bundled instructions yet."); - HMEditor HME(*this, *MRI, *TRI, NewIndex); - HME.moveAllRangesFrom(MI, OldIndex); + HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); + HME.updateAllRanges(MI); } void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, - MachineInstr* BundleStart) { + MachineInstr* BundleStart, + bool UpdateFlags) { + SlotIndex OldIndex = Indexes->getInstructionIndex(MI); SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart); - HMEditor HME(*this, *MRI, *TRI, NewIndex); - HME.moveAllRangesInto(MI, BundleStart); + HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); + HME.updateAllRanges(MI); } diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 82710414b30..0dfb084f1e1 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -249,7 +249,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, unsigned Reg = MOI->getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) { // Check if MI reads any unreserved physregs. 
- if (Reg && MOI->readsReg() && !LIS.isReserved(Reg)) + if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) ReadsPhysRegs = true; continue; } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 7359bb92a15..6ea933d4304 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -503,8 +503,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MRI = &mf.getRegInfo(); TRI = MF->getTarget().getRegisterInfo(); - ReservedRegisters = TRI->getReservedRegs(mf); - unsigned NumRegs = TRI->getNumRegs(); PhysRegDef = new MachineInstr*[NumRegs]; PhysRegUse = new MachineInstr*[NumRegs]; @@ -588,7 +586,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { unsigned MOReg = UseRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegUse(MOReg, MBB, MI); - else if (!ReservedRegisters[MOReg]) + else if (!MRI->isReserved(MOReg)) HandlePhysRegUse(MOReg, MI); } @@ -601,7 +599,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { unsigned MOReg = DefRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); - else if (!ReservedRegisters[MOReg]) + else if (!MRI->isReserved(MOReg)) HandlePhysRegDef(MOReg, MI, Defs); } UpdatePhysRegDefs(MI, Defs); diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 896461fd194..0f260205df2 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -63,8 +63,6 @@ namespace { virtual void releaseMemory() { ScopeMap.clear(); Exps.clear(); - AllocatableRegs.clear(); - ReservedRegs.clear(); } private: @@ -78,8 +76,6 @@ namespace { ScopedHTType VNT; SmallVector<MachineInstr*, 64> Exps; unsigned CurrVN; - BitVector AllocatableRegs; - BitVector ReservedRegs; bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, @@ -242,7 +238,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, return false; for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { - if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i])) + if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i])) // Avoid extending live range of physical registers if they are //allocatable or reserved. 
return false; @@ -635,7 +631,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<MachineDominatorTree>(); - AllocatableRegs = TRI->getAllocatableSet(MF); - ReservedRegs = TRI->getReservedRegs(MF); return PerformCSE(DT->getRootNode()); } diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index bac3aa2c155..4a793281b2c 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -16,6 +16,7 @@ #include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -32,7 +33,7 @@ STATISTIC(NumDeletes, "Number of dead copies deleted"); namespace { class MachineCopyPropagation : public MachineFunctionPass { const TargetRegisterInfo *TRI; - BitVector ReservedRegs; + MachineRegisterInfo *MRI; public: static char ID; // Pass identification, replacement for typeid @@ -146,8 +147,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src); if (CI != AvailCopyMap.end()) { MachineInstr *CopyMI = CI->second; - if (!ReservedRegs.test(Def) && - (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) && + if (!MRI->isReserved(Def) && + (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) && isNopCopy(CopyMI, Def, Src, TRI)) { // The two copies cancel out and the source of the first copy // hasn't been overridden, eliminate the second one. e.g. @@ -259,7 +260,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); DI != DE; ++DI) { unsigned Reg = (*DI)->getOperand(0).getReg(); - if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg)) + if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg)) continue; (*DI)->eraseFromParent(); Changed = true; @@ -296,7 +297,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { for (SmallSetVector<MachineInstr*, 8>::iterator DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); DI != DE; ++DI) { - if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) { + if (!MRI->isReserved((*DI)->getOperand(0).getReg())) { (*DI)->eraseFromParent(); Changed = true; ++NumDeletes; @@ -311,7 +312,7 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; TRI = MF.getTarget().getRegisterInfo(); - ReservedRegs = TRI->getReservedRegs(MF); + MRI = &MF.getRegInfo(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) Changed |= CopyPropagateBlock(*I); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 91d52118576..f11785070bb 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -550,7 +550,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const { // address of a block, in which case it is the pointer size. switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: - return TD.getPointerSize(); + return TD.getPointerSize(0); case MachineJumpTableInfo::EK_GPRel64BlockAddress: return 8; case MachineJumpTableInfo::EK_GPRel32BlockAddress: @@ -570,7 +570,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const { // alignment. 
switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: - return TD.getPointerABIAlignment(); + return TD.getPointerABIAlignment(0); case MachineJumpTableInfo::EK_GPRel64BlockAddress: return TD.getABIIntegerTypeAlignment(64); case MachineJumpTableInfo::EK_GPRel32BlockAddress: diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 5fb938f3400..ae7c15be158 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -306,22 +306,18 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const { void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { ReservedRegs = TRI->getReservedRegs(MF); + assert(ReservedRegs.size() == TRI->getNumRegs() && + "Invalid ReservedRegs vector from target"); } bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, const MachineFunction &MF) const { assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); - // Check if any overlapping register is modified. + // Check if any overlapping register is modified, or allocatable so it may be + // used later. for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) - if (!def_empty(*AI)) - return false; - - // Check if any overlapping register is allocatable so it may be used later. - if (AllocatableRegs.empty()) - AllocatableRegs = TRI->getAllocatableSet(MF); - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) - if (AllocatableRegs.test(*AI)) + if (!def_empty(*AI) || isAllocatable(*AI)) return false; return true; } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 11a7d4760cb..c55e8b78988 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/ScheduleDAGILP.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Support/CommandLine.h" @@ -359,7 +360,7 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI, BB->splice(InsertPos, BB, MI); // Update LiveIntervals - LIS->handleMove(MI); + LIS->handleMove(MI, /*UpdateFlags=*/true); // Recede RegionBegin if an instruction moves above the first. if (RegionBegin == InsertPos) @@ -451,26 +452,6 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) { } } -// Release all DAG roots for scheduling. -void ScheduleDAGMI::releaseRoots() { - SmallVector<SUnit*, 16> BotRoots; - - for (std::vector<SUnit>::iterator - I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { - // A SUnit is ready to top schedule if it has no predecessors. - if (I->Preds.empty()) - SchedImpl->releaseTopNode(&(*I)); - // A SUnit is ready to bottom schedule if it has no successors. - if (I->Succs.empty()) - BotRoots.push_back(&(*I)); - } - // Release bottom roots in reverse order so the higher priority nodes appear - // first. This is more natural and slightly more efficient. - for (SmallVectorImpl<SUnit*>::const_reverse_iterator - I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) - SchedImpl->releaseBottomNode(*I); -} - /// schedule - Called back from MachineScheduler::runOnMachineFunction /// after setting up the current scheduling region. [RegionBegin, RegionEnd) /// only includes instructions that have DAG nodes, not scheduling boundaries. @@ -532,8 +513,29 @@ void ScheduleDAGMI::postprocessDAG() { } } +// Release all DAG roots for scheduling. 
+void ScheduleDAGMI::releaseRoots() { + SmallVector<SUnit*, 16> BotRoots; + + for (std::vector<SUnit>::iterator + I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { + // A SUnit is ready to top schedule if it has no predecessors. + if (I->Preds.empty()) + SchedImpl->releaseTopNode(&(*I)); + // A SUnit is ready to bottom schedule if it has no successors. + if (I->Succs.empty()) + BotRoots.push_back(&(*I)); + } + // Release bottom roots in reverse order so the higher priority nodes appear + // first. This is more natural and slightly more efficient. + for (SmallVectorImpl<SUnit*>::const_reverse_iterator + I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) + SchedImpl->releaseBottomNode(*I); +} + /// Identify DAG roots and setup scheduler queues. void ScheduleDAGMI::initQueues() { + // Initialize the strategy before modifying the DAG. SchedImpl->initialize(this); @@ -544,6 +546,8 @@ void ScheduleDAGMI::initQueues() { // Release all DAG roots for scheduling. releaseRoots(); + SchedImpl->registerRoots(); + CurrentTop = nextIfDebug(RegionBegin, RegionEnd); CurrentBottom = RegionEnd; } @@ -1198,6 +1202,86 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched); //===----------------------------------------------------------------------===// +// ILP Scheduler. Currently for experimental analysis of heuristics. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Order nodes by the ILP metric. +struct ILPOrder { + ScheduleDAGILP *ILP; + bool MaximizeILP; + + ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {} + + /// \brief Apply a less-than relation on node priority. + bool operator()(const SUnit *A, const SUnit *B) const { + // Return true if A comes after B in the Q. + if (MaximizeILP) + return ILP->getILP(A) < ILP->getILP(B); + else + return ILP->getILP(A) > ILP->getILP(B); + } +}; + +/// \brief Schedule based on the ILP metric. +class ILPScheduler : public MachineSchedStrategy { + ScheduleDAGILP ILP; + ILPOrder Cmp; + + std::vector<SUnit*> ReadyQ; +public: + ILPScheduler(bool MaximizeILP) + : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {} + + virtual void initialize(ScheduleDAGMI *DAG) { + ReadyQ.clear(); + ILP.resize(DAG->SUnits.size()); + } + + virtual void registerRoots() { + for (std::vector<SUnit*>::const_iterator + I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) { + ILP.computeILP(*I); + } + } + + /// Implement MachineSchedStrategy interface. 
+ /// ----------------------------------------- + + virtual SUnit *pickNode(bool &IsTopNode) { + if (ReadyQ.empty()) return NULL; + pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + SUnit *SU = ReadyQ.back(); + ReadyQ.pop_back(); + IsTopNode = false; + DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr() + << " ILP: " << ILP.getILP(SU) << '\n'); + return SU; + } + + virtual void schedNode(SUnit *, bool) {} + + virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } + + virtual void releaseBottomNode(SUnit *SU) { + ReadyQ.push_back(SU); + std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + } +}; +} // namespace + +static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new ILPScheduler(true)); +} +static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new ILPScheduler(false)); +} +static MachineSchedRegistry ILPMaxRegistry( + "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); +static MachineSchedRegistry ILPMinRegistry( + "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); + +//===----------------------------------------------------------------------===// // Machine Instruction Shuffler for Correctness Testing //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index bc383cba455..b117f8c3a20 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -49,7 +49,6 @@ namespace { MachineDominatorTree *DT; // Machine dominator tree MachineLoopInfo *LI; AliasAnalysis *AA; - BitVector AllocatableSet; // Which physregs are allocatable? // Remember which edges have been considered for breaking. SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8> @@ -229,7 +228,6 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { DT = &getAnalysis<MachineDominatorTree>(); LI = &getAnalysis<MachineLoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); - AllocatableSet = TRI->getAllocatableSet(MF); bool EverMadeChange = false; diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index b3abec76bc9..9686b041329 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -850,14 +850,14 @@ static bool pushDepHeight(const DataDep &Dep, return false; } -/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists -/// of all the blocks in Trace. Stop when reaching the block that contains -/// DefMI. +/// Assuming that the virtual register defined by DefMI:DefOp was used by +/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop +/// when reaching the block that contains DefMI. 
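+/// Taking the operand index DefOp instead of assuming operand 0 keeps this
+/// correct when DefMI defines more than one register.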
void MachineTraceMetrics::Ensemble:: -addLiveIns(const MachineInstr *DefMI, +addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef<const MachineBasicBlock*> Trace) { assert(!Trace.empty() && "Trace should contain at least one block"); - unsigned Reg = DefMI->getOperand(0).getReg(); + unsigned Reg = DefMI->getOperand(DefOp).getReg(); assert(TargetRegisterInfo::isVirtualRegister(Reg)); const MachineBasicBlock *DefMBB = DefMI->getParent(); @@ -950,7 +950,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel, MTM.TII)) - addLiveIns(Deps.front().DefMI, Stack); + addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack); } } } @@ -983,7 +983,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { // Update the required height of any virtual registers read by MI. for (unsigned i = 0, e = Deps.size(); i != e; ++i) if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII)) - addLiveIns(Deps[i].DefMI, Stack); + addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack); InstrCycles &MICycles = Cycles[MI]; MICycles.Height = Cycle; diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h index 5f3b1d23e41..460730b0405 100644 --- a/lib/CodeGen/MachineTraceMetrics.h +++ b/lib/CodeGen/MachineTraceMetrics.h @@ -279,7 +279,7 @@ public: unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); void computeInstrDepths(const MachineBasicBlock*); void computeInstrHeights(const MachineBasicBlock*); - void addLiveIns(const MachineInstr *DefMI, + void addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef<const MachineBasicBlock*> Trace); protected: diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 181e09ecc9e..dca68da2f3e 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -80,7 +80,6 @@ namespace { BlockSet FunctionBlocks; BitVector regsReserved; - BitVector regsAllocatable; RegSet regsLive; RegVector regsDefined, regsDead, regsKilled; RegMaskVector regMasks; @@ -186,7 +185,7 @@ namespace { } bool isAllocatable(unsigned Reg) { - return Reg < regsAllocatable.size() && regsAllocatable.test(Reg); + return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg); } // Analysis information if available @@ -427,7 +426,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { void MachineVerifier::visitMachineFunctionBefore() { lastIndex = SlotIndex(); - regsReserved = TRI->getReservedRegs(*MF); + regsReserved = MRI->getReservedRegs(); // A sub-register of a reserved register is also reserved for (int Reg = regsReserved.find_first(); Reg>=0; @@ -439,8 +438,6 @@ void MachineVerifier::visitMachineFunctionBefore() { } } - regsAllocatable = TRI->getAllocatableSet(*MF); - markReachable(&MF->front()); // Build a set of the basic blocks in the function. diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 4ea21d4ff7b..abd62efc026 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -359,7 +359,7 @@ void TargetPassConfig::addIRPasses() { // Run loop strength reduction before anything else. 
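  // Neither LSR nor LowerInvoke (below) takes the TargetLowering object any
  // more; both now look up whatever target information they need themselves.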
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { - addPass(createLoopStrengthReducePass(getTargetLowering())); + addPass(createLoopStrengthReducePass()); if (PrintLSR) addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); } @@ -389,7 +389,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: - addPass(createLowerInvokePass(TM->getTargetLowering())); + addPass(createLowerInvokePass()); // The lower invoke pass may create unreachable code. Remove it. addPass(createUnreachableBlockEliminationPass()); diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 9099862bd31..a795ac8448f 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -527,6 +527,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SeenMoveImm = true; } else { Changed |= optimizeExtInstr(MI, MBB, LocalMIs); + // optimizeExtInstr might have created new instructions after MI + // and before the already incremented MII. Adjust MII so that the + // next iteration sees the new instructions. + MII = MI; + ++MII; if (SeenMoveImm) Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 32c02bf0f03..d57bc7362de 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -490,7 +490,6 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); BitVector killedRegs(TRI->getNumRegs()); - BitVector ReservedRegs = TRI->getReservedRegs(MF); StartBlockForKills(MBB); @@ -531,7 +530,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); - if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + if ((Reg == 0) || MRI.isReserved(Reg)) continue; bool kill = false; if (!killedRegs.test(Reg)) { @@ -566,7 +565,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); - if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + if ((Reg == 0) || MRI.isReserved(Reg)) continue; LiveRegs.set(Reg); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index f573d419ea5..e096240e04b 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -509,7 +509,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, // Ignore invalid hints. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint))) + !RC->contains(Hint) || !MRI->isAllocatable(Hint))) Hint = 0; // Take hint when possible. @@ -838,7 +838,7 @@ void RAFast::AllocateBasicBlock() { // Add live-in registers as live. 
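    // Live-ins that are not allocatable (typically reserved registers) are
    // never tracked by the fast allocator, so they are skipped here.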
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) - if (RegClassInfo.isAllocatable(*I)) + if (MRI->isAllocatable(*I)) definePhysReg(MII, *I, regReserved); SmallVector<unsigned, 8> VirtDead; @@ -970,7 +970,7 @@ void RAFast::AllocateBasicBlock() { } continue; } - if (!RegClassInfo.isAllocatable(Reg)) continue; + if (!MRI->isAllocatable(Reg)) continue; if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { @@ -1058,7 +1058,7 @@ void RAFast::AllocateBasicBlock() { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (!RegClassInfo.isAllocatable(Reg)) continue; + if (!MRI->isAllocatable(Reg)) continue; definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); continue; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 984aab2a7a8..9320993d90a 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -208,8 +208,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, mri->setPhysRegUsed(Reg); } - BitVector reservedRegs = tri->getReservedRegs(*mf); - // Iterate over vregs. for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end(); vregItr != vregEnd; ++vregItr) { @@ -227,7 +225,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf); for (unsigned i = 0; i != rawOrder.size(); ++i) { unsigned preg = rawOrder[i]; - if (reservedRegs.test(preg)) + if (mri->isReserved(preg)) continue; // vregLI crosses a regmask operand that clobbers preg. @@ -357,7 +355,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( loopInfo->getLoopDepth(mbb)); if (cp.isPhys()) { - if (!lis->isAllocatable(dst)) { + if (!mf->getRegInfo().isAllocatable(dst)) { continue; } diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 652bc3015a3..805d2356730 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -15,8 +15,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -57,10 +58,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { CalleeSaved = CSR; // Different reserved registers? - BitVector RR = TRI->getReservedRegs(*MF); - if (RR != Reserved) + const BitVector &RR = MF->getRegInfo().getReservedRegs(); + if (Reserved.size() != RR.size() || RR != Reserved) { Update = true; - Reserved = RR; + Reserved = RR; + } // Invalidate cached information from previous function. if (Update) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 1b46256baf2..ba6b4569a8f 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -895,7 +895,7 @@ bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) { /// Always join simple intervals that are defined by a single copy from a /// reserved register. This doesn't increase register pressure, so it is /// always beneficial. 
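/// A typical case is a virtual register defined by a copy from the stack
/// pointer; the join simply replaces the vreg with the reserved register.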
- if (!RegClassInfo.isReserved(CP.getDstReg())) { + if (!MRI->isReserved(CP.getDstReg())) { DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); return false; } @@ -1070,7 +1070,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { /// Attempt joining with a reserved physreg. bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { assert(CP.isPhys() && "Must be a physreg copy"); - assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register"); + assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register"); LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS << '\n'); @@ -1241,6 +1241,9 @@ class JoinVals { // Value in the other live range that overlaps this def, if any. VNInfo *OtherVNI; + // Is this value an IMPLICIT_DEF? + bool IsImplicitDef; + // True when the live range of this value will be pruned because of an // overlapping CR_Replace value in the other live range. bool Pruned; @@ -1249,7 +1252,8 @@ class JoinVals { bool PrunedComputed; Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), - RedefVNI(0), OtherVNI(0), Pruned(false), PrunedComputed(false) {} + RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false), + PrunedComputed(false) {} bool isAnalyzed() const { return WriteLanes != 0; } }; @@ -1385,8 +1389,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { } // An IMPLICIT_DEF writes undef values. - if (DefMI->isImplicitDef()) + if (DefMI->isImplicitDef()) { + V.IsImplicitDef = true; V.ValidLanes &= ~V.WriteLanes; + } } // Find the value in Other that overlaps VNI->def, if any. @@ -1724,22 +1730,34 @@ void JoinVals::pruneValues(JoinVals &Other, switch (Vals[i].Resolution) { case CR_Keep: break; - case CR_Replace: + case CR_Replace: { // This value takes precedence over the value in Other.LI. LIS->pruneValue(&Other.LI, Def, &EndPoints); - // Remove <def,read-undef> flags. This def is now a partial redef. + // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF + // instructions are only inserted to provide a live-out value for PHI + // predecessors, so the instruction should simply go away once its value + // has been replaced. + Val &OtherV = Other.Vals[Vals[i].OtherVNI->id]; + bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep; if (!Def.isBlock()) { + // Remove <def,read-undef> flags. This def is now a partial redef. + // Also remove <def,dead> flags since the joined live range will + // continue past this instruction. for (MIOperands MO(Indexes->getInstructionFromIndex(Def)); MO.isValid(); ++MO) - if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) - MO->setIsUndef(false); - // This value will reach instructions below, but we need to make sure - // the live range also reaches the instruction at Def. - EndPoints.push_back(Def); + if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) { + MO->setIsUndef(EraseImpDef); + MO->setIsDead(false); + } + // This value will reach instructions below, but we need to make sure + // the live range also reaches the instruction at Def. 
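+        // Unless the replaced value was only an IMPLICIT_DEF: then there is
+        // no instruction left for the live range to cover.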
+ if (!EraseImpDef) + EndPoints.push_back(Def); } DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def << ": " << Other.LI << '\n'); break; + } case CR_Erase: case CR_Merge: if (isPrunedValue(i, Other)) { @@ -1762,21 +1780,41 @@ void JoinVals::pruneValues(JoinVals &Other, void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs, SmallVectorImpl<unsigned> &ShrinkRegs) { for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { - if (Vals[i].Resolution != CR_Erase) - continue; + // Get the def location before markUnused() below invalidates it. SlotIndex Def = LI.getValNumInfo(i)->def; - MachineInstr *MI = Indexes->getInstructionFromIndex(Def); - assert(MI && "No instruction to erase"); - if (MI->isCopy()) { - unsigned Reg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg) && - Reg != CP.getSrcReg() && Reg != CP.getDstReg()) - ShrinkRegs.push_back(Reg); + switch (Vals[i].Resolution) { + case CR_Keep: + // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any + // longer. The IMPLICIT_DEF instructions are only inserted by + // PHIElimination to guarantee that all PHI predecessors have a value. + if (!Vals[i].IsImplicitDef || !Vals[i].Pruned) + break; + // Remove value number i from LI. Note that this VNInfo is still present + // in NewVNInfo, so it will appear as an unused value number in the final + // joined interval. + LI.getValNumInfo(i)->markUnused(); + LI.removeValNo(LI.getValNumInfo(i)); + DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n'); + // FALL THROUGH. + + case CR_Erase: { + MachineInstr *MI = Indexes->getInstructionFromIndex(Def); + assert(MI && "No instruction to erase"); + if (MI->isCopy()) { + unsigned Reg = MI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg) && + Reg != CP.getSrcReg() && Reg != CP.getDstReg()) + ShrinkRegs.push_back(Reg); + } + ErasedInstrs.insert(MI); + DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + break; + } + default: + break; } - ErasedInstrs.insert(MI); - DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); - LIS->RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); } } diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index b267aea816c..94779770e0e 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -337,7 +337,7 @@ static void collectOperands(const MachineInstr *MI, PhysRegOperands &PhysRegOpers, VirtRegOperands &VirtRegOpers, const TargetRegisterInfo *TRI, - const RegisterClassInfo *RCI) { + const MachineRegisterInfo *MRI) { for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) { const MachineOperand &MO = *OperI; if (!MO.isReg() || !MO.getReg()) @@ -345,7 +345,7 @@ static void collectOperands(const MachineInstr *MI, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) VirtRegOpers.collect(MO, TRI); - else if (RCI->isAllocatable(MO.getReg())) + else if (MRI->isAllocatable(MO.getReg())) PhysRegOpers.collect(MO, TRI); } // Remove redundant physreg dead defs. @@ -451,7 +451,7 @@ bool RegPressureTracker::recede() { PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI); // Boost pressure for all dead defs together. 
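  // Dead defs are live only at the def itself, so the bump is undone again
  // once the defs have been processed.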
increasePhysRegPressure(PhysRegOpers.DeadDefs); @@ -524,7 +524,7 @@ bool RegPressureTracker::advance() { PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI); // Kill liveness at last uses. for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { @@ -666,7 +666,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { // Account for register pressure similar to RegPressureTracker::recede(). PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI); // Boost max pressure for all dead defs together. // Since CurrSetPressure and MaxSetPressure @@ -752,7 +752,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { // Account for register pressure similar to RegPressureTracker::recede(). PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI); // Kill liveness at last uses. Assume allocatable physregs are single-use // rather than checking LiveIntervals. diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index d673794e1b9..5ec6564ce39 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -92,9 +92,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { KillRegs.resize(NumPhysRegs); DefRegs.resize(NumPhysRegs); - // Create reserved registers bitvector. - ReservedRegs = TRI->getReservedRegs(MF); - // Create callee-saved registers bitvector. CalleeSavedRegs.resize(NumPhysRegs); const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); @@ -225,9 +222,9 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { used = RegsAvailable; used.flip(); if (includeReserved) - used |= ReservedRegs; + used |= MRI->getReservedRegs(); else - used.reset(ReservedRegs); + used.reset(MRI->getReservedRegs()); } unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index aa45a6861ca..8dcbf83353d 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAGILP.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" @@ -30,6 +31,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallPtrSet.h" @@ -933,3 +935,94 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { std::string ScheduleDAGInstrs::getDAGName() const { return "dag." + BB->getFullName(); } + +namespace { +/// \brief Manage the stack used by a reverse depth-first search over the DAG. 
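+/// Each stack entry pairs a node with the next predecessor edge to visit,
+/// so the walk can resume where it left off after finishing a subtree.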
+class SchedDAGReverseDFS {
+  std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+public:
+  bool isComplete() const { return DFSStack.empty(); }
+
+  void follow(const SUnit *SU) {
+    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
+  }
+  void advance() { ++DFSStack.back().second; }
+
+  void backtrack() { DFSStack.pop_back(); }
+
+  const SUnit *getCurr() const { return DFSStack.back().first; }
+
+  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
+
+  SUnit::const_pred_iterator getPredEnd() const {
+    return getCurr()->Preds.end();
+  }
+};
+} // anonymous
+
+void ScheduleDAGILP::resize(unsigned NumSUnits) {
+  ILPValues.resize(NumSUnits);
+}
+
+ILPValue ScheduleDAGILP::getILP(const SUnit *SU) {
+  return ILPValues[SU->NodeNum];
+}
+
+// A leaf node has an ILP of 1/1.
+static ILPValue initILP(const SUnit *SU) {
+  unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1;
+  return ILPValue(Cnt, 1 + SU->getDepth());
+}
+
+/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// search from this root.
+void ScheduleDAGILP::computeILP(const SUnit *Root) {
+  if (!IsBottomUp)
+    llvm_unreachable("Top-down ILP metric is unimplemented");
+
+  SchedDAGReverseDFS DFS;
+  // Mark a node visited by validating it.
+  ILPValues[Root->NodeNum] = initILP(Root);
+  DFS.follow(Root);
+  for (;;) {
+    // Traverse the leftmost path as far as possible.
+    while (DFS.getPred() != DFS.getPredEnd()) {
+      const SUnit *PredSU = DFS.getPred()->getSUnit();
+      DFS.advance();
+      // If the pred is already valid, skip it.
+      if (ILPValues[PredSU->NodeNum].isValid())
+        continue;
+      ILPValues[PredSU->NodeNum] = initILP(PredSU);
+      DFS.follow(PredSU);
+    }
+    // Visit the top of the stack in postorder and backtrack.
+    unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
+    DFS.backtrack();
+    if (DFS.isComplete())
+      break;
+    // Add the recently finished predecessor's bottom-up descendant count.
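+    // A predecessor reachable from several nodes is attributed to whichever
+    // successor finishes it first; the validity check above counts it once.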
+    ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
+  }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ILPValue::print(raw_ostream &OS) const {
+  if (!isValid())
+    OS << "BADILP";
+  else
+    OS << InstrCount << " / " << Cycles << " = "
+       << format("%g", ((double)InstrCount / Cycles));
+}
+
+void ILPValue::dump() const {
+  dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+  Val.print(OS);
+  return OS;
+}
+
+} // namespace llvm
+#endif // !NDEBUG || LLVM_ENABLE_DUMP
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 80f495309d1..2ec129f7308 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1587,26 +1587,71 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
     break;
   case TargetLowering::Expand: {
     ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+    ISD::CondCode InvCC = ISD::SETCC_INVALID;
     unsigned Opc = 0;
     switch (CCCode) {
     default: llvm_unreachable("Don't know how to expand this condition!");
-    case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    // FIXME: Implement more expansions.
-    }
-
-    SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
-    SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+    case ISD::SETO:
+        assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
+            == TargetLowering::Legal
+            && "If SETO is expanded, SETOEQ must be legal!");
+        CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+    case ISD::SETUO:
+        assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
+            == TargetLowering::Legal
+            && "If SETUO is expanded, SETUNE must be legal!");
+        CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+    case ISD::SETOEQ:
+    case ISD::SETOGT:
+    case ISD::SETOGE:
+    case ISD::SETOLT:
+    case ISD::SETOLE:
+    case ISD::SETONE:
+    case ISD::SETUEQ:
+    case ISD::SETUNE:
+    case ISD::SETUGT:
+    case ISD::SETUGE:
+    case ISD::SETULT:
+    case ISD::SETULE:
+        // If we are floating point, assign and break, otherwise fall through.
+        if (!OpVT.isInteger()) {
+          // We can use the 4th bit to tell if we are the unordered
+          // or ordered version of the opcode.
+          CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+          Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+          CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+          break;
+        }
+        // Fall through if this is an unsigned integer comparison.
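+    // Integer comparisons are expanded below by swapping the operands and
+    // using the swapped condition code, which must not itself need expansion.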
+    case ISD::SETLE:
+    case ISD::SETGT:
+    case ISD::SETGE:
+    case ISD::SETLT:
+    case ISD::SETNE:
+    case ISD::SETEQ:
+      InvCC = ISD::getSetCCSwappedOperands(CCCode);
+      if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
+        // We only support using the inverted operation and not a
+        // different manner of supporting expanding these cases.
+        llvm_unreachable("Don't know how to expand this condition!");
+      }
+      LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
+      RHS = SDValue();
+      CC = SDValue();
+      return;
+    }
+
+    SDValue SetCC1, SetCC2;
+    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+      // If we aren't the ordered or unordered operation,
+      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+      SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+      SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+    } else {
+      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS).
+      SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+      SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+    }
     LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
     RHS = SDValue();
     CC = SDValue();
@@ -3108,6 +3153,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Tmp3 = Node->getOperand(1);
     if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
         (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+         // If div is legal, it's better to do the normal expansion
+         !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
          useDivRem(Node, isSigned, false))) {
       Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
     } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e3938968b20..92dc5a9831b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1245,32 +1245,30 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
   DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Res = SDValue();
-  if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
-      == TargetLowering::Custom)
-    Res = TLI.LowerOperation(SDValue(N, 0), DAG);
-
-  if (Res.getNode() == 0) {
-    switch (N->getOpcode()) {
-    default:
-    #ifndef NDEBUG
-      dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
-      N->dump(&DAG); dbgs() << "\n";
-    #endif
-      llvm_unreachable("Do not know how to expand this operator's operand!");
-
-    case ISD::BITCAST:         Res = ExpandOp_BITCAST(N); break;
-    case ISD::BUILD_VECTOR:    Res = ExpandOp_BUILD_VECTOR(N); break;
-    case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
-
-    case ISD::BR_CC:           Res = ExpandFloatOp_BR_CC(N); break;
-    case ISD::FP_ROUND:        Res = ExpandFloatOp_FP_ROUND(N); break;
-    case ISD::FP_TO_SINT:      Res = ExpandFloatOp_FP_TO_SINT(N); break;
-    case ISD::FP_TO_UINT:      Res = ExpandFloatOp_FP_TO_UINT(N); break;
-    case ISD::SELECT_CC:       Res = ExpandFloatOp_SELECT_CC(N); break;
-    case ISD::SETCC:           Res = ExpandFloatOp_SETCC(N); break;
-    case ISD::STORE:           Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
-                                                         OpNo); break;
-    }
+  // See if the target wants to custom expand this node.
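+  // CustomLowerNode registers any replacement values itself, so a successful
+  // custom expansion leaves nothing more to do for this operand.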
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand this operator's operand!"); + + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + + case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; + case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), + OpNo); break; } // If the result is null, the sub-method took care of registering results etc. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 37f0e60087d..20b7ce6b15b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -634,7 +634,7 @@ private: SDValue WidenVecRes_InregOp(SDNode *N); // Widen Vector Operand. - bool WidenVectorOperand(SDNode *N, unsigned ResNo); + bool WidenVectorOperand(SDNode *N, unsigned OpNo); SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index bb54fd24e21..6bcb3b25e98 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -124,6 +124,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // there are only two nodes left, i.e. Lo and Hi. SDValue LHS = Vals[Slot]; SDValue RHS = Vals[Slot + 1]; + + if (TLI.isBigEndian()) + std::swap(LHS, RHS); + Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, EVT::getIntegerVT( *DAG.getContext(), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index eca4d99098a..d51a6eb192e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2082,16 +2082,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { //===----------------------------------------------------------------------===// // Widen Vector Operand //===----------------------------------------------------------------------===// -bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Widen node operand " << ResNo << ": "; +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); + // See if the target wants to custom widen this node. 
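+  // As with float operand expansion above, a successful custom lowering has
+  // already registered its results.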
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + switch (N->getOpcode()) { default: #ifndef NDEBUG - dbgs() << "WidenVectorOperand op #" << ResNo << ": "; + dbgs() << "WidenVectorOperand op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 79cfcdfe0ea..183416f3fd2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3449,9 +3449,12 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, IsZeroVal, MemcpyStrSrc, DAG.getMachineFunction()); + Type *vtType = VT.isExtended() ? VT.getTypeForEVT(*DAG.getContext()) : NULL; + unsigned AS = (vtType && vtType->isPointerTy()) ? + cast<PointerType>(vtType)->getAddressSpace() : 0; if (VT == MVT::Other) { - if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() || + if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) || TLI.allowsUnalignedMemoryAccesses(VT)) { VT = TLI.getPointerTy(); } else { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 8fc9c70f995..c314fa5b511 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -474,6 +474,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MRI.replaceRegWith(From, To); } + // Freeze the set of reserved registers now that MachineFrameInfo has been + // set up. All the information required by getReservedRegs() should be + // available now. + MRI.freezeReservedRegs(*MF); + // Release function-specific state. SDB and CurDAG are already cleared // at this point. FuncInfo->clear(); diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 8ed66f70443..4439192fe2f 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -563,6 +563,8 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData, /// Return the default expected latency for a def based on it's opcode. 
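/// Transient instructions (COPY, IMPLICIT_DEF, and the like) are expected to
/// generate no real machine code, so they are given zero latency below.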
unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, const MachineInstr *DefMI) const { + if (DefMI->isTransient()) + return 0; if (DefMI->mayLoad()) return SchedModel->LoadLatency; if (isHighLatencyDef(DefMI->getOpcode())) diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 8f5d770f665..bf26a6d5920 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -77,9 +77,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Flags, SectionKind::getDataRel(), 0, Label->getName()); - unsigned Size = TM.getDataLayout()->getPointerSize(); + unsigned Size = TM.getDataLayout()->getPointerSize(0); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); + Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment(0)); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 4e753c6ecb4..7a6e2604d77 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -50,10 +50,12 @@ unsigned TargetSchedModel::getNumMicroOps(MachineInstr *MI) const { int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass()); return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI); } - if (hasInstrSchedModel()) - return resolveSchedClass(MI)->NumMicroOps; - - return 1; + if (hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); + if (SCDesc->isValid()) + return SCDesc->NumMicroOps; + } + return MI->isTransient() ? 0 : 1; } /// If we can determine the operand latency from the def only, without machine @@ -199,7 +201,7 @@ unsigned TargetSchedModel::computeOperandLatency( report_fatal_error(ss.str()); } #endif - return 1; + return DefMI->isTransient() ? 0 : 1; } unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { @@ -209,16 +211,18 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { return TII->getInstrLatency(&InstrItins, MI); if (hasInstrSchedModel()) { - unsigned Latency = 0; const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); - for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; - DefIdx != DefEnd; ++DefIdx) { - // Lookup the definition's write latency in SubtargetInfo. - const MCWriteLatencyEntry *WLEntry = - STI->getWriteLatencyEntry(SCDesc, DefIdx); - Latency = std::max(Latency, WLEntry->Cycles); + if (SCDesc->isValid()) { + unsigned Latency = 0; + for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; + DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + Latency = std::max(Latency, WLEntry->Cycles); + } + return Latency; } - return Latency; } return TII->defaultDefLatency(&SchedModel, MI); } @@ -251,10 +255,12 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, // an unbuffered resource. If so, it treated like an in-order cpu. 
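  // Buffered resources can absorb back-to-back writes; only an unbuffered
  // resource forces the conservative one-cycle output latency returned here.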
if (hasInstrSchedModel()) { const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); - for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc), - *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) { - if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered) - return 1; + if (SCDesc->isValid()) { + for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc), + *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) { + if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered) + return 1; + } } } return 0; diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index a69a8169d30..bb93bdc0bc2 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -257,9 +257,6 @@ void VirtRegRewriter::rewrite() { SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; -#ifndef NDEBUG - BitVector Reserved = TRI->getReservedRegs(*MF); -#endif for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { @@ -283,7 +280,7 @@ void VirtRegRewriter::rewrite() { unsigned PhysReg = VRM->getPhys(VirtReg); assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Instruction uses unmapped VirtReg"); - assert(!Reserved.test(PhysReg) && "Reserved register assignment"); + assert(!MRI->isReserved(PhysReg) && "Reserved register assignment"); // Preserve semantics of sub-register operands. if (MO.getSubReg()) { diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index c5c46815a28..94a2542e7ad 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -17,6 +17,7 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ADT/SmallString.h" @@ -267,7 +268,7 @@ public: void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE, const std::vector<std::string> &InputArgv) { clear(); // Free the old contents. - unsigned PtrSize = EE->getDataLayout()->getPointerSize(); + unsigned PtrSize = EE->getDataLayout()->getPointerSize(0); Array = new char[(InputArgv.size()+1)*PtrSize]; DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n"); @@ -342,7 +343,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) { #ifndef NDEBUG /// isTargetNullPtr - Return whether the target pointer stored at Loc is null. 
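/// A null target pointer is assumed to be all zero bytes, so the bytes at Loc
/// are simply tested one at a time.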
static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) {
-  unsigned PtrSize = EE->getDataLayout()->getPointerSize();
+  unsigned PtrSize = EE->getDataLayout()->getPointerSize(0);
   for (unsigned i = 0; i < PtrSize; ++i)
     if (*(i + (uint8_t*)Loc))
       return false;
@@ -644,13 +645,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
   }
   case Instruction::PtrToInt: {
     GenericValue GV = getConstantValue(Op0);
-    uint32_t PtrWidth = TD->getPointerSizeInBits();
+    unsigned AS = cast<PointerType>(Op0->getType())->getAddressSpace();
+    uint32_t PtrWidth = TD->getPointerSizeInBits(AS);
     GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal));
     return GV;
   }
   case Instruction::IntToPtr: {
     GenericValue GV = getConstantValue(Op0);
-    uint32_t PtrWidth = TD->getPointerSizeInBits();
+    unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
+    uint32_t PtrWidth = TD->getPointerSizeInBits(AS);
     if (PtrWidth != GV.IntVal.getBitWidth())
       GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth);
     assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width");
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 5202b091654..326bf79c589 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1054,7 +1054,8 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy,
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
   assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction");
-  uint32_t PtrSize = TD.getPointerSizeInBits();
+  unsigned AS = cast<PointerType>(DstTy)->getAddressSpace();
+  uint32_t PtrSize = TD.getPointerSizeInBits(AS);
   if (PtrSize != Src.IntVal.getBitWidth())
     Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize);
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index e16e2d112a9..f58adbe1e1a 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -376,7 +376,7 @@ GenericValue lle_X_sprintf(FunctionType *FT,
       case 'x': case 'X':
         if (HowLong >= 1) {
           if (HowLong == 1 &&
-              TheInterpreter->getDataLayout()->getPointerSizeInBits() == 64 &&
+              TheInterpreter->getDataLayout()->getPointerSizeInBits(0) == 64 &&
               sizeof(long) < sizeof(int64_t)) {
             // Make sure we use %lld with a 64 bit argument because we might be
             // compiling LLI on a 32 bit compiler.
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 19c197903a6..bcd5b263654 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -14,7 +14,9 @@
 #include "JIT.h"
 #include "JITDwarfEmitter.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -66,7 +68,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
 void JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
                                      const std::vector<MachineMove> &Moves) const {
-  unsigned PointerSize = TD->getPointerSize();
+  unsigned PointerSize = TD->getPointerSize(0);
   int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
PointerSize : -PointerSize; MCSymbol *BaseLabel = 0; @@ -378,7 +380,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, for (unsigned i = 0, e = CallSites.size(); i < e; ++i) SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action); - unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(); + unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(0); unsigned TypeOffset = sizeof(int8_t) + // Call site format // Call-site table length @@ -454,12 +456,12 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, const GlobalVariable *GV = TypeInfos[M - 1]; if (GV) { - if (TD->getPointerSize() == sizeof(int32_t)) + if (TD->getPointerSize(GV->getType()->getAddressSpace()) == sizeof(int32_t)) JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV)); else JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV)); } else { - if (TD->getPointerSize() == sizeof(int32_t)) + if (TD->getPointerSize(0) == sizeof(int32_t)) JCE->emitInt32(0); else JCE->emitInt64(0); @@ -481,7 +483,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, unsigned char* JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { - unsigned PointerSize = TD->getPointerSize(); + unsigned PointerSize = TD->getPointerSize(0); int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ? PointerSize : -PointerSize; @@ -541,7 +543,7 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality, unsigned char* StartFunction, unsigned char* EndFunction, unsigned char* ExceptionTable) const { - unsigned PointerSize = TD->getPointerSize(); + unsigned PointerSize = TD->getPointerSize(0); // EH frame header. unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue(); diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 1198a7be372..ecafda7286f 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -384,11 +384,6 @@ namespace { delete MemMgr; } - /// classof - Methods for support type inquiry through isa, cast, and - /// dyn_cast: - /// - static inline bool classof(const MachineCodeEmitter*) { return true; } - JITResolver &getJITResolver() { return Resolver; } virtual void startFunction(MachineFunction &F); @@ -1265,15 +1260,13 @@ void *JIT::getPointerToFunctionOrStub(Function *F) { return Addr; // Get a stub if the target supports it. - assert(isa<JITEmitter>(JCE) && "Unexpected MCE?"); - JITEmitter *JE = cast<JITEmitter>(getCodeEmitter()); + JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter()); return JE->getJITResolver().getLazyFunctionStub(F); } void JIT::updateFunctionStub(Function *F) { // Get the empty stub we generated earlier. - assert(isa<JITEmitter>(JCE) && "Unexpected MCE?"); - JITEmitter *JE = cast<JITEmitter>(getCodeEmitter()); + JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter()); void *Stub = JE->getJITResolver().getLazyFunctionStub(F); void *Addr = getPointerToGlobalIfAvailable(F); assert(Addr != Stub && "Function must have non-stub address to be updated."); @@ -1294,6 +1287,5 @@ void JIT::freeMachineCodeForFunction(Function *F) { updateGlobalMapping(F, 0); // Free the actual memory for the function body and related stuff. 
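  // JCE is always the JITEmitter this JIT created; the isa<>/cast<> support
  // was removed from MachineCodeEmitter, hence the static_cast below.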
- assert(isa<JITEmitter>(JCE) && "Unexpected MCE?"); - cast<JITEmitter>(JCE)->deallocateMemForFunction(F); + static_cast<JITEmitter*>(JCE)->deallocateMemForFunction(F); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index eb69693359d..c1f8baed1a4 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -378,17 +378,17 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID, void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value) { - // Ignore relocations for sections that were not loaded - if (Sections[RE.SectionID].Address != 0) { - uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset; - DEBUG(dbgs() << "\tSectionID: " << RE.SectionID - << " + " << RE.Offset << " (" << format("%p", Target) << ")" - << " RelType: " << RE.RelType - << " Addend: " << RE.Addend - << "\n"); - - resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset, - Value, RE.RelType, RE.Addend); + // Ignore relocations for sections that were not loaded + if (Sections[RE.SectionID].Address != 0) { + uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset; + DEBUG(dbgs() << "\tSectionID: " << RE.SectionID + << " + " << RE.Offset << " (" << format("%p", Target) << ")" + << " RelType: " << RE.RelType + << " Addend: " << RE.Addend + << "\n"); + + resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset, + Value, RE.RelType, RE.Addend); } } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index efefacf632d..08aba64e460 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -59,9 +59,6 @@ public: const ELFObjectFile<target_endianness, is64Bits> *v) { return v->isDyldType(); } - static inline bool classof(const DyldELFObject *v) { - return true; - } }; template<support::endianness target_endianness, bool is64Bits> @@ -416,7 +413,13 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, if (si == Obj.end_sections()) llvm_unreachable("Symbol section not found, bad object file format!"); DEBUG(dbgs() << "\t\tThis is section symbol\n"); - Value.SectionID = findOrEmitSection(Obj, (*si), true, ObjSectionToID); + // Default to 'true' in case isText fails (though it never does). 
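+      // isText reports its result through an out-parameter and returns an
+      // error_code, so the flag is seeded before the call.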
+ bool isCode = true; + si->isText(isCode); + Value.SectionID = findOrEmitSection(Obj, + (*si), + isCode, + ObjSectionToID); Value.Addend = Addend; break; } diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index b0bc2900ecb..17a6323d0e7 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -251,6 +251,7 @@ public: virtual void EmitPad(int64_t Offset); virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool); + virtual void EmitTCEntry(const MCSymbol &S); virtual void EmitInstruction(const MCInst &Inst); @@ -1299,6 +1300,14 @@ void MCAsmStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, EmitEOL(); } +void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) { + OS << "\t.tc "; + OS << S.getName(); + OS << "[TC],"; + OS << S.getName(); + EmitEOL(); +} + void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { assert(getCurrentSection() && "Cannot emit contents before setting section!"); diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 8107005481d..14fbc1ec839 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -103,6 +103,8 @@ public: virtual void EmitFileDirective(StringRef Filename); + virtual void EmitTCEntry(const MCSymbol &S); + virtual void FinishImpl(); private: @@ -469,6 +471,12 @@ void MCELFStreamer::FinishImpl() { this->MCObjectStreamer::FinishImpl(); } +void MCELFStreamer::EmitTCEntry(const MCSymbol &S) +{ + // Creates a R_PPC64_TOC relocation + MCObjectStreamer::EmitSymbolValue(&S, 8, 0); +} + MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *CE, bool RelaxAll, bool NoExecStack) { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index c2fff3c5207..0406ff8d446 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -133,6 +133,15 @@ private: /// IsDarwin - is Darwin compatibility enabled? bool IsDarwin; + /// ParsingInlineAsm - Are we parsing ms-style inline assembly? + bool ParsingInlineAsm; + + /// ParsedOperands - The parsed operands from the last parsed statement. + SmallVector<MCParsedAsmOperand*, 8> ParsedOperands; + + /// Opcode - The opcode from the last parsed instruction. 
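+  /// Initialized to ~0x0; isInstruction() uses that sentinel to tell whether
+  /// the last parsed statement actually matched an instruction.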
+ unsigned Opcode; + public: AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, const MCAsmInfo &MAI); @@ -171,6 +180,21 @@ public: virtual const AsmToken &Lex(); + bool ParseStatement(); + void setParsingInlineAsm(bool V) { ParsingInlineAsm = V; } + unsigned getNumParsedOperands() { return ParsedOperands.size(); } + MCParsedAsmOperand &getParsedOperand(unsigned OpNum) { + assert (ParsedOperands.size() > OpNum); + return *ParsedOperands[OpNum]; + } + void freeParsedOperands() { + for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) + delete ParsedOperands[i]; + ParsedOperands.clear(); + } + bool isInstruction() { return Opcode != (unsigned)~0x0; } + unsigned getOpcode() { return Opcode; } + bool ParseExpression(const MCExpr *&Res); virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc); virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); @@ -181,7 +205,6 @@ public: private: void CheckForValidSection(); - bool ParseStatement(); void EatToEndOfLine(); bool ParseCppHashLineFilenameComment(const SMLoc &L); @@ -412,7 +435,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM), GenericParser(new GenericAsmParser), PlatformParser(0), CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0), - AssemblerDialect(~0U), IsDarwin(false) { + AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false), + Opcode(~0x0) { // Save the old handler. SavedDiagHandler = SrcMgr.getDiagHandler(); SavedDiagContext = SrcMgr.getDiagContext(); @@ -604,7 +628,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { } void AsmParser::CheckForValidSection() { - if (!getStreamer().getCurrentSection()) { + if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) { TokError("expected section directive before assembly directive"); Out.SwitchSection(Ctx.getMachOSection( "__TEXT", "__text", @@ -1310,12 +1334,11 @@ bool AsmParser::ParseStatement() { CheckForValidSection(); // Canonicalize the opcode to lower case. - SmallString<128> Opcode; + SmallString<128> OpcodeStr; for (unsigned i = 0, e = IDVal.size(); i != e; ++i) - Opcode.push_back(tolower(IDVal[i])); + OpcodeStr.push_back(tolower(IDVal[i])); - SmallVector<MCParsedAsmOperand*, 8> ParsedOperands; - bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc, + bool HadError = getTargetParser().ParseInstruction(OpcodeStr.str(), IDLoc, ParsedOperands); // Dump the parsed representation, if requested. @@ -1346,13 +1369,18 @@ bool AsmParser::ParseStatement() { } // If parsing succeeded, match the instruction. - if (!HadError) - HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, ParsedOperands, - Out); - - // Free any parsed operands. - for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) - delete ParsedOperands[i]; + if (!HadError) { + unsigned ErrorInfo; + HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, Opcode, + ParsedOperands, Out, + ErrorInfo, + ParsingInlineAsm); + } + + // Free any parsed operands. If parsing ms-style inline assembly it is the + // responsibility of the caller (i.e., clang) to free the parsed operands. + if (!ParsingInlineAsm) + freeParsedOperands(); // Don't skip the rest of the line, the instruction parser is responsible for // that. 
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 0bac24dc3a7..afece0ba551 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -561,6 +561,10 @@ void MCStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool) { abort(); } +void MCStreamer::EmitTCEntry(const MCSymbol &S) { + llvm_unreachable("Unsupported method"); +} + /// EmitRawText - If this file is backed by an assembly streamer, this dumps /// the specified string in the output .s file. This capability is /// indicated by the hasRawTextSupport() predicate. diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index abfaecc2790..2cc7a584624 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -98,6 +98,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case BGP: return "bgp"; case BGQ: return "bgq"; case Freescale: return "fsl"; + case IBM: return "ibm"; } llvm_unreachable("Invalid VendorType!"); @@ -128,6 +129,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case NativeClient: return "nacl"; case CNK: return "cnk"; case Bitrig: return "bitrig"; + case AIX: return "aix"; } llvm_unreachable("Invalid OSType"); @@ -278,6 +280,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("bgp", Triple::BGP) .Case("bgq", Triple::BGQ) .Case("fsl", Triple::Freescale) + .Case("ibm", Triple::IBM) .Default(Triple::UnknownVendor); } @@ -304,6 +307,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("nacl", Triple::NativeClient) .StartsWith("cnk", Triple::CNK) .StartsWith("bitrig", Triple::Bitrig) + .StartsWith("aix", Triple::AIX) .Default(Triple::UnknownOS); } diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt index 28240f4a673..4f64eb4ff24 100644 --- a/lib/TableGen/CMakeLists.txt +++ b/lib/TableGen/CMakeLists.txt @@ -1,5 +1,3 @@ -## FIXME: This only requires RTTI because tblgen uses it. Fix that. -set(LLVM_REQUIRES_RTTI 1) set(LLVM_REQUIRES_EH 1) add_llvm_library(LLVMTableGen diff --git a/lib/TableGen/Makefile b/lib/TableGen/Makefile index 44724389e1d..732d8a197ee 100644 --- a/lib/TableGen/Makefile +++ b/lib/TableGen/Makefile @@ -11,8 +11,6 @@ LEVEL = ../.. LIBRARYNAME = LLVMTableGen BUILD_ARCHIVE = 1 -## FIXME: This only requires RTTI because tblgen uses it. Fix that. -REQUIRES_RTTI = 1 REQUIRES_EH = 1 include $(LEVEL)/Makefile.common diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 83f2fff9312..c7b2de2b0fe 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -200,7 +200,7 @@ Init *IntRecTy::convertValue(BitInit *BI) { Init *IntRecTy::convertValue(BitsInit *BI) { int64_t Result = 0; for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) - if (BitInit *Bit = dynamic_cast<BitInit*>(BI->getBit(i))) { + if (BitInit *Bit = dyn_cast<BitInit>(BI->getBit(i))) { Result |= Bit->getValue() << i; } else { return 0; @@ -615,7 +615,7 @@ ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const { Record *ListInit::getElementAsRecord(unsigned i) const { assert(i < Values.size() && "List element index out of range!"); - DefInit *DI = dynamic_cast<DefInit*>(Values[i]); + DefInit *DI = dyn_cast<DefInit>(Values[i]); if (DI == 0) throw "Expected record in list!"; return DI->getDef(); } @@ -650,7 +650,7 @@ Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV, // If the element is set to some value, or if we are resolving a reference // to a specific variable and that variable is explicitly unset, then // replace the VarListElementInit with it. 
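
The long run of replacements that begins here swaps C++ RTTI for LLVM's casting templates: isa<> for a pure type test, dyn_cast<> for a checked cast, and dyn_cast_or_null<> where the operand may be null. Because these dispatch through each class's classof() rather than through vtables, the TableGen libraries can drop LLVM_REQUIRES_RTTI in the build files above, and the always-true classof(const X*) overloads disappear from the ARM constant-pool classes further down. A minimal sketch of the idiom, using a hypothetical helper over the TableGen Init hierarchy:

  Init *simplify(Init *V) {                       // hypothetical helper
    if (isa<UnsetInit>(V))                        // type test only, no cast needed
      return 0;
    if (StringInit *S = dyn_cast<StringInit>(V))  // null if V is not a StringInit
      return S;
    return V;                                     // dyn_cast_or_null<> if V could be 0
  }
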
- if (IRV || !dynamic_cast<UnsetInit*>(E)) + if (IRV || !isa<UnsetInit>(E)) return E; return 0; } @@ -667,13 +667,13 @@ std::string ListInit::getAsString() const { Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV, unsigned Elt) const { Init *Resolved = resolveReferences(R, IRV); - OpInit *OResolved = dynamic_cast<OpInit *>(Resolved); + OpInit *OResolved = dyn_cast<OpInit>(Resolved); if (OResolved) { Resolved = OResolved->Fold(&R, 0); } if (Resolved != this) { - TypedInit *Typed = dynamic_cast<TypedInit *>(Resolved); + TypedInit *Typed = dyn_cast<TypedInit>(Resolved); assert(Typed && "Expected typed init for list reference"); if (Typed) { Init *New = Typed->resolveListElementReference(R, IRV, Elt); @@ -709,23 +709,16 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { switch (getOpcode()) { case CAST: { if (getType()->getAsString() == "string") { - StringInit *LHSs = dynamic_cast<StringInit*>(LHS); - if (LHSs) { + if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) return LHSs; - } - DefInit *LHSd = dynamic_cast<DefInit*>(LHS); - if (LHSd) { + if (DefInit *LHSd = dyn_cast<DefInit>(LHS)) return StringInit::get(LHSd->getDef()->getName()); - } - IntInit *LHSi = dynamic_cast<IntInit*>(LHS); - if (LHSi) { + if (IntInit *LHSi = dyn_cast<IntInit>(LHS)) return StringInit::get(LHSi->getAsString()); - } } else { - StringInit *LHSs = dynamic_cast<StringInit*>(LHS); - if (LHSs) { + if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) { std::string Name = LHSs->getValue(); // From TGParser::ParseIDValue @@ -773,8 +766,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { break; } case HEAD: { - ListInit *LHSl = dynamic_cast<ListInit*>(LHS); - if (LHSl) { + if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) { if (LHSl->getSize() == 0) { assert(0 && "Empty list in car"); return 0; @@ -784,8 +776,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { break; } case TAIL: { - ListInit *LHSl = dynamic_cast<ListInit*>(LHS); - if (LHSl) { + if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) { if (LHSl->getSize() == 0) { assert(0 && "Empty list in cdr"); return 0; @@ -802,16 +793,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { break; } case EMPTY: { - ListInit *LHSl = dynamic_cast<ListInit*>(LHS); - if (LHSl) { + if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) { if (LHSl->getSize() == 0) { return IntInit::get(1); } else { return IntInit::get(0); } } - StringInit *LHSs = dynamic_cast<StringInit*>(LHS); - if (LHSs) { + if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) { if (LHSs->getValue().empty()) { return IntInit::get(1); } else { @@ -865,11 +854,11 @@ BinOpInit *BinOpInit::get(BinaryOp opc, Init *lhs, Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { switch (getOpcode()) { case CONCAT: { - DagInit *LHSs = dynamic_cast<DagInit*>(LHS); - DagInit *RHSs = dynamic_cast<DagInit*>(RHS); + DagInit *LHSs = dyn_cast<DagInit>(LHS); + DagInit *RHSs = dyn_cast<DagInit>(RHS); if (LHSs && RHSs) { - DefInit *LOp = dynamic_cast<DefInit*>(LHSs->getOperator()); - DefInit *ROp = dynamic_cast<DefInit*>(RHSs->getOperator()); + DefInit *LOp = dyn_cast<DefInit>(LHSs->getOperator()); + DefInit *ROp = dyn_cast<DefInit>(RHSs->getOperator()); if (LOp == 0 || ROp == 0 || LOp->getDef() != ROp->getDef()) throw "Concated Dag operators do not match!"; std::vector<Init*> Args; @@ -887,8 +876,8 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { break; } case STRCONCAT: { - StringInit *LHSs = 
dynamic_cast<StringInit*>(LHS); - StringInit *RHSs = dynamic_cast<StringInit*>(RHS); + StringInit *LHSs = dyn_cast<StringInit>(LHS); + StringInit *RHSs = dyn_cast<StringInit>(RHS); if (LHSs && RHSs) return StringInit::get(LHSs->getValue() + RHSs->getValue()); break; @@ -897,15 +886,15 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { // try to fold eq comparison for 'bit' and 'int', otherwise fallback // to string objects. IntInit *L = - dynamic_cast<IntInit*>(LHS->convertInitializerTo(IntRecTy::get())); + dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get())); IntInit *R = - dynamic_cast<IntInit*>(RHS->convertInitializerTo(IntRecTy::get())); + dyn_cast_or_null<IntInit>(RHS->convertInitializerTo(IntRecTy::get())); if (L && R) return IntInit::get(L->getValue() == R->getValue()); - StringInit *LHSs = dynamic_cast<StringInit*>(LHS); - StringInit *RHSs = dynamic_cast<StringInit*>(RHS); + StringInit *LHSs = dyn_cast<StringInit>(LHS); + StringInit *RHSs = dyn_cast<StringInit>(RHS); // Make sure we've resolved if (LHSs && RHSs) @@ -916,8 +905,8 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { case SHL: case SRA: case SRL: { - IntInit *LHSi = dynamic_cast<IntInit*>(LHS); - IntInit *RHSi = dynamic_cast<IntInit*>(RHS); + IntInit *LHSi = dyn_cast<IntInit>(LHS); + IntInit *RHSi = dyn_cast<IntInit>(RHS); if (LHSi && RHSi) { int64_t LHSv = LHSi->getValue(), RHSv = RHSi->getValue(); int64_t Result; @@ -990,7 +979,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, MultiClass *CurMultiClass) { std::vector<Init *> NewOperands; - TypedInit *TArg = dynamic_cast<TypedInit*>(Arg); + TypedInit *TArg = dyn_cast<TypedInit>(Arg); // If this is a dag, recurse if (TArg && TArg->getType()->getAsString() == "dag") { @@ -1004,7 +993,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, } for (int i = 0; i < RHSo->getNumOperands(); ++i) { - OpInit *RHSoo = dynamic_cast<OpInit*>(RHSo->getOperand(i)); + OpInit *RHSoo = dyn_cast<OpInit>(RHSo->getOperand(i)); if (RHSoo) { Init *Result = EvaluateOperation(RHSoo, LHS, Arg, @@ -1032,16 +1021,16 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, Record *CurRec, MultiClass *CurMultiClass) { - DagInit *MHSd = dynamic_cast<DagInit*>(MHS); - ListInit *MHSl = dynamic_cast<ListInit*>(MHS); + DagInit *MHSd = dyn_cast<DagInit>(MHS); + ListInit *MHSl = dyn_cast<ListInit>(MHS); - OpInit *RHSo = dynamic_cast<OpInit*>(RHS); + OpInit *RHSo = dyn_cast<OpInit>(RHS); if (!RHSo) { throw TGError(CurRec->getLoc(), "!foreach requires an operator\n"); } - TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS); + TypedInit *LHSt = dyn_cast<TypedInit>(LHS); if (!LHSt) { throw TGError(CurRec->getLoc(), "!foreach requires typed variable\n"); @@ -1110,17 +1099,17 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { switch (getOpcode()) { case SUBST: { - DefInit *LHSd = dynamic_cast<DefInit*>(LHS); - VarInit *LHSv = dynamic_cast<VarInit*>(LHS); - StringInit *LHSs = dynamic_cast<StringInit*>(LHS); + DefInit *LHSd = dyn_cast<DefInit>(LHS); + VarInit *LHSv = dyn_cast<VarInit>(LHS); + StringInit *LHSs = dyn_cast<StringInit>(LHS); - DefInit *MHSd = dynamic_cast<DefInit*>(MHS); - VarInit *MHSv = dynamic_cast<VarInit*>(MHS); - StringInit *MHSs = dynamic_cast<StringInit*>(MHS); + DefInit *MHSd = dyn_cast<DefInit>(MHS); + VarInit 
*MHSv = dyn_cast<VarInit>(MHS); + StringInit *MHSs = dyn_cast<StringInit>(MHS); - DefInit *RHSd = dynamic_cast<DefInit*>(RHS); - VarInit *RHSv = dynamic_cast<VarInit*>(RHS); - StringInit *RHSs = dynamic_cast<StringInit*>(RHS); + DefInit *RHSd = dyn_cast<DefInit>(RHS); + VarInit *RHSv = dyn_cast<VarInit>(RHS); + StringInit *RHSs = dyn_cast<StringInit>(RHS); if ((LHSd && MHSd && RHSd) || (LHSv && MHSv && RHSv) @@ -1168,9 +1157,9 @@ Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { } case IF: { - IntInit *LHSi = dynamic_cast<IntInit*>(LHS); + IntInit *LHSi = dyn_cast<IntInit>(LHS); if (Init *I = LHS->convertInitializerTo(IntRecTy::get())) - LHSi = dynamic_cast<IntInit*>(I); + LHSi = dyn_cast<IntInit>(I); if (LHSi) { if (LHSi->getValue()) { return MHS; @@ -1190,9 +1179,9 @@ Init *TernOpInit::resolveReferences(Record &R, Init *lhs = LHS->resolveReferences(R, RV); if (Opc == IF && lhs != LHS) { - IntInit *Value = dynamic_cast<IntInit*>(lhs); + IntInit *Value = dyn_cast<IntInit>(lhs); if (Init *I = lhs->convertInitializerTo(IntRecTy::get())) - Value = dynamic_cast<IntInit*>(I); + Value = dyn_cast<IntInit>(I); if (Value != 0) { // Short-circuit if (Value->getValue()) { @@ -1285,8 +1274,7 @@ VarInit *VarInit::get(Init *VN, RecTy *T) { } const std::string &VarInit::getName() const { - StringInit *NameString = - dynamic_cast<StringInit *>(getNameInit()); + StringInit *NameString = dyn_cast<StringInit>(getNameInit()); assert(NameString && "VarInit name is not a string!"); return NameString->getValue(); } @@ -1305,9 +1293,9 @@ Init *VarInit::resolveListElementReference(Record &R, RecordVal *RV = R.getValue(getNameInit()); assert(RV && "Reference to a non-existent variable?"); - ListInit *LI = dynamic_cast<ListInit*>(RV->getValue()); + ListInit *LI = dyn_cast<ListInit>(RV->getValue()); if (!LI) { - TypedInit *VI = dynamic_cast<TypedInit*>(RV->getValue()); + TypedInit *VI = dyn_cast<TypedInit>(RV->getValue()); assert(VI && "Invalid list element!"); return VarListElementInit::get(VI, Elt); } @@ -1318,7 +1306,7 @@ Init *VarInit::resolveListElementReference(Record &R, // If the element is set to some value, or if we are resolving a reference // to a specific variable and that variable is explicitly unset, then // replace the VarListElementInit with it. 
- if (IRV || !dynamic_cast<UnsetInit*>(E)) + if (IRV || !isa<UnsetInit>(E)) return E; return 0; } @@ -1335,7 +1323,7 @@ Init *VarInit::getFieldInit(Record &R, const RecordVal *RV, const std::string &FieldName) const { if (isa<RecordRecTy>(getType())) if (const RecordVal *Val = R.getValue(VarName)) { - if (RV != Val && (RV || dynamic_cast<UnsetInit*>(Val->getValue()))) + if (RV != Val && (RV || isa<UnsetInit>(Val->getValue()))) return 0; Init *TheInit = Val->getValue(); assert(TheInit != this && "Infinite loop detected!"); @@ -1354,7 +1342,7 @@ Init *VarInit::getFieldInit(Record &R, const RecordVal *RV, /// Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const { if (RecordVal *Val = R.getValue(VarName)) - if (RV == Val || (RV == 0 && !dynamic_cast<UnsetInit*>(Val->getValue()))) + if (RV == Val || (RV == 0 && !isa<UnsetInit>(Val->getValue()))) return Val->getValue(); return const_cast<VarInit *>(this); } @@ -1422,8 +1410,7 @@ Init *VarListElementInit:: resolveListElementReference(Record &R, Init *Result = TI->resolveListElementReference(R, RV, Element); if (Result) { - TypedInit *TInit = dynamic_cast<TypedInit *>(Result); - if (TInit) { + if (TypedInit *TInit = dyn_cast<TypedInit>(Result)) { Init *Result2 = TInit->resolveListElementReference(R, RV, Elt); if (Result2) return Result2; return new VarListElementInit(TInit, Elt); @@ -1475,14 +1462,14 @@ Init *FieldInit::getBit(unsigned Bit) const { Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV, unsigned Elt) const { if (Init *ListVal = Rec->getFieldInit(R, RV, FieldName)) - if (ListInit *LI = dynamic_cast<ListInit*>(ListVal)) { + if (ListInit *LI = dyn_cast<ListInit>(ListVal)) { if (Elt >= LI->getSize()) return 0; Init *E = LI->getElement(Elt); // If the element is set to some value, or if we are resolving a // reference to a specific variable and that variable is explicitly // unset, then replace the VarListElementInit with it. - if (RV || !dynamic_cast<UnsetInit*>(E)) + if (RV || !isa<UnsetInit>(E)) return E; } return 0; @@ -1611,7 +1598,7 @@ RecordVal::RecordVal(const std::string &N, RecTy *T, unsigned P) } const std::string &RecordVal::getName() const { - StringInit *NameString = dynamic_cast<StringInit *>(Name); + StringInit *NameString = dyn_cast<StringInit>(Name); assert(NameString && "RecordVal name is not a string!"); return NameString->getValue(); } @@ -1641,7 +1628,7 @@ void Record::init() { void Record::checkName() { // Ensure the record name has string type. 
- const TypedInit *TypedName = dynamic_cast<const TypedInit *>(Name); + const TypedInit *TypedName = dyn_cast<const TypedInit>(Name); assert(TypedName && "Record name is not typed!"); RecTy *Type = TypedName->getType(); if (!isa<StringRecTy>(Type)) @@ -1655,8 +1642,7 @@ DefInit *Record::getDefInit() { } const std::string &Record::getName() const { - const StringInit *NameString = - dynamic_cast<const StringInit *>(Name); + const StringInit *NameString = dyn_cast<StringInit>(Name); assert(NameString && "Record name is not a string!"); return NameString->getValue(); } @@ -1773,7 +1759,7 @@ std::string Record::getValueAsString(StringRef FieldName) const { throw "Record `" + getName() + "' does not have a field named `" + FieldName.str() + "'!\n"; - if (StringInit *SI = dynamic_cast<StringInit*>(R->getValue())) + if (StringInit *SI = dyn_cast<StringInit>(R->getValue())) return SI->getValue(); throw "Record `" + getName() + "', field `" + FieldName.str() + "' does not have a string initializer!"; @@ -1789,7 +1775,7 @@ BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const { throw "Record `" + getName() + "' does not have a field named `" + FieldName.str() + "'!\n"; - if (BitsInit *BI = dynamic_cast<BitsInit*>(R->getValue())) + if (BitsInit *BI = dyn_cast<BitsInit>(R->getValue())) return BI; throw "Record `" + getName() + "', field `" + FieldName.str() + "' does not have a BitsInit initializer!"; @@ -1805,7 +1791,7 @@ ListInit *Record::getValueAsListInit(StringRef FieldName) const { throw "Record `" + getName() + "' does not have a field named `" + FieldName.str() + "'!\n"; - if (ListInit *LI = dynamic_cast<ListInit*>(R->getValue())) + if (ListInit *LI = dyn_cast<ListInit>(R->getValue())) return LI; throw "Record `" + getName() + "', field `" + FieldName.str() + "' does not have a list initializer!"; @@ -1820,7 +1806,7 @@ Record::getValueAsListOfDefs(StringRef FieldName) const { ListInit *List = getValueAsListInit(FieldName); std::vector<Record*> Defs; for (unsigned i = 0; i < List->getSize(); i++) { - if (DefInit *DI = dynamic_cast<DefInit*>(List->getElement(i))) { + if (DefInit *DI = dyn_cast<DefInit>(List->getElement(i))) { Defs.push_back(DI->getDef()); } else { throw "Record `" + getName() + "', field `" + FieldName.str() + @@ -1840,7 +1826,7 @@ int64_t Record::getValueAsInt(StringRef FieldName) const { throw "Record `" + getName() + "' does not have a field named `" + FieldName.str() + "'!\n"; - if (IntInit *II = dynamic_cast<IntInit*>(R->getValue())) + if (IntInit *II = dyn_cast<IntInit>(R->getValue())) return II->getValue(); throw "Record `" + getName() + "', field `" + FieldName.str() + "' does not have an int initializer!"; @@ -1855,7 +1841,7 @@ Record::getValueAsListOfInts(StringRef FieldName) const { ListInit *List = getValueAsListInit(FieldName); std::vector<int64_t> Ints; for (unsigned i = 0; i < List->getSize(); i++) { - if (IntInit *II = dynamic_cast<IntInit*>(List->getElement(i))) { + if (IntInit *II = dyn_cast<IntInit>(List->getElement(i))) { Ints.push_back(II->getValue()); } else { throw "Record `" + getName() + "', field `" + FieldName.str() + @@ -1874,7 +1860,7 @@ Record::getValueAsListOfStrings(StringRef FieldName) const { ListInit *List = getValueAsListInit(FieldName); std::vector<std::string> Strings; for (unsigned i = 0; i < List->getSize(); i++) { - if (StringInit *II = dynamic_cast<StringInit*>(List->getElement(i))) { + if (StringInit *II = dyn_cast<StringInit>(List->getElement(i))) { Strings.push_back(II->getValue()); } else { throw "Record `" + getName() + 
"', field `" + FieldName.str() + @@ -1894,7 +1880,7 @@ Record *Record::getValueAsDef(StringRef FieldName) const { throw "Record `" + getName() + "' does not have a field named `" + FieldName.str() + "'!\n"; - if (DefInit *DI = dynamic_cast<DefInit*>(R->getValue())) + if (DefInit *DI = dyn_cast<DefInit>(R->getValue())) return DI->getDef(); throw "Record `" + getName() + "', field `" + FieldName.str() + "' does not have a def initializer!"; @@ -1910,7 +1896,7 @@ bool Record::getValueAsBit(StringRef FieldName) const { throw "Record `" + getName() + "' does not have a field named `" + FieldName.str() + "'!\n"; - if (BitInit *BI = dynamic_cast<BitInit*>(R->getValue())) + if (BitInit *BI = dyn_cast<BitInit>(R->getValue())) return BI->getValue(); throw "Record `" + getName() + "', field `" + FieldName.str() + "' does not have a bit initializer!"; @@ -1927,7 +1913,7 @@ bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const { return false; } Unset = false; - if (BitInit *BI = dynamic_cast<BitInit*>(R->getValue())) + if (BitInit *BI = dyn_cast<BitInit>(R->getValue())) return BI->getValue(); throw "Record `" + getName() + "', field `" + FieldName.str() + "' does not have a bit initializer!"; @@ -1943,7 +1929,7 @@ DagInit *Record::getValueAsDag(StringRef FieldName) const { throw "Record `" + getName() + "' does not have a field named `" + FieldName.str() + "'!\n"; - if (DagInit *DI = dynamic_cast<DagInit*>(R->getValue())) + if (DagInit *DI = dyn_cast<DagInit>(R->getValue())) return DI; throw "Record `" + getName() + "', field `" + FieldName.str() + "' does not have a dag initializer!"; @@ -2004,7 +1990,7 @@ RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const { /// to CurRec's name. Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass, Init *Name, const std::string &Scoper) { - RecTy *Type = dynamic_cast<TypedInit *>(Name)->getType(); + RecTy *Type = dyn_cast<TypedInit>(Name)->getType(); BinOpInit *NewName = BinOpInit::get(BinOpInit::STRCONCAT, diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 0ed75f014ee..b1f9f724efd 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -93,7 +93,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, // Do not allow assignments like 'X = X'. This will just cause infinite loops // in the resolution machinery. if (BitList.empty()) - if (VarInit *VI = dynamic_cast<VarInit*>(V)) + if (VarInit *VI = dyn_cast<VarInit>(V)) if (VI->getNameInit() == ValName) return false; @@ -102,7 +102,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, // initializer. // if (!BitList.empty()) { - BitsInit *CurVal = dynamic_cast<BitsInit*>(RV->getValue()); + BitsInit *CurVal = dyn_cast<BitsInit>(RV->getValue()); if (CurVal == 0) return Error(Loc, "Value '" + ValName->getAsUnquotedString() + "' is not a bits type"); @@ -114,7 +114,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, } // We should have a BitsInit type now. 
- BitsInit *BInit = dynamic_cast<BitsInit*>(BI); + BitsInit *BInit = dyn_cast<BitsInit>(BI); assert(BInit != 0); SmallVector<Init *, 16> NewBits(CurVal->getNumBits()); @@ -310,7 +310,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){ if (IterVals.size() != Loops.size()) { assert(IterVals.size() < Loops.size()); ForeachLoop &CurLoop = Loops[IterVals.size()]; - ListInit *List = dynamic_cast<ListInit *>(CurLoop.ListValue); + ListInit *List = dyn_cast<ListInit>(CurLoop.ListValue); if (List == 0) { Error(Loc, "Loop list is not a list"); return true; @@ -335,7 +335,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){ // Set the iterator values now. for (unsigned i = 0, e = IterVals.size(); i != e; ++i) { VarInit *IterVar = IterVals[i].IterVar; - TypedInit *IVal = dynamic_cast<TypedInit *>(IterVals[i].IterValue); + TypedInit *IVal = dyn_cast<TypedInit>(IterVals[i].IterValue); if (IVal == 0) { Error(Loc, "foreach iterator value is untyped"); return true; @@ -406,8 +406,7 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) { RecTy *Type = 0; if (CurRec) { - const TypedInit *CurRecName = - dynamic_cast<const TypedInit *>(CurRec->getNameInit()); + const TypedInit *CurRecName = dyn_cast<TypedInit>(CurRec->getNameInit()); if (!CurRecName) { TokError("Record name is not typed!"); return 0; @@ -780,7 +779,7 @@ Init *TGParser::ParseIDValue(Record *CurRec, for (LoopVector::iterator i = Loops.begin(), iend = Loops.end(); i != iend; ++i) { - VarInit *IterVar = dynamic_cast<VarInit *>(i->IterVar); + VarInit *IterVar = dyn_cast<VarInit>(i->IterVar); if (IterVar && IterVar->getName() == Name) return IterVar; } @@ -855,9 +854,9 @@ Init *TGParser::ParseOperation(Record *CurRec) { if (Code == UnOpInit::HEAD || Code == UnOpInit::TAIL || Code == UnOpInit::EMPTY) { - ListInit *LHSl = dynamic_cast<ListInit*>(LHS); - StringInit *LHSs = dynamic_cast<StringInit*>(LHS); - TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS); + ListInit *LHSl = dyn_cast<ListInit>(LHS); + StringInit *LHSs = dyn_cast<StringInit>(LHS); + TypedInit *LHSt = dyn_cast<TypedInit>(LHS); if (LHSl == 0 && LHSs == 0 && LHSt == 0) { TokError("expected list or string type argument in unary operator"); return 0; @@ -884,7 +883,7 @@ Init *TGParser::ParseOperation(Record *CurRec) { } if (LHSl) { Init *Item = LHSl->getElement(0); - TypedInit *Itemt = dynamic_cast<TypedInit*>(Item); + TypedInit *Itemt = dyn_cast<TypedInit>(Item); if (Itemt == 0) { TokError("untyped list element in unary operator"); return 0; @@ -1046,24 +1045,24 @@ Init *TGParser::ParseOperation(Record *CurRec) { RecTy *MHSTy = 0; RecTy *RHSTy = 0; - if (TypedInit *MHSt = dynamic_cast<TypedInit*>(MHS)) + if (TypedInit *MHSt = dyn_cast<TypedInit>(MHS)) MHSTy = MHSt->getType(); - if (BitsInit *MHSbits = dynamic_cast<BitsInit*>(MHS)) + if (BitsInit *MHSbits = dyn_cast<BitsInit>(MHS)) MHSTy = BitsRecTy::get(MHSbits->getNumBits()); - if (dynamic_cast<BitInit*>(MHS)) + if (isa<BitInit>(MHS)) MHSTy = BitRecTy::get(); - if (TypedInit *RHSt = dynamic_cast<TypedInit*>(RHS)) + if (TypedInit *RHSt = dyn_cast<TypedInit>(RHS)) RHSTy = RHSt->getType(); - if (BitsInit *RHSbits = dynamic_cast<BitsInit*>(RHS)) + if (BitsInit *RHSbits = dyn_cast<BitsInit>(RHS)) RHSTy = BitsRecTy::get(RHSbits->getNumBits()); - if (dynamic_cast<BitInit*>(RHS)) + if (isa<BitInit>(RHS)) RHSTy = BitRecTy::get(); // For UnsetInit, it's typed from the other hand. 
- if (dynamic_cast<UnsetInit*>(MHS)) + if (isa<UnsetInit>(MHS)) MHSTy = RHSTy; - if (dynamic_cast<UnsetInit*>(RHS)) + if (isa<UnsetInit>(RHS)) RHSTy = MHSTy; if (!MHSTy || !RHSTy) { @@ -1082,7 +1081,7 @@ Init *TGParser::ParseOperation(Record *CurRec) { break; } case tgtok::XForEach: { - TypedInit *MHSt = dynamic_cast<TypedInit *>(MHS); + TypedInit *MHSt = dyn_cast<TypedInit>(MHS); if (MHSt == 0) { TokError("could not get type for !foreach"); return 0; @@ -1091,7 +1090,7 @@ Init *TGParser::ParseOperation(Record *CurRec) { break; } case tgtok::XSubst: { - TypedInit *RHSt = dynamic_cast<TypedInit *>(RHS); + TypedInit *RHSt = dyn_cast<TypedInit>(RHS); if (RHSt == 0) { TokError("could not get type for !subst"); return 0; @@ -1315,7 +1314,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, for (std::vector<Init *>::iterator i = Vals.begin(), ie = Vals.end(); i != ie; ++i) { - TypedInit *TArg = dynamic_cast<TypedInit*>(*i); + TypedInit *TArg = dyn_cast<TypedInit>(*i); if (TArg == 0) { TokError("Untyped list element"); return 0; @@ -1498,7 +1497,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { // Create a !strconcat() operation, first casting each operand to // a string if necessary. - TypedInit *LHS = dynamic_cast<TypedInit *>(Result); + TypedInit *LHS = dyn_cast<TypedInit>(Result); if (!LHS) { Error(PasteLoc, "LHS of paste is not typed!"); return 0; @@ -1525,7 +1524,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { default: Init *RHSResult = ParseValue(CurRec, ItemType, ParseNameMode); - RHS = dynamic_cast<TypedInit *>(RHSResult); + RHS = dyn_cast<TypedInit>(RHSResult); if (!RHS) { Error(PasteLoc, "RHS of paste is not typed!"); return 0; @@ -1716,7 +1715,7 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) { default: TokError("Unknown token when expecting a range list"); return 0; case tgtok::l_square: { // '[' ValueList ']' Init *List = ParseSimpleValue(0, 0, ParseForeachMode); - ForeachListValue = dynamic_cast<ListInit*>(List); + ForeachListValue = dyn_cast<ListInit>(List); if (ForeachListValue == 0) { TokError("Expected a Value list"); return 0; @@ -2257,7 +2256,7 @@ InstantiateMulticlassDef(MultiClass &MC, Init *DefName = DefProto->getNameInit(); - StringInit *DefNameString = dynamic_cast<StringInit *>(DefName); + StringInit *DefNameString = dyn_cast<StringInit>(DefName); if (DefNameString != 0) { // We have a fully expanded string so there are no operators to diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 6b98d446b00..ae531c4ea88 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -102,8 +102,6 @@ public: virtual void print(raw_ostream &O) const; void print(raw_ostream *O) const { if (O) print(*O); } void dump() const; - - static bool classof(const ARMConstantPoolValue *) { return true; } }; inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { @@ -158,7 +156,6 @@ public: static bool classof(const ARMConstantPoolValue *APV) { return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA(); } - static bool classof(const ARMConstantPoolConstant *) { return true; } }; /// ARMConstantPoolSymbol - ARM-specific constantpool values for external @@ -192,7 +189,6 @@ public: static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isExtSymbol(); } - static bool classof(const ARMConstantPoolSymbol *) { return true; } }; /// ARMConstantPoolMBB - 
ARM-specific constantpool value of a machine basic @@ -225,7 +221,6 @@ public: static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isMachineBasicBlock(); } - static bool classof(const ARMConstantPoolMBB *) { return true; } }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp index d88bf0c8fa1..7bca0edf915 100644 --- a/lib/Target/ARM/ARMELFWriterInfo.cpp +++ b/lib/Target/ARM/ARMELFWriterInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM) - : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits() == 64, + : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits(0) == 64, TM.getDataLayout()->isLittleEndian()) { } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 051aab05cbd..b2eb5784879 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -122,6 +122,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT, Custom); @@ -1655,22 +1656,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, /// and then confiscate the rest of the parameter registers to insure /// this. void -ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { +ARMTargetLowering::HandleByVal( + CCState *State, unsigned &size, unsigned Align) const { unsigned reg = State->AllocateReg(GPRArgRegs, 4); assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); if ((!State->isFirstByValRegValid()) && (ARM::R0 <= reg) && (reg <= ARM::R3)) { - State->setFirstByValReg(reg); - // At a call site, a byval parameter that is split between - // registers and memory needs its size truncated here. In a - // function prologue, such byval parameters are reassembled in - // memory, and are not truncated. - if (State->getCallOrPrologue() == Call) { - unsigned excess = 4 * (ARM::R4 - reg); - assert(size >= excess && "expected larger existing stack allocation"); - size -= excess; + if (Subtarget->isAAPCS_ABI() && Align > 4) { + unsigned AlignInRegs = Align / 4; + unsigned Waste = (ARM::R4 - reg) % AlignInRegs; + for (unsigned i = 0; i < Waste; ++i) + reg = State->AllocateReg(GPRArgRegs, 4); + } + if (reg != 0) { + State->setFirstByValReg(reg); + // At a call site, a byval parameter that is split between + // registers and memory needs its size truncated here. In a + // function prologue, such byval parameters are reassembled in + // memory, and are not truncated. + if (State->getCallOrPrologue() == Call) { + unsigned excess = 4 * (ARM::R4 - reg); + assert(size >= excess && "expected larger existing stack allocation"); + size -= excess; + } } } // Confiscate any remaining parameter registers to preclude their @@ -1803,6 +1813,14 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } + // If Caller's vararg or byval argument has been split between registers and + // stack, do not perform tail call, since part of the argument is in caller's + // local frame. + const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). 
+ getInfo<ARMFunctionInfo>(); + if (AFI_Caller->getVarArgsRegSaveSize()) + return false; + // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { @@ -4221,9 +4239,26 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // If we are VDUPing a value that comes directly from a vector, that will // cause an unnecessary move to and from a GPR, where instead we could // just use VDUPLANE. - if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) - N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, + if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + // We need to create a new undef vector to use for the VDUPLANE if the + // size of the vector from which we get the value is different than the + // size of the vector that we need to create. We will insert the element + // such that the register coalescer will remove unnecessary copies. + if (VT != Value->getOperand(0).getValueType()) { + ConstantSDNode *constIndex; + constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)); + assert(constIndex && "The index is not a constant!"); + unsigned index = constIndex->getAPIntValue().getLimitedValue() % + VT.getVectorNumElements(); + N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), + Value, DAG.getConstant(index, MVT::i32)), + DAG.getConstant(index, MVT::i32)); + } else { + N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, Value->getOperand(0), Value->getOperand(1)); + } + } else N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index b5020c97108..9acab0b0834 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -480,7 +480,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; /// HandleByVal - Target-specific cleanup for ByVal support. - virtual void HandleByVal(CCState *, unsigned &) const; + virtual void HandleByVal(CCState *, unsigned &, unsigned) const; /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. 
Targets which want to do tail call diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index de655f1a0ee..ede4def2b73 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4500,12 +4500,25 @@ def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, Requires<[HasNEON]>; +def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), + (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), + (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, Requires<[HasNEON]>; +def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; + def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VCNTiQ, @@ -4525,11 +4538,23 @@ def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, Requires<[HasNEON]>; +def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), + (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), + (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, Requires<[HasNEON]>; +def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 6fdf873a8f0..c51ae24c50e 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -71,7 +71,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { + FrameLowering(Subtarget), + STTI(&TLInfo) { if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -104,7 +105,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, TSInfo(*this), FrameLowering(Subtarget.hasThumb2() ? 
new ARMFrameLowering(Subtarget) - : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { + : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)), + STTI(&TLInfo){ } namespace { diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index f91e5bbd477..7a65a7f062d 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -25,6 +25,7 @@ #include "Thumb1FrameLowering.h" #include "Thumb2InstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetTransformImpl.h" #include "llvm/DataLayout.h" #include "llvm/MC/MCStreamer.h" #include "llvm/ADT/OwningPtr.h" @@ -67,6 +68,8 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; ARMFrameLowering FrameLowering; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -88,7 +91,12 @@ class ARMTargetMachine : public ARMBaseTargetMachine { virtual const ARMFrameLowering *getFrameLowering() const { return &FrameLowering; } - + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const DataLayout *getDataLayout() const { return &DL; } virtual const ARMELFWriterInfo *getELFWriterInfo() const { @@ -110,6 +118,8 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { ARMSelectionDAGInfo TSInfo; // Either Thumb1FrameLowering or ARMFrameLowering. OwningPtr<ARMFrameLowering> FrameLowering; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -138,6 +148,12 @@ public: virtual const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } virtual const DataLayout *getDataLayout() const { return &DL; } virtual const ARMELFWriterInfo *getELFWriterInfo() const { return Subtarget.isTargetELF() ? 
&ELFWriterInfo : 0; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 93e5eca6252..0eec8622e97 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -259,9 +259,10 @@ public: unsigned checkTargetMatchPredicate(MCInst &Inst); - bool MatchAndEmitInstruction(SMLoc IDLoc, + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out); + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); }; } // end anonymous namespace @@ -7474,17 +7475,14 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { static const char *getSubtargetFeatureName(unsigned Val); bool ARMAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out) { + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; - unsigned Kind; - unsigned ErrorInfo; unsigned MatchResult; - MatchInstMapAndConstraints MapAndConstraints; - MatchResult = MatchInstructionImpl(Operands, Kind, Inst, - MapAndConstraints, ErrorInfo, - /*matchingInlineAsm*/ false); + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm); switch (MatchResult) { default: break; case Match_Success: diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index 6bddc42b373..b404e6c6e01 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -67,9 +67,6 @@ public: static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; } - - static bool classof(const ARMMCExpr *) { return true; } - }; } // end namespace llvm diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 096ef001ed3..48df199437b 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_library(LLVMTarget TargetMachineC.cpp TargetRegisterInfo.cpp TargetSubtargetInfo.cpp + TargetTransformImpl.cpp ) foreach(t ${LLVM_TARGETS_TO_BUILD}) diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index a37ad7f85ae..e92ad01e1d5 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -43,7 +43,8 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()) { + InstrItins(Subtarget.getInstrItineraryData()), + STTI(&TLInfo){ } //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 58699a30d26..7f53ea6fbeb 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -20,6 +20,7 @@ #include "SPUSelectionDAGInfo.h" #include "SPUFrameLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetTransformImpl.h" #include "llvm/DataLayout.h" namespace llvm { @@ -34,6 +35,8 @@ class SPUTargetMachine : public LLVMTargetMachine { SPUTargetLowering TLInfo; SPUSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: SPUTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -77,6 +80,12 @@ public: virtual const InstrItineraryData 
*getInstrItineraryData() const { return &InstrItins; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 096e2bc13b0..61fb4e98ecd 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -474,9 +474,9 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, Out << "AttributeWithIndex PAWI;"; nl(Out); for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { unsigned index = PAL.getSlot(i).Index; - Attributes::Builder attrs(PAL.getSlot(i).Attrs); + AttrBuilder attrs(PAL.getSlot(i).Attrs); Out << "PAWI.Index = " << index << "U;\n"; - Out << " Attributes::Builder B;\n"; + Out << " AttrBuilder B;\n"; #define HANDLE_ATTR(X) \ if (attrs.hasAttribute(Attributes::X)) \ @@ -509,13 +509,11 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, HANDLE_ATTR(NonLazyBind); #undef HANDLE_ATTR if (attrs.hasAttribute(Attributes::StackAlignment)) - Out << "B.addStackAlignmentAttr(Attribute::constructStackAlignmentFromInt(" - << attrs.getStackAlignment() - << "))"; + Out << "B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")"; nl(Out); attrs.removeAttribute(Attributes::StackAlignment); assert(!attrs.hasAttributes() && "Unhandled attribute!"); - Out << "PAWI.Attrs = Attributes::get(B);"; + Out << "PAWI.Attrs = Attributes::get(mod->getContext(), B);"; nl(Out); Out << "Attrs.push_back(PAWI);"; nl(Out); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index d198a3f45b5..353542a8097 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -74,7 +74,8 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget), - InstrItins(&Subtarget.getInstrItineraryData()) { + InstrItins(&Subtarget.getInstrItineraryData()), + STTI(&TLInfo) { setMCUseCFI(false); } @@ -87,7 +88,7 @@ bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) { PM.add(createDeadCodeEliminationPass()); PM.add(createConstantPropagationPass()); PM.add(createLoopUnrollPass()); - PM.add(createLoopStrengthReducePass(getTargetLowering())); + PM.add(createLoopStrengthReducePass()); return true; } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index ade5b3e9c1f..7a4215c119a 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -21,6 +21,7 @@ #include "HexagonFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -34,6 +35,8 @@ class HexagonTargetMachine : public LLVMTargetMachine { HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; const InstrItineraryData* InstrItins; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU, @@ -68,6 +71,14 @@ public: return &TSInfo; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + + virtual const VectorTargetTransformInfo 
*getVectorTargetTransformInfo()const { + return &VTTI; + } + virtual const DataLayout *getDataLayout() const { return &DL; } static unsigned getModuleMatchQuality(const Module &M); diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index d1e18b24c39..9e28a3d7d09 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -44,9 +44,10 @@ class MBlazeAsmParser : public MCTargetAsmParser { bool ParseDirectiveWord(unsigned Size, SMLoc L); - bool MatchAndEmitInstruction(SMLoc IDLoc, + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out); + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); /// @name Auto-generated Match Functions /// { @@ -312,15 +313,13 @@ static unsigned MatchRegisterName(StringRef Name); /// } // bool MBlazeAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out) { + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; - unsigned Kind; - unsigned ErrorInfo; - MatchInstMapAndConstraints MapAndConstraints; - switch (MatchInstructionImpl(Operands, Kind, Inst, MapAndConstraints, - ErrorInfo, /*matchingInlineAsm*/ false)) { + switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm)) { default: break; case Match_Success: Out.EmitInstruction(Inst); diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp index 4ca30ba81f7..6b575099e59 100644 --- a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM) - : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits() == 64, + : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits(0) == 64, TM.getDataLayout()->isLittleEndian()) { } diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp index 91aaf940e62..1c2e3b26613 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -83,7 +83,7 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const { #undef GET_INTRINSIC_OVERLOAD_TABLE } -/// This defines the "getAttributes(ID id)" method. +/// This defines the "getAttributes(LLVMContext &C, ID id)" method. 
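
The CPPBackend hunk above, like the getAttributes comment here, tracks the reworked attributes interface: Attributes::Builder is now a standalone AttrBuilder, stack alignment is passed as a plain integer, and materializing an attribute set requires an LLVMContext. A sketch of the updated calls, assuming a Module *M in scope:

  AttrBuilder B;
  B.addAttribute(Attributes::NoUnwind);                    // flag-style attributes
  B.addStackAlignmentAttr(16);                             // alignment now a raw integer
  Attributes Attrs = Attributes::get(M->getContext(), B);  // now context-bound
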
#define GET_INTRINSIC_ATTRIBUTES #include "MBlazeGenIntrinsics.inc" #undef GET_INTRINSIC_ATTRIBUTES @@ -104,7 +104,8 @@ Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, Type **Tys, unsigned numTy) const { assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded"); - AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID); + AttrListPtr AList = getAttributes(M->getContext(), + (mblazeIntrinsic::ID) IntrID); return cast<Function>(M->getOrInsertFunction(getName(IntrID), getType(M->getContext(), IntrID), AList)); diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 1f2cf6d9d2f..cb5f46062d9 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -42,7 +42,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT, InstrInfo(*this), FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()) { + InstrItins(Subtarget.getInstrItineraryData()), STTI(&TLInfo) { } namespace { diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index d949e54f0d8..34648b9b9ae 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -25,6 +25,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class formatted_raw_ostream; @@ -39,6 +40,8 @@ namespace llvm { MBlazeIntrinsicInfo IntrinsicInfo; MBlazeELFWriterInfo ELFWriterInfo; InstrItineraryData InstrItins; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: MBlazeTargetMachine(const Target &T, StringRef TT, @@ -77,6 +80,10 @@ namespace llvm { virtual const MBlazeELFWriterInfo *getELFWriterInfo() const { return &ELFWriterInfo; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const + { return &STTI; } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const + { return &VTTI; } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index fc677aec38e..113378a5f31 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -881,7 +881,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. - uint64_t SlotSize = TD->getPointerSize(); + uint64_t SlotSize = TD->getPointerSize(0); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, true); FuncInfo->setRAIndex(ReturnAddrIndex); @@ -901,7 +901,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = - DAG.getConstant(TD->getPointerSize(), MVT::i16); + DAG.getConstant(TD->getPointerSize(0), MVT::i16); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index da5899b86d5..29ea6812162 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, // FIXME: Check DataLayout string. 
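
This and the other target-machine diffs in this commit add the same boilerplate: ScalarTargetTransformImpl/VectorTargetTransformImpl members (the scalar one constructed against the target's TLInfo) plus two virtual getters. Reduced to a sketch with a placeholder target name:

  class FooTargetMachine : public LLVMTargetMachine {
    FooTargetLowering TLInfo;
    ScalarTargetTransformImpl STTI;   // initialized as STTI(&TLInfo) in the ctor
    VectorTargetTransformImpl VTTI;
  public:
    virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
      return &STTI;
    }
    virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
      return &VTTI;
    }
  };
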
DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { } + FrameLowering(Subtarget), STTI(&TLInfo) { } namespace { /// MSP430 Code Generator Pass Configuration Options. diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index ba3cef1f2ad..186172ede42 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -24,6 +24,7 @@ #include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -36,6 +37,8 @@ class MSP430TargetMachine : public LLVMTargetMachine { MSP430TargetLowering TLInfo; MSP430SelectionDAGInfo TSInfo; MSP430FrameLowering FrameLowering; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: MSP430TargetMachine(const Target &T, StringRef TT, @@ -61,7 +64,12 @@ public: virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const { return &TSInfo; } - + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; // MSP430TargetMachine. diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index c2980ffeea8..00649d2f187 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -67,9 +67,10 @@ class MipsAsmParser : public MCTargetAsmParser { #define GET_ASSEMBLER_HEADER #include "MipsGenAsmMatcher.inc" - bool MatchAndEmitInstruction(SMLoc IDLoc, + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out); + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); @@ -452,16 +453,13 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, } bool MipsAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out) { + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; - unsigned Kind; - unsigned ErrorInfo; - MatchInstMapAndConstraints MapAndConstraints; - unsigned MatchResult = MatchInstructionImpl(Operands, Kind, Inst, - MapAndConstraints, ErrorInfo, - /*matchingInlineAsm*/ false); + unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm); switch (MatchResult) { default: break; diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 127c5b89e8d..8991433005d 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -58,12 +58,22 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - unsigned Opc = 0, ZeroReg = 0; + unsigned Opc = 0; + + if (Mips::CPU16RegsRegClass.contains(DestReg) && + Mips::CPURegsRegClass.contains(SrcReg)) + Opc = Mips::MoveR3216; + else if (Mips::CPURegsRegClass.contains(DestReg) && + Mips::CPU16RegsRegClass.contains(SrcReg)) + Opc = Mips::Move32R16; + else if ((SrcReg == Mips::HI) && + (Mips::CPU16RegsRegClass.contains(DestReg))) + Opc = Mips::Mfhi16, SrcReg = 
0; + + else if ((SrcReg == Mips::LO) && + (Mips::CPU16RegsRegClass.contains(DestReg))) + Opc = Mips::Mflo16, SrcReg = 0; - if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. - if (Mips::CPURegsRegClass.contains(SrcReg)) - Opc = Mips::Move32R16; - } assert(Opc && "Cannot copy registers"); @@ -72,9 +82,6 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (DestReg) MIB.addReg(DestReg, RegState::Define); - if (ZeroReg) - MIB.addReg(ZeroReg); - if (SrcReg) MIB.addReg(SrcReg, getKillRegState(KillSrc)); } diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index e1c90466fbf..eba201a0ea9 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -118,6 +118,14 @@ class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry), !strconcat(asmstr, "\t$rx, $ry"), [], itin> { } + +// +// maybe refactor but need a $zero as a dummy first parameter +// +class FRR16_div_ins<bits<5> f, string asmstr, InstrItinClass itin> : + FRR16<f, (outs ), (ins CPU16Regs:$rx, CPU16Regs:$ry), + !strconcat(asmstr, "\t$$zero, $rx, $ry"), [], itin> ; + class FRR16_M_ins<bits<5> f, string asmstr, InstrItinClass itin> : FRR16<f, (outs CPU16Regs:$rx), (ins), @@ -196,6 +204,24 @@ def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>; // To do a bitwise logical AND. def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>; +// +// Format: DIV rx, ry MIPS16e +// Purpose: Divide Word +// To divide 32-bit signed integers. +// +def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> { + let Defs = [HI, LO]; +} + +// +// Format: DIVU rx, ry MIPS16e +// Purpose: Divide Unsigned Word +// To divide 32-bit unsigned integers. 
+// +def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> { + let Defs = [HI, LO]; +} + // // Format: JR ra MIPS16e @@ -551,5 +577,20 @@ def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>; // Small immediates def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>; +// +// MipsDivRem +// +def: Mips16Pat + <(MipsDivRem CPU16Regs:$rx, CPU16Regs:$ry), + (DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>; + +// +// MipsDivRemU +// +def: Mips16Pat + <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry), + (DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>; + + def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)), (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index bd472d6f67e..99a9f25abd2 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -315,3 +315,33 @@ def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>; // Instruction aliases //===----------------------------------------------------------------------===// def : InstAlias<"move $dst,$src", (DADD CPU64Regs:$dst,CPU64Regs:$src,ZERO_64)>; + +/// Move between CPU and coprocessor registers +let DecoderNamespace = "Mips64" in { +def MFC0_3OP64 : MFC3OP<0x10, 0, (outs CPU64Regs:$rt), + (ins CPU64Regs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">; +def MTC0_3OP64 : MFC3OP<0x10, 4, (outs CPU64Regs:$rd, uimm16:$sel), + (ins CPU64Regs:$rt),"mtc0\t$rt, $rd, $sel">; +def MFC2_3OP64 : MFC3OP<0x12, 0, (outs CPU64Regs:$rt), + (ins CPU64Regs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">; +def MTC2_3OP64 : MFC3OP<0x12, 4, (outs CPU64Regs:$rd, uimm16:$sel), + (ins CPU64Regs:$rt),"mtc2\t$rt, $rd, $sel">; +def DMFC0_3OP64 : MFC3OP<0x10, 1, (outs CPU64Regs:$rt), + (ins CPU64Regs:$rd, uimm16:$sel),"dmfc0\t$rt, $rd, $sel">; +def DMTC0_3OP64 : MFC3OP<0x10, 5, (outs CPU64Regs:$rd, uimm16:$sel), + (ins CPU64Regs:$rt),"dmtc0\t$rt, $rd, $sel">; +def DMFC2_3OP64 : MFC3OP<0x12, 1, (outs CPU64Regs:$rt), + (ins CPU64Regs:$rd, uimm16:$sel),"dmfc2\t$rt, $rd, $sel">; +def DMTC2_3OP64 : MFC3OP<0x12, 5, (outs CPU64Regs:$rd, uimm16:$sel), + (ins CPU64Regs:$rt),"dmtc2\t$rt, $rd, $sel">; +} +// Two operand (implicit 0 selector) versions: +def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; +def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; +def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; +def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; +def : InstAlias<"dmfc0 $rt, $rd", (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; +def : InstAlias<"dmtc0 $rt, $rd", (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; +def : InstAlias<"dmfc2 $rt, $rd", (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; +def : InstAlias<"dmtc2 $rt, $rd", (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; + diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 9c196dd82f3..4c3981d9f68 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -53,7 +53,7 @@ MipsTargetMachine(const Target &T, StringRef TT, InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), TLInfo(*this), TSInfo(*this), JITInfo(), - ELFWriterInfo(false, isLittle) { + ELFWriterInfo(false, isLittle), STTI(&TLInfo) { } void MipsebTargetMachine::anchor() { } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 3a01828dd1d..60822d0c055 100644 
--- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -24,6 +24,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class formatted_raw_ostream; @@ -38,6 +39,8 @@ class MipsTargetMachine : public LLVMTargetMachine { MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; MipsELFWriterInfo ELFWriterInfo; + ScalarTargetTransformImpl STTI; + VectorTargetTransformInfo VTTI; public: MipsTargetMachine(const Target &T, StringRef TT, @@ -74,6 +77,12 @@ public: virtual const MipsELFWriterInfo *getELFWriterInfo() const { return &ELFWriterInfo; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index d3dfb35e261..c46094569e9 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -126,8 +126,10 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { return Base; // Truncate/sext the offset to the pointer size. - if (TD.getPointerSizeInBits() != 64) { - int SExtAmount = 64-TD.getPointerSizeInBits(); + unsigned AS = PtrVal->getType()->isPointerTy() ? + cast<PointerType>(PtrVal->getType())->getAddressSpace() : 0; + if (TD.getPointerSizeInBits(AS) != 64) { + int SExtAmount = 64-TD.getPointerSizeInBits(AS); Offset = (Offset << SExtAmount) >> SExtAmount; } @@ -1378,7 +1380,7 @@ getOpenCLAlignment(const DataLayout *TD, const FunctionType *FTy = dyn_cast<FunctionType>(Ty); if (FTy) - return TD->getPointerPrefAlignment(); + return TD->getPointerPrefAlignment(0); return TD->getPrefTypeAlignment(Ty); } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index dbfc660687e..7519b4a0831 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -72,7 +72,8 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit) + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit), + STTI(&TLInfo) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index d58a0768581..11bc9d4fa69 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -25,6 +25,7 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -44,6 +45,9 @@ class NVPTXTargetMachine : public LLVMTargetMachine { // Hold Strings that can be free'd all together with NVPTXTargetMachine ManagedStringPool ManagedStrPool; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; + //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, // bool DisableVerify, MCContext *&OutCtx); @@ -72,6 +76,12 @@ public: virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + virtual const ScalarTargetTransformInfo 
*getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } //virtual bool addInstSelector(PassManagerBase &PM, // CodeGenOpt::Level OptLevel); diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 914a9b0dcea..d8abd9fba07 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -439,7 +439,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { bool PPCLinuxAsmPrinter::doFinalization(Module &M) { const DataLayout *TD = TM.getDataLayout(); - bool isPPC64 = TD->getPointerSizeInBits() == 64; + bool isPPC64 = TD->getPointerSizeInBits(0) == 64; if (isPPC64 && !TOC.empty()) { const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc", @@ -451,8 +451,8 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(), E = TOC.end(); I != E; ++I) { OutStreamer.EmitLabel(I->second); - OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) + - "[TC]," + I->first->getName()); + MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName()); + OutStreamer.EmitTCEntry(*S); } } @@ -545,7 +545,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { void PPCDarwinAsmPrinter:: EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { - bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; + bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits(0) == 64; const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); @@ -640,7 +640,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { bool PPCDarwinAsmPrinter::doFinalization(Module &M) { - bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; + bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits(0) == 64; // Darwin/PPC always uses mach-o. const TargetLoweringObjectFileMachO &TLOFMacho = diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index b1c02e57f88..caf7bf2be79 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -50,6 +50,11 @@ static const uint16_t VRRegNo[] = { /// to manipulate the VRSAVE register, even though it uses vector registers. /// This can happen when the only registers used are known to be live in or out /// of the function. Remove all of the VRSAVE related code from the function. +/// FIXME: The removal of the code results in a compile failure at -O0 when the +/// function contains a function call, as the GPR containing original VRSAVE +/// contents is spilled and reloaded around the call. Without the prolog code, +/// the spill instruction refers to an undefined register. This code needs +/// to account for all uses of that GPR. static void RemoveVRSaveCode(MachineInstr *MI) { MachineBasicBlock *Entry = MI->getParent(); MachineFunction *MF = Entry->getParent(); @@ -283,12 +288,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, // process it. 
- for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { - if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { - HandleVRSaveUpdate(MBBI, TII); - break; + if (!Subtarget.isSVR4ABI()) + for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { + if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { + HandleVRSaveUpdate(MBBI, TII); + break; + } } - } // Move MBBI back to the beginning of the function. MBBI = MBB.begin(); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b52452ce89b..6195441cfc0 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -53,7 +53,9 @@ namespace { GlobalBaseReg = 0; SelectionDAGISel::runOnMachineFunction(MF); - InsertVRSaveCode(MF); + if (!PPCSubTarget.isSVR4ABI()) + InsertVRSaveCode(MF); + return true; } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 64bbcdfa94e..c18250a78f7 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2035,9 +2035,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // All aggregates smaller than 8 bytes must be passed right-justified. - if (ObjSize==1 || ObjSize==2) { - CurArgOffset = CurArgOffset + (4 - ObjSize); - } + if (ObjSize < PtrByteSize) + CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); // The value of the object is its address. int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); @@ -2087,7 +2086,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( ++GPR_idx; ArgOffset += PtrByteSize; } else { - ArgOffset += ArgSize - (ArgOffset-CurArgOffset); + ArgOffset += ArgSize - j; break; } } @@ -2142,6 +2141,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( ++FPR_idx; } else { needsLoad = true; + ArgSize = PtrByteSize; } ArgOffset += 8; @@ -3638,12 +3638,13 @@ PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee, ArgOffset += PtrByteSize; } else { - SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType()); + SDValue Const = DAG.getConstant(PtrByteSize - Size, + PtrOff.getValueType()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr, CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); - // This must go outside the CALLSEQ_START..END. + // The MEMCPY must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), @@ -3652,6 +3653,25 @@ PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee, ArgOffset += PtrByteSize; } continue; + } else if (isSVR4ABI && GPR_idx == NumGPRs && Size < 8) { + // Case: Size is 3, 5, 6, or 7 for SVR4 and we're out of registers. + // This is the same case as 1, 2, and 4 for SVR4 with no registers. + // FIXME: Separate into 64-bit SVR4 and Darwin versions of this + // function, and combine the duplicated code chunks. + SDValue Const = DAG.getConstant(PtrByteSize - Size, + PtrOff.getValueType()); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr, + CallSeqStart.getNode()->getOperand(0), + Flags, DAG, dl); + // The MEMCPY must go outside the CALLSEQ_START..END. 
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, + CallSeqStart.getNode()->getOperand(1)); + DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), + NewCallSeqStart.getNode()); + Chain = CallSeqStart = NewCallSeqStart; + ArgOffset += PtrByteSize; + continue; } // Copy entire object into memory. There are cases where gcc-generated // code assumes it is there, even if it could be put entirely into @@ -3786,6 +3806,13 @@ PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee, ++GPR_idx; } } else { + // Single-precision floating-point values are mapped to the + // second (rightmost) word of the stack doubleword. + if (Arg.getValueType() == MVT::f32 && isPPC64 && isSVR4ABI) { + SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); + } + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, isPPC64, isTailCall, false, MemOpChains, TailCallArguments, dl); diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index d2df6645bb0..d9d68446f53 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -570,12 +570,15 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // STVX VAL, 0, R0 // // FIXME: We use R0 here, because it isn't available for RA. - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0), + bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); + unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), FrameIdx, 0, 0)); NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX)) .addReg(SrcReg, getKillRegState(isKill)) - .addReg(PPC::R0) - .addReg(PPC::R0)); + .addReg(GPR0) + .addReg(GPR0)); } else { llvm_unreachable("Unknown regclass!"); } @@ -707,10 +710,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, // Dest = LVX 0, R0 // // FIXME: We use R0 here, because it isn't available for RA. - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0), + bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); + unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned GPR0 = Is64Bit ? 
PPC::X0 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0) - .addReg(PPC::R0)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0) + .addReg(GPR0)); } else { llvm_unreachable("Unknown regclass!"); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 459c3589d3f..d1232114732 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -498,7 +498,7 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, } else if (CRSpillFrameIdx) { FrameIdx = CRSpillFrameIdx; } else { - MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo(); + MachineFrameInfo *MFI = (const_cast<MachineFunction &>(MF)).getFrameInfo(); FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); CRSpillFrameIdx = FrameIdx; } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 5f39b8d2c29..b8613834753 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -43,7 +43,8 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, DL(Subtarget.getDataLayoutString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()) { + InstrItins(Subtarget.getInstrItineraryData()), + STTI(&TLInfo){ // The binutils for the BG/P are too old for CFI. if (Subtarget.isBGP()) diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 02d69fd15d1..c168433a71b 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -21,6 +21,7 @@ #include "PPCISelLowering.h" #include "PPCSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetTransformImpl.h" #include "llvm/DataLayout.h" namespace llvm { @@ -36,6 +37,8 @@ class PPCTargetMachine : public LLVMTargetMachine { PPCTargetLowering TLInfo; PPCSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: PPCTargetMachine(const Target &T, StringRef TT, @@ -63,6 +66,12 @@ public: virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 8b7559c2f9e..1d8cc771ddf 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, DL(Subtarget.getDataLayout()), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { + FrameLowering(Subtarget),STTI(&TLInfo) { } namespace { diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index c9f2d68eb19..0fbe2d7cda3 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -22,6 +22,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" +#include 
"llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -32,6 +33,8 @@ class SparcTargetMachine : public LLVMTargetMachine { SparcTargetLowering TLInfo; SparcSelectionDAGInfo TSInfo; SparcFrameLowering FrameLowering; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -52,6 +55,12 @@ public: virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const { return &TSInfo; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } virtual const DataLayout *getDataLayout() const { return &DL; } // Pass Pipeline Configuration diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index b0b5c875b82..393178a4692 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -26,6 +26,7 @@ using namespace llvm; void llvm::initializeTarget(PassRegistry &Registry) { initializeDataLayoutPass(Registry); initializeTargetLibraryInfoPass(Registry); + initializeTargetTransformInfoPass(Registry); } void LLVMInitializeTarget(LLVMPassRegistryRef R) { @@ -55,13 +56,21 @@ LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) { } unsigned LLVMPointerSize(LLVMTargetDataRef TD) { - return unwrap(TD)->getPointerSize(); + return unwrap(TD)->getPointerSize(0); +} + +unsigned LLVMPointerSizeForAS(LLVMTargetDataRef TD, unsigned AS) { + return unwrap(TD)->getPointerSize(AS); } LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) { return wrap(unwrap(TD)->getIntPtrType(getGlobalContext())); } +LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) { + return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), AS)); +} + unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) { return unwrap(TD)->getTypeSizeInBits(unwrap(Ty)); } diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp new file mode 100644 index 00000000000..1cb5edab9d0 --- /dev/null +++ b/lib/Target/TargetTransformImpl.cpp @@ -0,0 +1,43 @@ +// llvm/Target/TargetTransformImpl.cpp - Target Loop Trans Info ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetTransformImpl.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const { + return TLI->isLegalAddImmediate(imm); +} + +bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const { + return TLI->isLegalICmpImmediate(imm); +} + +bool ScalarTargetTransformImpl::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + return TLI->isLegalAddressingMode(AM, Ty); +} + +bool ScalarTargetTransformImpl::isTruncateFree(Type *Ty1, Type *Ty2) const { + return TLI->isTruncateFree(Ty1, Ty2); +} + +bool ScalarTargetTransformImpl::isTypeLegal(Type *Ty) const { + EVT T = TLI->getValueType(Ty); + return TLI->isTypeLegal(T); +} + +unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const { + return TLI->getJumpBufAlignment(); +} + +unsigned ScalarTargetTransformImpl::getJumpBufSize() const { + return TLI->getJumpBufSize(); +} diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index df34359a661..454664e3ed1 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -40,8 +40,8 @@ private: bool Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(), - bool matchingInlineAsm = false) { - if (matchingInlineAsm) return true; + bool MatchingInlineAsm = false) { + if (MatchingInlineAsm) return true; return Parser.Error(L, Msg, Ranges); } @@ -63,14 +63,10 @@ private: bool processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); - bool MatchAndEmitInstruction(SMLoc IDLoc, + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out); - bool MatchInstruction(SMLoc IDLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &Kind, unsigned &Opcode, - MatchInstMapAndConstraintsImpl &MapAndConstraints, - unsigned &OrigErrorInfo, bool matchingInlineAsm = false); + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. 
@@ -756,6 +752,7 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getParser().ParseExpression(Disp, End)) return 0; + End = Parser.getTok().getLoc(); return X86Operand::CreateMem(Disp, Start, End, Size); } @@ -1520,29 +1517,18 @@ processInstruction(MCInst &Inst, } bool X86AsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out) { - unsigned Kind; - unsigned Opcode; - unsigned ErrorInfo; - MatchInstMapAndConstraints MapAndConstraints; - bool Error = MatchInstruction(IDLoc, Operands, Out, Kind, Opcode, - MapAndConstraints, ErrorInfo); - return Error; -} - -bool X86AsmParser:: -MatchInstruction(SMLoc IDLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &Kind, unsigned &Opcode, - SmallVectorImpl<std::pair< unsigned, std::string > > &MapAndConstraints, - unsigned &OrigErrorInfo, bool matchingInlineAsm) { + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { assert(!Operands.empty() && "Unexpect empty operand list!"); X86Operand *Op = static_cast<X86Operand*>(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>(); + // Clear the opcode. + Opcode = ~0x0; + // First, handle aliases that expand to multiple instructions. // FIXME: This should be replaced with a real .td file alias mechanism. // Also, MatchInstructionImpl should actually *do* the EmitInstruction @@ -1554,7 +1540,7 @@ MatchInstruction(SMLoc IDLoc, MCInst Inst; Inst.setOpcode(X86::WAIT); Inst.setLoc(IDLoc); - if (!matchingInlineAsm) + if (!MatchingInlineAsm) Out.EmitInstruction(Inst); const char *Repl = @@ -1577,26 +1563,26 @@ MatchInstruction(SMLoc IDLoc, MCInst Inst; // First, try a direct match. - switch (MatchInstructionImpl(Operands, Kind, Inst, MapAndConstraints, - OrigErrorInfo, matchingInlineAsm, + switch (MatchInstructionImpl(Operands, Inst, + ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax())) { default: break; case Match_Success: // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the // individual transformations can chain off each other. 
- if (!matchingInlineAsm) + if (!MatchingInlineAsm) while (processInstruction(Inst, Operands)) ; Inst.setLoc(IDLoc); - if (!matchingInlineAsm) + if (!MatchingInlineAsm) Out.EmitInstruction(Inst); Opcode = Inst.getOpcode(); return false; case Match_MissingFeature: Error(IDLoc, "instruction requires a CPU feature not currently enabled", - EmptyRanges, matchingInlineAsm); + EmptyRanges, MatchingInlineAsm); return true; case Match_InvalidOperand: WasOriginallyInvalidOperand = true; @@ -1629,24 +1615,18 @@ MatchInstruction(SMLoc IDLoc, Tmp[Base.size()] = Suffixes[0]; unsigned ErrorInfoIgnore; unsigned Match1, Match2, Match3, Match4; - unsigned tKind; - MatchInstMapAndConstraints tMapAndConstraints[4]; - Match1 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[0], - ErrorInfoIgnore, isParsingIntelSyntax()); - if (Match1 == Match_Success) Kind = tKind; + Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); Tmp[Base.size()] = Suffixes[1]; - Match2 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[1], - ErrorInfoIgnore, isParsingIntelSyntax()); - if (Match2 == Match_Success) Kind = tKind; + Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); Tmp[Base.size()] = Suffixes[2]; - Match3 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[2], - ErrorInfoIgnore, isParsingIntelSyntax()); - if (Match3 == Match_Success) Kind = tKind; + Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); Tmp[Base.size()] = Suffixes[3]; - Match4 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[3], - ErrorInfoIgnore, isParsingIntelSyntax()); - if (Match4 == Match_Success) Kind = tKind; + Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); // Restore the old token. Op->setTokenValue(Base); @@ -1659,10 +1639,9 @@ MatchInstruction(SMLoc IDLoc, (Match3 == Match_Success) + (Match4 == Match_Success); if (NumSuccessfulMatches == 1) { Inst.setLoc(IDLoc); - if (!matchingInlineAsm) + if (!MatchingInlineAsm) Out.EmitInstruction(Inst); Opcode = Inst.getOpcode(); - // FIXME: Handle the map and constraints. return false; } @@ -1689,7 +1668,7 @@ MatchInstruction(SMLoc IDLoc, OS << "'" << Base << MatchChars[i] << "'"; } OS << ")"; - Error(IDLoc, OS.str(), EmptyRanges, matchingInlineAsm); + Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm); return true; } @@ -1700,28 +1679,28 @@ MatchInstruction(SMLoc IDLoc, if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { if (!WasOriginallyInvalidOperand) { - ArrayRef<SMRange> Ranges = matchingInlineAsm ? EmptyRanges : + ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges : Op->getLocRange(); return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", - Ranges, matchingInlineAsm); + Ranges, MatchingInlineAsm); } // Recover location info for the operand if we know which was the problem. 
- if (OrigErrorInfo != ~0U) { - if (OrigErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction", - EmptyRanges, matchingInlineAsm); + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction", - EmptyRanges, matchingInlineAsm); + EmptyRanges, MatchingInlineAsm); - X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo]; + X86Operand *Operand = (X86Operand*)Operands[ErrorInfo]; if (Operand->getStartLoc().isValid()) { SMRange OperandRange = Operand->getLocRange(); return Error(Operand->getStartLoc(), "invalid operand for instruction", - OperandRange, matchingInlineAsm); + OperandRange, MatchingInlineAsm); } } return Error(IDLoc, "invalid operand for instruction", EmptyRanges, - matchingInlineAsm); + MatchingInlineAsm); } // If one instruction matched with a missing feature, report this as a @@ -1729,7 +1708,7 @@ MatchInstruction(SMLoc IDLoc, if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ Error(IDLoc, "instruction requires a CPU feature not currently enabled", - EmptyRanges, matchingInlineAsm); + EmptyRanges, MatchingInlineAsm); return true; } @@ -1738,13 +1717,13 @@ MatchInstruction(SMLoc IDLoc, if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ Error(IDLoc, "invalid operand for instruction", EmptyRanges, - matchingInlineAsm); + MatchingInlineAsm); return true; } // If all of these were an outright failure, report it in a useless way. Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", - EmptyRanges, matchingInlineAsm); + MatchingInlineAsm); return true; } diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 3809f3d3853..0ca1209449f 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -307,7 +307,9 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { }; // This CPU doesn't support long nops. If needed, add more. - if (CPU == "geode") { + // FIXME: Can we get this from the subtarget somehow? + if (CPU == "generic" || CPU == "i386" || CPU == "i486" || CPU == "i586" || + CPU == "pentium" || CPU == "pentium-mmx" || CPU == "geode") { for (uint64_t i = 0; i < Count; ++i) OW->Write8(0x90); return true; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index c704ca17013..6b8385db6c6 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "X86AsmPrinter.h" -#include "X86MCInstLower.h" #include "X86.h" #include "X86COFFMachineModuleInfo.h" #include "X86MachineFunctionInfo.h" @@ -693,7 +692,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.EmitLabel(Stubs[i].first); OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(), 0); + TD->getPointerSize(0), 0); } Stubs.clear(); } diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 8acef9dc7ba..e7f817e3a98 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -317,7 +317,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, bool HasFP = hasFP(MF); // Calculate the number of bytes used for storing the return address. 
- int stackGrowth = -TD->getPointerSize(); + int stackGrowth = -TD->getPointerSize(0); // FIXME: This is a dirty hack. The code itself is a mess right now. // It should be rewritten from scratch and generalized at some point. @@ -717,7 +717,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { std::vector<MachineMove> &Moves = MMI.getFrameMoves(); const DataLayout *TD = MF.getTarget().getDataLayout(); uint64_t NumBytes = 0; - int stackGrowth = -TD->getPointerSize(); + int stackGrowth = -TD->getPointerSize(0); if (HasFP) { // Calculate required stack adjustment. diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 5a19f8ab981..0efeef20f2b 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1302,7 +1302,9 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, // that are not a MemSDNode, and thus don't have proper addrspace info. Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores - Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme + Parent->getOpcode() != X86ISD::TLSCALL && // Fixme + Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp + Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace(); // AddrSpace 256 -> GS, 257 -> FS. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7e43e5432d8..2f09e9e6ff1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -457,6 +457,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SETCC , MVT::i64 , Custom); } setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); + // NOTE: The EH_SJLJ_SETJMP/_LONGJMP support here is NOT intended to support + // SjLj exception handling but a light-weight setjmp/longjmp replacement to + // support continuations, user-level threading, etc. As a result, no + // other SjLj exception interfaces are implemented; please don't build + // your own exception handling based on them. + // LLVM/Clang supports zero-cost DWARF exception handling. + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // Darwin ABI issue. setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); @@ -939,6 +947,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal); } @@ -2649,7 +2660,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, unsigned StackAlignment = TFI.getStackAlignment(); uint64_t AlignMask = StackAlignment - 1; int64_t Offset = StackSize; - uint64_t SlotSize = TD->getPointerSize(); + uint64_t SlotSize = TD->getPointerSize(0); if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) { // Number smaller than 12 so just add the difference. Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask)); @@ -3017,7 +3028,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. 
- uint64_t SlotSize = TD->getPointerSize(); + uint64_t SlotSize = TD->getPointerSize(0); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, false); FuncInfo->setRAIndex(ReturnAddrIndex); @@ -5161,86 +5172,6 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64 -// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the -// constraint of matching input/output vector elements. -SDValue -X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); - SDNode *N = Op.getNode(); - EVT VT = Op.getValueType(); - unsigned NumElts = Op.getNumOperands(); - - // Check supported types and sub-targets. - // - // Only v2f32 -> v2f64 needs special handling. - if (VT != MVT::v2f64 || !Subtarget->hasSSE2()) - return SDValue(); - - SDValue VecIn; - EVT VecInVT; - SmallVector<int, 8> Mask; - EVT SrcVT = MVT::Other; - - // Check the patterns could be translated into X86vfpext. - for (unsigned i = 0; i < NumElts; ++i) { - SDValue In = N->getOperand(i); - unsigned Opcode = In.getOpcode(); - - // Skip if the element is undefined. - if (Opcode == ISD::UNDEF) { - Mask.push_back(-1); - continue; - } - - // Quit if one of the elements is not defined from 'fpext'. - if (Opcode != ISD::FP_EXTEND) - return SDValue(); - - // Check how the source of 'fpext' is defined. - SDValue L2In = In.getOperand(0); - EVT L2InVT = L2In.getValueType(); - - // Check the original type - if (SrcVT == MVT::Other) - SrcVT = L2InVT; - else if (SrcVT != L2InVT) // Quit if non-homogenous typed. - return SDValue(); - - // Check whether the value being 'fpext'ed is extracted from the same - // source. - Opcode = L2In.getOpcode(); - - // Quit if it's not extracted with a constant index. - if (Opcode != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(L2In.getOperand(1))) - return SDValue(); - - SDValue ExtractedFromVec = L2In.getOperand(0); - - if (VecIn.getNode() == 0) { - VecIn = ExtractedFromVec; - VecInVT = ExtractedFromVec.getValueType(); - } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec. - return SDValue(); - - Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue()); - } - - // Quit if all operands of BUILD_VECTOR are undefined. - if (!VecIn.getNode()) - return SDValue(); - - // Fill the remaining mask as undef. 
- for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i) - Mask.push_back(-1); - - return DAG.getNode(X86ISD::VFPEXT, DL, VT, - DAG.getVectorShuffle(VecInVT, DL, - VecIn, DAG.getUNDEF(VecInVT), - &Mask[0])); -} - SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -5273,10 +5204,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (Broadcast.getNode()) return Broadcast; - SDValue FpExt = LowerVectorFpExtend(Op, DAG); - if (FpExt.getNode()) - return FpExt; - unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumZero = 0; @@ -7724,7 +7651,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(), false, false, false, 0); - SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()), + SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize(0)), getPointerTy()); IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale); @@ -8215,6 +8142,20 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, return FIST; } +SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue In = Op.getOperand(0); + EVT SVT = In.getValueType(); + + assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); + + return DAG.getNode(X86ISD::VFPEXT, DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, + In, DAG.getUNDEF(SVT))); +} + SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); @@ -9215,6 +9156,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } + // X86 doesn't have an i8 cmov. If both operands are the result of a truncate, + // widen the cmov and push the truncate through. This avoids introducing a new + // branch during isel and doesn't add any extensions. + if (Op.getValueType() == MVT::i8 && + Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) { + SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0); + if (T1.getValueType() == T2.getValueType() && + // Blacklist CopyFromReg to avoid partial register stalls. + T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){ + SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue); + SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond); + return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); + } + } + // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); @@ -10345,7 +10301,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = - DAG.getConstant(TD->getPointerSize(), + DAG.getConstant(TD->getPointerSize(0), Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), @@ -10377,7 +10333,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { - return DAG.getIntPtrConstant(2*TD->getPointerSize()); + return DAG.getIntPtrConstant(2*TD->getPointerSize(0)); } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { @@ -10392,7 +10348,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX); SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, - DAG.getIntPtrConstant(TD->getPointerSize())); + DAG.getIntPtrConstant(TD->getPointerSize(0))); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); @@ -10403,6 +10359,21 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); } +SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL, + DAG.getVTList(MVT::i32, MVT::Other), + Op.getOperand(0), Op.getOperand(1)); +} + +SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, + Op.getOperand(0), Op.getOperand(1)); +} + static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) { return Op.getOperand(0); } @@ -11407,6 +11378,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); + case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FNEG: return LowerFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); @@ -11426,6 +11398,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); + case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); @@ -11535,6 +11509,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } return; } + case ISD::FP_ROUND: { + SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0)); + Results.push_back(V); + return; + } case ISD::READCYCLECOUNTER: { SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue TheChain = N->getOperand(0); @@ -11713,6 +11692,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; + case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP"; + case X86ISD::EH_SJLJ_LONGJMP: return 
"X86ISD::EH_SJLJ_LONGJMP"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; @@ -11729,6 +11710,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; + case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; case X86ISD::VSHL: return "X86ISD::VSHL"; @@ -12389,12 +12371,9 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, // Hi MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - if (i == X86::AddrDisp) { + if (i == X86::AddrDisp) MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32) - // Don't forget to transfer the target flag. - MachineOperand &MO = MIB->getOperand(MIB->getNumOperands()-1); - MO.setTargetFlags(MI->getOperand(MemOpndSlot + i).getTargetFlags()); - } else + else MIB.addOperand(MI->getOperand(MemOpndSlot + i)); } MIB.setMemRefs(MMOBegin, MMOEnd); @@ -13261,6 +13240,173 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, } MachineBasicBlock * +X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + unsigned DstReg; + unsigned MemOpndSlot = 0; + + unsigned CurOp = 0; + + DstReg = MI->getOperand(CurOp++).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + assert(RC->hasType(MVT::i32) && "Invalid destination!"); + unsigned mainDstReg = MRI.createVirtualRegister(RC); + unsigned restoreDstReg = MRI.createVirtualRegister(RC); + + MemOpndSlot = CurOp; + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + + // For v = setjmp(buf), we generate + // + // thisMBB: + // buf[Label_Offset] = ljMBB + // SjLjSetup restoreMBB + // + // mainMBB: + // v_main = 0 + // + // sinkMBB: + // v = phi(main, restore) + // + // restoreMBB: + // v_restore = 1 + + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); + MF->push_back(restoreMBB); + + MachineInstrBuilder MIB; + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // thisMBB: + unsigned PtrImmStoreOpc = (PVT == MVT::i64) ? 
X86::MOV64mi32 : X86::MOV32mi; + const int64_t Label_Offset = 1 * PVT.getStoreSize(); + + // Store IP + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrImmStoreOpc)); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + MIB.addDisp(MI->getOperand(MemOpndSlot + i), Label_Offset); + else + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + } + MIB.addMBB(restoreMBB); + MIB.setMemRefs(MMOBegin, MMOEnd); + // Setup + MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup)) + .addMBB(restoreMBB); + MIB.addRegMask(RegInfo->getNoPreservedMask()); + thisMBB->addSuccessor(mainMBB); + thisMBB->addSuccessor(restoreMBB); + + // mainMBB: + // EAX = 0 + BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg); + mainMBB->addSuccessor(sinkMBB); + + // sinkMBB: + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(X86::PHI), DstReg) + .addReg(mainDstReg).addMBB(mainMBB) + .addReg(restoreDstReg).addMBB(restoreMBB); + + // restoreMBB: + BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1); + BuildMI(restoreMBB, DL, TII->get(X86::JMP_4)).addMBB(sinkMBB); + restoreMBB->addSuccessor(sinkMBB); + + MI->eraseFromParent(); + return sinkMBB; +} + +MachineBasicBlock * +X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + + const TargetRegisterClass *RC = + (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; + unsigned Tmp = MRI.createVirtualRegister(RC); + // Since FP is only updated here but NOT referenced, it's treated as GPR. + unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP; + unsigned SP = RegInfo->getStackRegister(); + + MachineInstrBuilder MIB; + + const int64_t Label_Offset = 1 * PVT.getStoreSize(); + const int64_t SP_Offset = 2 * PVT.getStoreSize(); + + unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; + unsigned IJmpOpc = (PVT == MVT::i64) ? 
X86::JMP64r : X86::JMP32r; + + // Reload FP + MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(i)); + MIB.setMemRefs(MMOBegin, MMOEnd); + // Reload IP + MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + MIB.addDisp(MI->getOperand(i), Label_Offset); + else + MIB.addOperand(MI->getOperand(i)); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + // Reload SP + MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + MIB.addDisp(MI->getOperand(i), SP_Offset); + else + MIB.addOperand(MI->getOperand(i)); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + // Jump + BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp); + + MI->eraseFromParent(); + return MBB; +} + +MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { @@ -13475,6 +13621,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::VAARG_64: return EmitVAARG64WithCustomInserter(MI, BB); + + case X86::EH_SjLj_SetJmp32: + case X86::EH_SjLj_SetJmp64: + return emitEHSjLjSetJmp(MI, BB); + + case X86::EH_SjLj_LongJmp32: + case X86::EH_SjLj_LongJmp64: + return emitEHSjLjLongJmp(MI, BB); } } @@ -14478,6 +14632,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) { CC = X86::GetOppositeBranchCondition(CC); std::swap(TrueC, FalseC); + std::swap(TrueOp, FalseOp); } // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0. @@ -14560,6 +14715,46 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, } } } + + // Handle these cases: + // (select (x != c), e, c) -> (select (x != c), e, x), + // (select (x == c), c, e) -> (select (x == c), x, e) + // where c is an integer constant, and the "select" is the combination + // of CMOV and CMP. + // + // The rationale for this change is that a conditional-move from a constant + // needs two instructions; however, a conditional-move from a register needs + // only one instruction. + // + // CAVEAT: Replacing a constant with a symbolic value may obscure + // some instruction-combining opportunities. This opt needs to be + // postponed as late as possible. + // + if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) { + // The DCI.xxxx conditions are provided to postpone the optimization as + // late as possible.
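+ // For illustration (hypothetical source-level view, not from this patch):
+ //   r = (x == 7) ? 7 : e;   // cmov of the constant 7 needs an extra mov
+ // becomes
+ //   r = (x == 7) ? x : e;   // cmov of a register; x equals 7 on that path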
+ + ConstantSDNode *CmpAgainst = 0; + if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) && + (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) && + dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) { + + if (CC == X86::COND_NE && + CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) { + CC = X86::GetOppositeBranchCondition(CC); + std::swap(TrueOp, FalseOp); + } + + if (CC == X86::COND_E && + CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) { + SDValue Ops[] = { FalseOp, Cond.getOperand(0), + DAG.getConstant(CC, MVT::i8), Cond }; + return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops, + array_lengthof(Ops)); + } + } + } + return SDValue(); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1cae7ed2681..40e966ad676 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -217,6 +217,12 @@ namespace llvm { // EH_RETURN - Exception Handling helpers. EH_RETURN, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. + EH_SJLJ_SETJMP, + + // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. + EH_SJLJ_LONGJMP, + /// TC_RETURN - Tail call return. /// operand #0 chain /// operand #1 callee (register or absolute) @@ -233,6 +239,9 @@ namespace llvm { // VFPEXT - Vector FP extend. VFPEXT, + // VFPROUND - Vector FP round. + VFPROUND, + // VSHL, VSRL - 128-bit vector logical left / right shift VSHLDQ, VSRLDQ, @@ -788,6 +797,7 @@ namespace llvm { SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; @@ -806,6 +816,8 @@ namespace llvm { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; @@ -818,8 +830,6 @@ namespace llvm { SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const; - virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -904,6 +914,12 @@ namespace llvm { MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. 
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index f27b6f7f53a..9e6f27988f7 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -165,6 +165,33 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), } +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { + def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf), + "#EH_SJLJ_SETJMP32", + [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, + Requires<[In32BitMode]>; + def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf), + "#EH_SJLJ_SETJMP64", + [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, + Requires<[In64BitMode]>; + let isTerminator = 1 in { + def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf), + "#EH_SJLJ_LONGJMP32", + [(X86eh_sjlj_longjmp addr:$buf)]>, + Requires<[In32BitMode]>; + def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf), + "#EH_SJLJ_LONGJMP64", + [(X86eh_sjlj_longjmp addr:$buf)]>, + Requires<[In64BitMode]>; + } +} + +let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { + def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst), + "#EH_SjLj_Setup\t$dst", []>; +} + //===----------------------------------------------------------------------===// // Pseudo instructions used by segmented stacks. // diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 90354354367..46281efa571 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -93,6 +93,9 @@ def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, def X86vfpext : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>]>>; +def X86vfpround: SDNode<"X86ISD::VFPROUND", + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<0>, SDTCisFP<1>]>>; def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>; def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 5c6084fe008..2f637685b3f 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -216,6 +216,14 @@ def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR, def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; +def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; + def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2aa4f3f4dbb..cc1291a8a0f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2125,6 +2125,10 @@ let Predicates = [HasAVX] in { (VCVTDQ2PSYrm addr:$src)>; // Match fround and fextend for 128/256-bit conversions + def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))), + (VCVTPD2PSrr VR128:$src)>; + def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))), + (VCVTPD2PSXrm addr:$src)>; def : Pat<(v4f32 (fround (v4f64 VR256:$src))), (VCVTPD2PSYrr VR256:$src)>; def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), @@ -2139,7 +2143,12 @@ let Predicates = [HasAVX] in { } let Predicates = [UseSSE2] in { - 
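The new v4f32 (X86vfpround (v2f64 ...)) patterns above, and their SSE2 twins just below, select (V)CVTPD2PS for 128-bit double-to-float truncation. A standalone illustration of the instruction these patterns pick, via its intrinsic form (not patch code):

    #include <emmintrin.h>
    // cvtpd2ps narrows the two doubles in v into the low two float lanes
    // and zeroes the upper lanes -- the operation X86vfpround models.
    __m128 narrowPairOfDoubles(__m128d v) { return _mm_cvtpd_ps(v); }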
// Match fextend for 128 conversions + // Match fround and fextend for 128 conversions + def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))), + (CVTPD2PSrr VR128:$src)>; + def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))), + (CVTPD2PSrm addr:$src)>; + def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), (CVTPS2PDrr VR128:$src)>; } diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 6e4db73c3a9..c44549c30ac 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "X86MCInstLower.h" #include "X86AsmPrinter.h" #include "X86COFFMachineModuleInfo.h" #include "InstPrinter/X86ATTInstPrinter.h" @@ -29,6 +28,31 @@ #include "llvm/ADT/SmallString.h" using namespace llvm; +namespace { + +/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. +class X86MCInstLower { + MCContext &Ctx; + Mangler *Mang; + const MachineFunction &MF; + const TargetMachine &TM; + const MCAsmInfo &MAI; + X86AsmPrinter &AsmPrinter; +public: + X86MCInstLower(Mangler *mang, const MachineFunction &MF, + X86AsmPrinter &asmprinter); + + void Lower(const MachineInstr *MI, MCInst &OutMI) const; + + MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; + MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + +private: + MachineModuleInfoMachO &getMachOMMI() const; +}; + +} // end anonymous namespace + X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf, X86AsmPrinter &asmprinter) : Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()), diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h deleted file mode 100644 index b4d4cfd301a..00000000000 --- a/lib/Target/X86/X86MCInstLower.h +++ /dev/null @@ -1,52 +0,0 @@ -//===-- X86MCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef X86_MCINSTLOWER_H -#define X86_MCINSTLOWER_H - -#include "llvm/Support/Compiler.h" - -namespace llvm { - class MCAsmInfo; - class MCContext; - class MCInst; - class MCOperand; - class MCSymbol; - class MachineInstr; - class MachineFunction; - class MachineModuleInfoMachO; - class MachineOperand; - class Mangler; - class TargetMachine; - class X86AsmPrinter; - -/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. 
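With X86MCInstLower now defined in an anonymous namespace inside X86MCInstLower.cpp, the class has internal linkage and nothing outside that translation unit can name it, which is why the surrounding hunk deletes the standalone header. A minimal sketch of the idiom, with nothing LLVM-specific assumed:

    namespace {
    // Internal linkage: this type is invisible to every other translation
    // unit, so no header declaration (or LLVM_LIBRARY_VISIBILITY) is needed.
    struct Lowerer {
      int lower(int op) const { return op * 2; }
    };
    } // end anonymous namespace

    int publicEntryPoint(int op) { return Lowerer().lower(op); }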
-class LLVM_LIBRARY_VISIBILITY X86MCInstLower { - MCContext &Ctx; - Mangler *Mang; - const MachineFunction &MF; - const TargetMachine &TM; - const MCAsmInfo &MAI; - X86AsmPrinter &AsmPrinter; -public: - X86MCInstLower(Mangler *mang, const MachineFunction &MF, - X86AsmPrinter &asmprinter); - - void Lower(const MachineInstr *MI, MCInst &OutMI) const; - - MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; - MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; - -private: - MachineModuleInfoMachO &getMachOMMI() const; -}; - -} - -#endif diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index c840ea21a89..4bcf6b1f19e 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -261,6 +261,11 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { return CSR_64_RegMask; } +const uint32_t* +X86RegisterInfo::getNoPreservedMask() const { + return CSR_NoRegs_RegMask; +} + BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 0287fa22062..7932ede8dd6 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -100,6 +100,7 @@ public: /// callee-save registers on this target. const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; + const uint32_t *getNoPreservedMask() const; /// getReservedRegs - Returns a bitset indexed by physical register number /// indicating if a register is a special register that has particular uses and diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f8cced885d1..655ede79ba3 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -48,7 +48,8 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, InstrInfo(*this), TSInfo(*this), TLInfo(*this), - JITInfo(*this) { + JITInfo(*this), + STTI(&TLInfo) { } void X86_64TargetMachine::anchor() { } @@ -64,7 +65,8 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, InstrInfo(*this), TSInfo(*this), TLInfo(*this), - JITInfo(*this) { + JITInfo(*this), + STTI(&TLInfo) { } /// X86TargetMachine ctor - Create an X86 target. diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 5301299c1f3..4bad695b4c4 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -25,6 +25,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -85,6 +86,8 @@ class X86_32TargetMachine : public X86TargetMachine { X86SelectionDAGInfo TSInfo; X86TargetLowering TLInfo; X86JITInfo JITInfo; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -103,6 +106,12 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } }; /// X86_64TargetMachine - X86 64-bit target machine. 
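getNoPreservedMask above returns CSR_NoRegs_RegMask, an all-zero register mask. In LLVM's regmask convention a set bit means the corresponding physical register is preserved across the call, so the all-zero mask declares every register clobbered. A small standalone model of that bit convention (names hypothetical):

    #include <cstdint>
    #include <cstdio>
    // Bit i set => physical register i survives the call.
    static bool isPreserved(const uint32_t *Mask, unsigned Reg) {
      return Mask[Reg / 32] & (1u << (Reg % 32));
    }
    int main() {
      static const uint32_t NoRegsMask[4] = {0, 0, 0, 0}; // nothing survives
      std::printf("reg 5 preserved: %d\n", isPreserved(NoRegsMask, 5));
      return 0;
    }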
@@ -114,6 +123,8 @@ class X86_64TargetMachine : public X86TargetMachine { X86SelectionDAGInfo TSInfo; X86TargetLowering TLInfo; X86JITInfo JITInfo; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -132,6 +143,12 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } }; } // End llvm namespace diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index c71d978ad81..0b7e3e10d4b 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, InstrInfo(), FrameLowering(Subtarget), TLInfo(*this), - TSInfo(*this) { + TSInfo(*this), STTI(&TLInfo) { } namespace { diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index f7fec29f544..c60c6a37f95 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -20,6 +20,7 @@ #include "XCoreISelLowering.h" #include "XCoreSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetTransformImpl.h" #include "llvm/DataLayout.h" namespace llvm { @@ -31,6 +32,8 @@ class XCoreTargetMachine : public LLVMTargetMachine { XCoreFrameLowering FrameLowering; XCoreTargetLowering TLInfo; XCoreSelectionDAGInfo TSInfo; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: XCoreTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -53,6 +56,12 @@ public: virtual const TargetRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } virtual const DataLayout *getDataLayout() const { return &DL; } // Pass Pipeline Configuration diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 6f6ff9ca2d5..8a0274b5ff7 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -518,8 +518,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, const AttrListPtr &PAL = F->getAttributes(); // Add any return attributes. 
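Each target machine above now owns ScalarTargetTransformImpl/VectorTargetTransformImpl members and exposes them through the two virtual getters, letting IR-level passes ask legality and cost questions without reaching into TargetLowering. A sketch of the consumer side, using only hooks visible in this patch (the surrounding TargetMachine reference is assumed):

    // Given some TargetMachine &TM available to the pass:
    if (const ScalarTargetTransformInfo *STTI =
            TM.getScalarTargetTransformInfo()) {
      // The same legality questions LSR asks later in this patch.
      bool CmpOk = STTI->isLegalICmpImmediate(42);
      bool AddOk = STTI->isLegalAddImmediate(42);
      (void)CmpOk; (void)AddOk;
    }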
- if (Attributes attrs = PAL.getRetAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); + Attributes attrs = PAL.getRetAttributes(); + if (attrs.hasAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, + attrs)); // First, determine the new argument list unsigned ArgIndex = 1; @@ -535,7 +537,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } else if (!ArgsToPromote.count(I)) { // Unchanged argument Params.push_back(I->getType()); - if (Attributes attrs = PAL.getParamAttributes(ArgIndex)) + Attributes attrs = PAL.getParamAttributes(ArgIndex); + if (attrs.hasAttributes()) AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) @@ -588,8 +591,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } // Add any function attributes. - if (Attributes attrs = PAL.getFnAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); + attrs = PAL.getFnAttributes(); + if (attrs.hasAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + attrs)); Type *RetTy = FTy->getReturnType(); @@ -634,8 +639,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, const AttrListPtr &CallPAL = CS.getAttributes(); // Add any return attributes. - if (Attributes attrs = CallPAL.getRetAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); + Attributes attrs = CallPAL.getRetAttributes(); + if (attrs.hasAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, + attrs)); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. @@ -646,7 +653,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { Args.push_back(*AI); // Unmodified argument - if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) + Attributes Attrs = CallPAL.getParamAttributes(ArgIndex); + if (Attrs.hasAttributes()) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } else if (ByValArgsToTransform.count(I)) { @@ -707,13 +715,16 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); - if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) + Attributes Attrs = CallPAL.getParamAttributes(ArgIndex); + if (Attrs.hasAttributes()) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } // Add any function attributes. 
- if (Attributes attrs = CallPAL.getFnAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); + attrs = CallPAL.getFnAttributes(); + if (attrs.hasAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + attrs)); Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index b107669b177..fc22548db70 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -276,8 +276,10 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { SmallVector<AttributeWithIndex, 8> AttributesVec; for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i) AttributesVec.push_back(PAL.getSlot(i)); - if (Attributes FnAttrs = PAL.getFnAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + Attributes FnAttrs = PAL.getFnAttributes(); + if (FnAttrs.hasAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + FnAttrs)); PAL = AttrListPtr::get(AttributesVec); } @@ -762,13 +764,17 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added. if (NRetTy->isVoidTy()) - RAttrs &= ~Attributes::typeIncompatible(NRetTy); + RAttrs = + Attributes::get(NRetTy->getContext(), AttrBuilder(RAttrs). + removeAttributes(Attributes::typeIncompatible(NRetTy))); else - assert((RAttrs & Attributes::typeIncompatible(NRetTy)) == 0 - && "Return attributes no longer compatible?"); + assert(!AttrBuilder(RAttrs). + hasAttributes(Attributes::typeIncompatible(NRetTy)) && + "Return attributes no longer compatible?"); - if (RAttrs) - AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs)); + if (RAttrs.hasAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, + RAttrs)); // Remember which arguments are still alive. SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false); @@ -785,7 +791,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Get the original parameter attributes (skipping the first one, that is // for the return value. - if (Attributes Attrs = PAL.getParamAttributes(i + 1)) + Attributes Attrs = PAL.getParamAttributes(i + 1); + if (Attrs.hasAttributes()) AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs)); } else { ++NumArgumentsEliminated; @@ -795,7 +802,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } if (FnAttrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + FnAttrs)); // Reconstruct the AttributesList based on the vector we constructed. AttrListPtr NewPAL = AttrListPtr::get(AttributesVec); @@ -831,9 +839,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Attributes RAttrs = CallPAL.getRetAttributes(); Attributes FnAttrs = CallPAL.getFnAttributes(); // Adjust in case the function was changed to return void. - RAttrs &= ~Attributes::typeIncompatible(NF->getReturnType()); - if (RAttrs) - AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs)); + RAttrs = + Attributes::get(NF->getContext(), AttrBuilder(RAttrs). 
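The attribute hunks in ArgumentPromotion and DeadArgumentElimination follow one mechanical recipe: Attributes is no longer bool-convertible, so the old "if (Attributes A = ...)" pattern becomes an explicit hasAttributes() test, and the magic slot numbers 0 and ~0 become the named AttrListPtr::ReturnIndex and AttrListPtr::FunctionIndex. The before/after shape, distilled as a sketch using only API spellings visible in the diff:

    // Before: magic index, implicit bool conversion.
    //   if (Attributes A = PAL.getRetAttributes())
    //     Vec.push_back(AttributeWithIndex::get(0, A));  // 0 == return slot
    // After: explicit query, named index.
    Attributes A = PAL.getRetAttributes();
    if (A.hasAttributes())
      Vec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, A));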
+ removeAttributes(Attributes::typeIncompatible(NF->getReturnType()))); + if (RAttrs.hasAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, + RAttrs)); // Declare these outside of the loops, so we can reuse them for the second // loop, which loops the varargs. @@ -845,19 +856,22 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (ArgAlive[i]) { Args.push_back(*I); // Get original parameter attributes, but skip return attributes. - if (Attributes Attrs = CallPAL.getParamAttributes(i + 1)) + Attributes Attrs = CallPAL.getParamAttributes(i + 1); + if (Attrs.hasAttributes()) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } // Push any varargs arguments on the list. Don't forget their attributes. for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { Args.push_back(*I); - if (Attributes Attrs = CallPAL.getParamAttributes(i + 1)) + Attributes Attrs = CallPAL.getParamAttributes(i + 1); + if (Attrs.hasAttributes()) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } if (FnAttrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + FnAttrs)); // Reconstruct the AttributesList based on the vector we constructed. AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec); diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 43e12d44441..d8f374c330e 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -212,15 +212,17 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { MadeChange = true; // Clear out any existing attributes. - Attributes::Builder B; + AttrBuilder B; B.addAttribute(Attributes::ReadOnly) .addAttribute(Attributes::ReadNone); - F->removeAttribute(~0, Attributes::get(B)); + F->removeAttribute(AttrListPtr::FunctionIndex, + Attributes::get(F->getContext(), B)); // Add in the new attribute. B.clear(); B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone); - F->addAttribute(~0, Attributes::get(B)); + F->addAttribute(AttrListPtr::FunctionIndex, + Attributes::get(F->getContext(), B)); if (ReadsMemory) ++NumReadOnly; @@ -355,7 +357,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { ArgumentGraph AG; - Attributes::Builder B; + AttrBuilder B; B.addAttribute(Attributes::NoCapture); // Check each function in turn, determining which pointer arguments are not @@ -379,7 +381,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; ++A) { if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { - A->addAttr(Attributes::get(B)); + A->addAttr(Attributes::get(F->getContext(), B)); ++NumNoCapture; Changed = true; } @@ -394,7 +396,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { if (!Tracker.Captured) { if (Tracker.Uses.empty()) { // If it's trivially not captured, mark it nocapture now. - A->addAttr(Attributes::get(B)); + A->addAttr(Attributes::get(F->getContext(), B)); ++NumNoCapture; Changed = true; } else { @@ -427,7 +429,9 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { // eg. "void f(int* x) { if (...) 
f(x); }" if (ArgumentSCC[0]->Uses.size() == 1 && ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { - ArgumentSCC[0]->Definition->addAttr(Attributes::get(B)); + ArgumentSCC[0]-> + Definition-> + addAttr(Attributes::get(ArgumentSCC[0]->Definition->getContext(), B)); ++NumNoCapture; Changed = true; } @@ -469,7 +473,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; - A->addAttr(Attributes::get(B)); + A->addAttr(Attributes::get(A->getContext(), B)); ++NumNoCapture; Changed = true; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index a1b976577a7..678189b3d6c 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2061,28 +2061,26 @@ static void ChangeCalleesToFastCall(Function *F) { } } -static AttrListPtr StripNest(const AttrListPtr &Attrs) { - Attributes::Builder B; - B.addAttribute(Attributes::Nest); - +static AttrListPtr StripNest(LLVMContext &C, const AttrListPtr &Attrs) { for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { if (!Attrs.getSlot(i).Attrs.hasAttribute(Attributes::Nest)) continue; // There can be only one. - return Attrs.removeAttr(Attrs.getSlot(i).Index, Attributes::get(B)); + return Attrs.removeAttr(C, Attrs.getSlot(i).Index, + Attributes::get(C, Attributes::Nest)); } return Attrs; } static void RemoveNestAttribute(Function *F) { - F->setAttributes(StripNest(F->getAttributes())); + F->setAttributes(StripNest(F->getContext(), F->getAttributes())); for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ if (isa<BlockAddress>(*UI)) continue; CallSite User(cast<Instruction>(*UI)); - User.setAttributes(StripNest(User.getAttributes())); + User.setAttributes(StripNest(F->getContext(), User.getAttributes())); } } diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 6233922db92..86c76f0c0a0 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -1,4 +1,4 @@ -//===-- Scalar.cpp --------------------------------------------------------===// +//===-- IPO.cpp -----------------------------------------------------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 3e598abfcf6..fb4ecbfe7b0 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -137,7 +137,7 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { // If the SCC doesn't unwind or doesn't throw, note this fact. 
if (!SCCMightUnwind || !SCCMightReturn) for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Attributes::Builder NewAttributes; + AttrBuilder NewAttributes; if (!SCCMightUnwind) NewAttributes.addAttribute(Attributes::NoUnwind); @@ -146,7 +146,9 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { Function *F = (*I)->getFunction(); const AttrListPtr &PAL = F->getAttributes(); - const AttrListPtr &NPAL = PAL.addAttr(~0, Attributes::get(NewAttributes)); + const AttrListPtr &NPAL = PAL.addAttr(F->getContext(), ~0, + Attributes::get(F->getContext(), + NewAttributes)); if (PAL != NPAL) { MadeChange = true; F->setAttributes(NPAL); diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 325bb20fbe8..41017c52879 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -18,6 +18,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Support/TargetFolder.h" +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" namespace llvm { class CallSite; @@ -74,6 +75,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner DataLayout *TD; TargetLibraryInfo *TLI; bool MadeIRChange; + LibCallSimplifier *Simplifier; public: /// Worklist - All of the instructions that need to be simplified. InstCombineWorklist Worklist; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index f92c4baeba7..5ad6f9111c8 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -111,10 +111,13 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // get the TBAA tag describing our copy. if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) { if (M->getNumOperands() == 3 && + M->getOperand(0) && isa<ConstantInt>(M->getOperand(0)) && cast<ConstantInt>(M->getOperand(0))->isNullValue() && + M->getOperand(1) && isa<ConstantInt>(M->getOperand(1)) && cast<ConstantInt>(M->getOperand(1))->getValue() == Size && + M->getOperand(2) && isa<MDNode>(M->getOperand(2))) CopyMD = cast<MDNode>(M->getOperand(2)); } @@ -775,39 +778,6 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, return true; } -namespace { -class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls { - InstCombiner *IC; -protected: - void replaceCall(Value *With) { - NewInstruction = IC->ReplaceInstUsesWith(*CI, With); - } - bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { - if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) - return true; - if (ConstantInt *SizeCI = - dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { - if (SizeCI->isAllOnesValue()) - return true; - if (isString) { - uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); - // If the length is 0 we don't know how long it is and so we can't - // remove the check. - if (Len == 0) return false; - return SizeCI->getZExtValue() >= Len; - } - if (ConstantInt *Arg = dyn_cast<ConstantInt>( - CI->getArgOperand(SizeArgOp))) - return SizeCI->getZExtValue() >= Arg->getZExtValue(); - } - return false; - } -public: - InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { } - Instruction *NewInstruction; -}; -} // end anonymous namespace - // Try to fold some different type of calls here. 
// Currently we're only working with the checking functions, memcpy_chk, // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, @@ -815,9 +785,10 @@ public: Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) { if (CI->getCalledFunction() == 0) return 0; - InstCombineFortifiedLibCalls Simplifier(this); - Simplifier.fold(CI, TD, TLI); - return Simplifier.NewInstruction; + if (Value *With = Simplifier->optimizeCall(CI)) + return ReplaceInstUsesWith(*CI, With); + + return 0; } static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { @@ -1036,7 +1007,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this return value. if (!CallerPAL.isEmpty() && !Caller->use_empty()) { - Attributes::Builder RAttrs = CallerPAL.getRetAttributes(); + AttrBuilder RAttrs = CallerPAL.getRetAttributes(); if (RAttrs.hasAttributes(Attributes::typeIncompatible(NewRetTy))) return false; // Attribute not compatible with transformed value. } @@ -1067,7 +1038,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this parameter value. Attributes Attrs = CallerPAL.getParamAttributes(i + 1); - if (Attrs & Attributes::typeIncompatible(ParamTy)) + if (AttrBuilder(Attrs). + hasAttributes(Attributes::typeIncompatible(ParamTy))) return false; // Attribute not compatible with transformed value. // If the parameter is passed as a byval argument, then we have to have a @@ -1137,7 +1109,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { attrVec.reserve(NumCommonArgs); // Get any return attributes. - Attributes::Builder RAttrs = CallerPAL.getRetAttributes(); + AttrBuilder RAttrs = CallerPAL.getRetAttributes(); // If the return value is not being used, the type may not be compatible // with the existing attributes. Wipe out any problematic attributes. @@ -1145,7 +1117,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Add the new return attributes. if (RAttrs.hasAttributes()) - attrVec.push_back(AttributeWithIndex::get(0, Attributes::get(RAttrs))); + attrVec.push_back( + AttributeWithIndex::get(AttrListPtr::ReturnIndex, + Attributes::get(FT->getContext(), RAttrs))); AI = CS.arg_begin(); for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { @@ -1159,7 +1133,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } // Add any parameter attributes. - if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) + Attributes PAttrs = CallerPAL.getParamAttributes(i + 1); + if (PAttrs.hasAttributes()) attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); } @@ -1187,14 +1162,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } // Add any parameter attributes. - if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) + Attributes PAttrs = CallerPAL.getParamAttributes(i + 1); + if (PAttrs.hasAttributes()) attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); } } } - if (Attributes FnAttrs = CallerPAL.getFnAttributes()) - attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + Attributes FnAttrs = CallerPAL.getFnAttributes(); + if (FnAttrs.hasAttributes()) + attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + FnAttrs)); if (NewRetTy->isVoidTy()) Caller->setName(""); // Void type should not have a name. @@ -1302,8 +1280,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // mean appending it. Likewise for attributes. // Add any result attributes. 
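tryOptimizeCall above now forwards to the shared LibCallSimplifier instead of the deleted InstCombineFortifiedLibCalls helper. The folding it inherits is the _FORTIFY_SOURCE one sketched here (standalone C++; the buffer size is invented for illustration):

    #include <cstring>
    char Dst[16];
    void copyEight(const char *Src) {
      // Under _FORTIFY_SOURCE this call is emitted as
      //   __memcpy_chk(Dst, Src, 8, __builtin_object_size(Dst, 0))
      // and, because 8 <= 16 is provable, the checked variant folds right
      // back to a plain memcpy -- the isFoldable logic that just moved out
      // of InstCombine.
      std::memcpy(Dst, Src, 8);
    }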
- if (Attributes Attr = Attrs.getRetAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(0, Attr)); + Attributes Attr = Attrs.getRetAttributes(); + if (Attr.hasAttributes()) + NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, + Attr)); { unsigned Idx = 1; @@ -1323,7 +1303,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add the original argument and attributes. NewArgs.push_back(*I); - if (Attributes Attr = Attrs.getParamAttributes(Idx)) + Attr = Attrs.getParamAttributes(Idx); + if (Attr.hasAttributes()) NewAttrs.push_back (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); @@ -1332,8 +1313,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, } // Add any function attributes. - if (Attributes Attr = Attrs.getFnAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(~0, Attr)); + Attr = Attrs.getFnAttributes(); + if (Attr.hasAttributes()) + NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + Attr)); // The trampoline may have been bitcast to a bogus type (FTy). // Handle this by synthesizing a new function type, equal to FTy diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index b59210a9df1..f3f3f8f585d 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1293,15 +1293,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // If the source integer type is not the intptr_t type for this target, do a // trunc or zext to the intptr_t type, then inttoptr of it. This allows the // cast to be exposed to other transforms. + unsigned AS = CI.getAddressSpace(); if (TD) { if (CI.getOperand(0)->getType()->getScalarSizeInBits() > - TD->getPointerSizeInBits()) { + TD->getPointerSizeInBits(AS)) { Value *P = Builder->CreateTrunc(CI.getOperand(0), TD->getIntPtrType(CI.getContext())); return new IntToPtrInst(P, CI.getType()); } if (CI.getOperand(0)->getType()->getScalarSizeInBits() < - TD->getPointerSizeInBits()) { + TD->getPointerSizeInBits(AS)) { Value *P = Builder->CreateZExt(CI.getOperand(0), TD->getIntPtrType(CI.getContext())); return new IntToPtrInst(P, CI.getType()); @@ -1368,13 +1369,14 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // If the destination integer type is not the intptr_t type for this target, // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast // to be exposed to other transforms. + unsigned AS = CI.getPointerAddressSpace(); if (TD) { - if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { + if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits(AS)) { Value *P = Builder->CreatePtrToInt(CI.getOperand(0), TD->getIntPtrType(CI.getContext())); return new TruncInst(P, CI.getType()); } - if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { + if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits(AS)) { Value *P = Builder->CreatePtrToInt(CI.getOperand(0), TD->getIntPtrType(CI.getContext())); return new ZExtInst(P, CI.getType()); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 4d5ffddc4c7..e3e5ddae80b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -365,11 +365,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // order the state machines in complexity of the generated code. 
Value *Idx = GEP->getOperand(2); + unsigned AS = GEP->getPointerAddressSpace(); // If the index is larger than the pointer size of the target, truncate the // index down like the GEP would do implicitly. We don't have to do this for // an inbounds GEP because the index can't be out of range. if (!GEP->isInBounds() && - Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) + Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits(AS)) Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); // If the comparison is only true for one or two elements, emit direct @@ -528,10 +529,11 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { } } + unsigned AS = cast<GetElementPtrInst>(GEP)->getPointerAddressSpace(); // Okay, we know we have a single variable index, which must be a // pointer/array/vector index. If there is no offset, life is simple, return // the index. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); + unsigned IntPtrWidth = TD.getPointerSizeInBits(AS); if (Offset == 0) { // Cast to intptrty in case a truncation occurs. If an extension is needed, // we don't need to bother extending: the extension won't affect where the @@ -1552,7 +1554,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && - TD->getPointerSizeInBits() == + TD->getPointerSizeInBits( + cast<PtrToIntInst>(LHSCI)->getPointerAddressSpace()) == cast<IntegerType>(DestTy)->getBitWidth()) { Value *RHSOp = 0; if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index edfc060888b..5356fdcba7c 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2130,6 +2130,9 @@ bool InstCombiner::runOnFunction(Function &F) { InstCombineIRInserter(Worklist)); Builder = &TheBuilder; + LibCallSimplifier TheSimplifier(TD, TLI); + Simplifier = &TheSimplifier; + bool EverMadeChange = false; // Lower dbg.declare intrinsics otherwise their value may be clobbered diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 10ab9cb6039..b566994edfc 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -148,38 +148,29 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), cl::Hidden, cl::init(-1)); namespace { - -/// An object of this type is created while instrumenting every function. -struct AsanFunctionContext { - AsanFunctionContext(Function &Function) : F(Function) { } - - Function &F; -}; - /// AddressSanitizer: instrument the code in module to find memory bugs. 
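These cast and compare hunks all pass the pointer operand's address space to getPointerSizeInBits because a DataLayout may give each address space its own pointer width. An illustrative (hypothetical) layout and the query it drives, using only names from the diff:

    // Hypothetical target: 64-bit pointers in AS0, 32-bit pointers in AS1,
    // e.g. a layout string along the lines of "e-p:64:64:64-p1:32:32:32".
    // The width used to truncate or extend an index must therefore come
    // from the pointer's own address space:
    unsigned AS    = GEP->getPointerAddressSpace();
    unsigned Width = TD->getPointerSizeInBits(AS);  // 64 for AS0, 32 for AS1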
-struct AddressSanitizer : public ModulePass { +struct AddressSanitizer : public FunctionPass { AddressSanitizer(); virtual const char *getPassName() const; - void instrumentMop(AsanFunctionContext &AFC, Instruction *I); - void instrumentAddress(AsanFunctionContext &AFC, - Instruction *OrigIns, IRBuilder<> &IRB, + void instrumentMop(Instruction *I); + void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize); Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex); - bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI); - void instrumentMemIntrinsicParam(AsanFunctionContext &AFC, - Instruction *OrigIns, Value *Addr, + bool instrumentMemIntrinsic(MemIntrinsic *MI); + void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); - bool handleFunction(Module &M, Function &F); + bool runOnFunction(Function &F); void createInitializerPoisonCalls(Module &M, Value *FirstAddr, Value *LastAddr); bool maybeInsertAsanInitAtFunctionEntry(Function &F); - bool poisonStackInFunction(Module &M, Function &F); - virtual bool runOnModule(Module &M); + bool poisonStackInFunction(Function &F); + virtual bool doInitialization(Module &M); + virtual bool doFinalization(Module &M); bool insertGlobalRedzones(Module &M); static char ID; // Pass identification, replacement for typeid @@ -216,6 +207,8 @@ struct AddressSanitizer : public ModulePass { Type *IntptrPtrTy; Function *AsanCtorFunction; Function *AsanInitFunction; + Function *AsanStackMallocFunc, *AsanStackFreeFunc; + Function *AsanHandleNoReturnFunc; Instruction *CtorInsertBefore; OwningPtr<BlackList> BL; // This array is indexed by AccessIsWrite and log2(AccessSize). @@ -230,8 +223,8 @@ char AddressSanitizer::ID = 0; INITIALIZE_PASS(AddressSanitizer, "asan", "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) -AddressSanitizer::AddressSanitizer() : ModulePass(ID) { } -ModulePass *llvm::createAddressSanitizerPass() { +AddressSanitizer::AddressSanitizer() : FunctionPass(ID) { } +FunctionPass *llvm::createAddressSanitizerPass() { return new AddressSanitizer(); } @@ -295,12 +288,12 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { } void AddressSanitizer::instrumentMemIntrinsicParam( - AsanFunctionContext &AFC, Instruction *OrigIns, + Instruction *OrigIns, Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) { // Check the first byte. { IRBuilder<> IRB(InsertBefore); - instrumentAddress(AFC, OrigIns, IRB, Addr, 8, IsWrite); + instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite); } // Check the last byte. 
{ @@ -310,13 +303,12 @@ void AddressSanitizer::instrumentMemIntrinsicParam( SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne); - instrumentAddress(AFC, OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite); + instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite); } } // Instrument memset/memmove/memcpy -bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC, - MemIntrinsic *MI) { +bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { Value *Dst = MI->getDest(); MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI); Value *Src = MemTran ? MemTran->getSource() : 0; @@ -335,9 +327,9 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC, InsertBefore = splitBlockAndInsertIfThen(Cmp, false); } - instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true); + instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true); if (Src) - instrumentMemIntrinsicParam(AFC, MI, Src, Length, InsertBefore, false); + instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false); return true; } @@ -391,7 +383,7 @@ bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) { return DynamicallyInitializedGlobals.count(G); } -void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) { +void AddressSanitizer::instrumentMop(Instruction *I) { bool IsWrite = false; Value *Addr = isInterestingMemoryAccess(I, &IsWrite); assert(Addr); @@ -424,7 +416,7 @@ void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) { } IRBuilder<> IRB(I); - instrumentAddress(AFC, I, IRB, Addr, TypeSize, IsWrite); + instrumentAddress(I, IRB, Addr, TypeSize, IsWrite); } // Validate the result of Module::getOrInsertFunction called for an interface @@ -469,8 +461,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue); } -void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC, - Instruction *OrigIns, +void AddressSanitizer::instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite) { Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); @@ -494,7 +485,8 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC, BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); - BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB); + BasicBlock *CrashBlock = + BasicBlock::Create(*C, "", NextBB->getParent(), NextBB); CrashTerm = new UnreachableInst(*C, CrashBlock); BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); ReplaceInstWithInst(CheckTerm, NewTerm); @@ -734,15 +726,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { } // virtual -bool AddressSanitizer::runOnModule(Module &M) { +bool AddressSanitizer::doInitialization(Module &M) { // Initialize the private fields. No one has accessed them before. 
TD = getAnalysisIfAvailable<DataLayout>(); + if (!TD) return false; BL.reset(new BlackList(ClBlackListFile)); C = &(M.getContext()); - LongSize = TD->getPointerSizeInBits(); + LongSize = TD->getPointerSizeInBits(0); IntptrTy = Type::getIntNTy(*C, LongSize); IntptrPtrTy = PointerType::get(IntptrTy, 0); @@ -771,6 +764,15 @@ bool AddressSanitizer::runOnModule(Module &M) { M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); } } + + AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction( + kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL)); + AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction( + kAsanStackFreeName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy, NULL)); + AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction( + kAsanHandleNoReturnName, IRB.getVoidTy(), NULL)); + // We insert an empty inline asm after __asan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), @@ -797,10 +799,6 @@ bool AddressSanitizer::runOnModule(Module &M) { // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. RedzoneSize = std::max(32, (int)(1 << MappingScale)); - bool Res = false; - - if (ClGlobals) - Res |= insertGlobalRedzones(M); if (ClMappingOffsetLog >= 0) { // Tell the run-time the current values of mapping offset and scale. @@ -820,17 +818,20 @@ bool AddressSanitizer::runOnModule(Module &M) { IRB.CreateLoad(asan_mapping_scale, true); } - - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - Res |= handleFunction(M, *F); - } - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority); - return Res; + return true; +} + +bool AddressSanitizer::doFinalization(Module &M) { + // We transform the globals at the very end so that the optimization analysis + // works on the original globals. + if (ClGlobals) + return insertGlobalRedzones(M); + return false; } + bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // For each NSObject descendant having a +load method, this method is invoked // by the ObjC runtime before any of the static constructors is called. @@ -847,7 +848,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { return false; } -bool AddressSanitizer::handleFunction(Module &M, Function &F) { +bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; @@ -899,8 +900,6 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { } } - AsanFunctionContext AFC(F); - // Instrument. int NumInstrumented = 0; for (size_t i = 0, n = ToInstrument.size(); i != n; i++) { @@ -908,24 +907,23 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { if (isInterestingMemoryAccess(Inst, &IsWrite)) - instrumentMop(AFC, Inst); + instrumentMop(Inst); else - instrumentMemIntrinsic(AFC, cast<MemIntrinsic>(Inst)); + instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); } NumInstrumented++; } DEBUG(dbgs() << F); - bool ChangedStack = poisonStackInFunction(M, F); + bool ChangedStack = poisonStackInFunction(F); // We must unpoison the stack before every NoReturn call (throw, _exit, etc). // See e.g. 
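The ASan rewrite redistributes the old runOnModule across the FunctionPass lifecycle: runtime declarations and shadow-mapping setup move into doInitialization, per-function instrumentation into runOnFunction, and the global redzones into doFinalization so earlier analyses still see the original globals. The shape of that lifecycle as a bare sketch in the era's virtual style (not compilable alone; FunctionPass and friends come from the LLVM headers):

    struct SketchPass : public FunctionPass {     // was: public ModulePass
      virtual bool doInitialization(Module &M);   // declare runtime funcs once
      virtual bool runOnFunction(Function &F);    // instrument one function
      virtual bool doFinalization(Module &M);     // rewrite globals at the end
    };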
http://code.google.com/p/address-sanitizer/issues/detail?id=37 for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) { Instruction *CI = NoReturnCalls[i]; IRBuilder<> IRB(CI); - IRB.CreateCall(M.getOrInsertFunction(kAsanHandleNoReturnName, - IRB.getVoidTy(), NULL)); + IRB.CreateCall(AsanHandleNoReturnFunc); } return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty(); @@ -1039,7 +1037,7 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) { // compiler hoists the load of the shadow value somewhere too high. // This causes asan to report a non-existing bug on 453.povray. // It sounds like an LLVM bug. -bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) { +bool AddressSanitizer::poisonStackInFunction(Function &F) { if (!ClStack) return false; SmallVector<AllocaInst*, 16> AllocaVec; SmallVector<Instruction*, 8> RetVec; @@ -1089,8 +1087,6 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) { Value *LocalStackBase = OrigStackBase; if (DoStackMalloc) { - Value *AsanStackMallocFunc = M.getOrInsertFunction( - kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL); LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc, ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); } @@ -1126,7 +1122,7 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) { ConstantInt::get(IntptrTy, LongSize/8)); BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy); Value *Description = IRB.CreatePointerCast( - createPrivateGlobalForString(M, StackDescription.str()), + createPrivateGlobalForString(*F.getParent(), StackDescription.str()), IntptrTy); IRB.CreateStore(Description, BasePlus1); @@ -1134,13 +1130,6 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) { Value *ShadowBase = memToShadow(LocalStackBase, IRB); PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true); - Value *AsanStackFreeFunc = NULL; - if (DoStackMalloc) { - AsanStackFreeFunc = M.getOrInsertFunction( - kAsanStackFreeName, IRB.getVoidTy(), - IntptrTy, IntptrTy, IntptrTy, NULL); - } - // Unpoison the stack before all ret instructions. for (size_t i = 0, n = RetVec.size(); i < n; i++) { Instruction *Ret = RetVec[i]; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 4d31444b764..4d8176bc6c7 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -774,8 +774,10 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { // Conservatively require the attributes of the call to match those of the // return. Ignore noalias because it doesn't affect the call sequence. Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes(); - if (Attributes::Builder(CalleeRetAttr ^ CallerRetAttr) - .removeAttribute(Attributes::NoAlias).hasAttributes()) + if (AttrBuilder(CalleeRetAttr). + removeAttribute(Attributes::NoAlias) != + AttrBuilder(CallerRetAttr). 
+ removeAttribute(Attributes::NoAlias)) continue; // Make sure the call instruction is followed by an unconditional branch to diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 602e5a4785c..736cc05e043 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -701,6 +701,22 @@ bool DSE::HandleFree(CallInst *F) { return MadeChange; } +namespace { + struct CouldRef { + typedef Value *argument_type; + const CallSite CS; + AliasAnalysis *AA; + + bool operator()(Value *I) { + // See if the call site touches the value. + AliasAnalysis::ModRefResult A = + AA->getModRefInfo(CS, I, getPointerSize(I, *AA)); + + return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref; + } + }; +} + /// handleEndBlock - Remove dead stores to stack-allocated locations in the /// function end block. Ex: /// %A = alloca i32 @@ -802,26 +818,14 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // If the call might load from any of our allocas, then any store above // the call is live. - SmallVector<Value*, 8> LiveAllocas; - for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(), - E = DeadStackObjects.end(); I != E; ++I) { - // See if the call site touches it. - AliasAnalysis::ModRefResult A = - AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA)); - - if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) - LiveAllocas.push_back(*I); - } + CouldRef Pred = { CS, AA }; + DeadStackObjects.remove_if(Pred); // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. - if (DeadStackObjects.size() == LiveAllocas.size()) + if (DeadStackObjects.empty()) break; - for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), - E = LiveAllocas.end(); I != E; ++I) - DeadStackObjects.remove(*I); - continue; } @@ -858,6 +862,20 @@ bool DSE::handleEndBlock(BasicBlock &BB) { return MadeChange; } +namespace { + struct CouldAlias { + typedef Value *argument_type; + const AliasAnalysis::Location &LoadedLoc; + AliasAnalysis *AA; + + bool operator()(Value *I) { + // See if the loaded location could alias the stack location. + AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA)); + return !AA->isNoAlias(StackLoc, LoadedLoc); + } + }; +} + /// RemoveAccessedObjects - Check to see if the specified location may alias any /// of the stack objects in the DeadStackObjects set. If so, they become live /// because the location is being loaded. @@ -876,16 +894,7 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, return; } - SmallVector<Value*, 16> NowLive; - for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(), - E = DeadStackObjects.end(); I != E; ++I) { - // See if the loaded location could alias the stack location. - AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA)); - if (!AA->isNoAlias(StackLoc, LoadedLoc)) - NowLive.push_back(*I); - } - - for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end(); - I != E; ++I) - DeadStackObjects.remove(*I); + // Remove objects that could alias LoadedLoc. + CouldAlias Pred = { LoadedLoc, AA }; + DeadStackObjects.remove_if(Pred); } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 958348d9faa..99a62dbe62f 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -37,7 +37,7 @@ // // TODO: Handle multiple loops at a time. 
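The CouldRef/CouldAlias functors below let DSE shrink DeadStackObjects with a single remove_if instead of the old collect-then-erase double loop (the argument_type typedef is what the SetVector predicate plumbing expects). The same pattern against a standard container, standalone:

    #include <algorithm>
    #include <cstdio>
    #include <vector>
    struct IsOdd {
      typedef int argument_type;
      bool operator()(int V) const { return V % 2 != 0; }
    };
    int main() {
      std::vector<int> Xs;
      for (int I = 1; I <= 4; ++I) Xs.push_back(I);
      // One pass drops every matching element, like remove_if in the diff.
      Xs.erase(std::remove_if(Xs.begin(), Xs.end(), IsOdd()), Xs.end());
      std::printf("%zu left\n", Xs.size());   // 2 left: {2, 4}
      return 0;
    }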
// -// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr +// TODO: Should AddrMode::BaseGV be changed to a ConstantExpr // instead of a GlobalValue? // // TODO: When truncation is free, truncate ICmp users' operands to make it a @@ -67,6 +67,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/TargetTransformInfo.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/DenseSet.h" @@ -74,7 +75,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; @@ -1118,7 +1118,7 @@ public: enum KindType { Basic, ///< A normal use, with no folding. Special, ///< A special case of basic, allowing -1 scales. - Address, ///< An address use; folding according to TargetLowering + Address, ///< An address use; folding according to ScalarTargetTransformInfo. ICmpZero ///< An equality icmp with both operands folded into one. // TODO: Add a generic icmp too? }; @@ -1272,12 +1272,12 @@ void LSRUse::dump() const { /// address-mode folding and special icmp tricks. static bool isLegalUse(const AddrMode &AM, LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { + const ScalarTargetTransformInfo *STTI) { switch (Kind) { case LSRUse::Address: // If we have low-level target information, ask the target if it can // completely fold this address. - if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy); + if (STTI) return STTI->isLegalAddressingMode(AM, AccessTy); // Otherwise, just guess that reg+reg addressing is legal. return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1; @@ -1300,7 +1300,7 @@ static bool isLegalUse(const AddrMode &AM, // If we have low-level target information, ask the target if it can fold an // integer immediate on an icmp. if (AM.BaseOffs != 0) { - if (!TLI) + if (!STTI) return false; // We have one of: // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset @@ -1309,7 +1309,7 @@ static bool isLegalUse(const AddrMode &AM, int64_t Offs = AM.BaseOffs; if (AM.Scale == 0) Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN. - return TLI->isLegalICmpImmediate(Offs); + return STTI->isLegalICmpImmediate(Offs); } // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg @@ -1330,20 +1330,20 @@ static bool isLegalUse(const AddrMode &AM, static bool isLegalUse(AddrMode AM, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { + const ScalarTargetTransformInfo *LTTI) { // Check for overflow. if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != (MinOffset > 0)) return false; AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset; - if (isLegalUse(AM, Kind, AccessTy, TLI)) { + if (isLegalUse(AM, Kind, AccessTy, LTTI)) { AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset; // Check for overflow. if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) != (MaxOffset > 0)) return false; AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset; - return isLegalUse(AM, Kind, AccessTy, TLI); + return isLegalUse(AM, Kind, AccessTy, LTTI); } return false; } @@ -1352,7 +1352,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs, GlobalValue *BaseGV, bool HasBaseReg, LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { + const ScalarTargetTransformInfo *LTTI) { // Fast-path: zero is always foldable. 
if (BaseOffs == 0 && !BaseGV) return true; @@ -1371,14 +1371,14 @@ static bool isAlwaysFoldable(int64_t BaseOffs, AM.HasBaseReg = true; } - return isLegalUse(AM, Kind, AccessTy, TLI); + return isLegalUse(AM, Kind, AccessTy, LTTI); } static bool isAlwaysFoldable(const SCEV *S, int64_t MinOffset, int64_t MaxOffset, bool HasBaseReg, LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI, + const ScalarTargetTransformInfo *LTTI, ScalarEvolution &SE) { // Fast-path: zero is always foldable. if (S->isZero()) return true; @@ -1402,7 +1402,7 @@ static bool isAlwaysFoldable(const SCEV *S, AM.HasBaseReg = HasBaseReg; AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; - return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI); + return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, LTTI); } namespace { @@ -1502,7 +1502,7 @@ class LSRInstance { ScalarEvolution &SE; DominatorTree &DT; LoopInfo &LI; - const TargetLowering *const TLI; + const ScalarTargetTransformInfo *const STTI; Loop *const L; bool Changed; @@ -1638,7 +1638,7 @@ class LSRInstance { Pass *P); public: - LSRInstance(const TargetLowering *tli, Loop *l, Pass *P); + LSRInstance(const ScalarTargetTransformInfo *ltti, Loop *l, Pass *P); bool getChanged() const { return Changed; } @@ -1688,11 +1688,10 @@ void LSRInstance::OptimizeShadowIV() { } if (!DestTy) continue; - if (TLI) { + if (STTI) { // If target does not support DestTy natively then do not apply // this transformation. - EVT DVT = TLI->getValueType(DestTy); - if (!TLI->isTypeLegal(DVT)) continue; + if (!STTI->isTypeLegal(DestTy)) continue; } PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); @@ -2015,18 +2014,18 @@ LSRInstance::OptimizeLoopTermCond() { if (C->getValue().getMinSignedBits() >= 64 || C->getValue().isMinSignedValue()) goto decline_post_inc; - // Without TLI, assume that any stride might be valid, and so any + // Without STTI, assume that any stride might be valid, and so any // use might be shared. - if (!TLI) + if (!STTI) goto decline_post_inc; // Check for possible scaled-address reuse. Type *AccessTy = getAccessType(UI->getUser()); AddrMode AM; AM.Scale = C->getSExtValue(); - if (TLI->isLegalAddressingMode(AM, AccessTy)) + if (STTI->isLegalAddressingMode(AM, AccessTy)) goto decline_post_inc; AM.Scale = -AM.Scale; - if (TLI->isLegalAddressingMode(AM, AccessTy)) + if (STTI->isLegalAddressingMode(AM, AccessTy)) goto decline_post_inc; } } @@ -2097,12 +2096,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg, - Kind, AccessTy, TLI)) + Kind, AccessTy, STTI)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg, - Kind, AccessTy, TLI)) + Kind, AccessTy, STTI)) return false; NewMaxOffset = NewOffset; } @@ -2131,7 +2130,7 @@ LSRInstance::getUse(const SCEV *&Expr, int64_t Offset = ExtractImmediate(Expr, SE); // Basic uses can't accept any offset, for example. - if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) { + if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, STTI)) { Expr = Copy; Offset = 0; } @@ -2396,7 +2395,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr, /// TODO: Consider IVInc free if it's already used in other chains.
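The ranged isLegalUse overload above probes the addressing mode at both ends of a use's offset range, guarding each bias with an explicit signed-overflow check. A hedged restatement of that shape, with AddrMode and the target query reduced to stand-ins (the 4095 bound is an arbitrary example, not a real target's limit):

    #include <cstdint>

    struct AddrMode { int64_t BaseOffs; };

    // Stand-in for STTI->isLegalAddressingMode(AM, AccessTy).
    static bool isLegalSingle(const AddrMode &AM) {
      return AM.BaseOffs >= -4095 && AM.BaseOffs <= 4095;
    }

    static bool isLegalOverRange(AddrMode AM, int64_t MinOffset, int64_t MaxOffset) {
      // The sum must move in the direction of the offset's sign; otherwise it
      // wrapped, exactly as the ((BaseOffs + Min) > BaseOffs) != (Min > 0) test.
      if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != (MinOffset > 0))
        return false;
      AM.BaseOffs = (int64_t)((uint64_t)AM.BaseOffs + MinOffset);
      if (!isLegalSingle(AM))
        return false;
      AM.BaseOffs = (int64_t)((uint64_t)AM.BaseOffs - MinOffset);
      if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) != (MaxOffset > 0))
        return false;
      AM.BaseOffs = (int64_t)((uint64_t)AM.BaseOffs + MaxOffset);
      return isLegalSingle(AM);
    }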
static bool isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users, - ScalarEvolution &SE, const TargetLowering *TLI) { + ScalarEvolution &SE, const ScalarTargetTransformInfo *STTI) { if (StressIVChain) return true; @@ -2654,7 +2653,7 @@ void LSRInstance::CollectChains() { for (unsigned UsersIdx = 0, NChains = IVChainVec.size(); UsersIdx < NChains; ++UsersIdx) { if (!isProfitableChain(IVChainVec[UsersIdx], - ChainUsersVec[UsersIdx].FarUsers, SE, TLI)) + ChainUsersVec[UsersIdx].FarUsers, SE, STTI)) continue; // Preserve the chain at UsersIdx. if (ChainIdx != UsersIdx) @@ -2681,7 +2680,8 @@ void LSRInstance::FinalizeChain(IVChain &Chain) { /// Return true if the IVInc can be folded into an addressing mode. static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, - Value *Operand, const TargetLowering *TLI) { + Value *Operand, + const ScalarTargetTransformInfo *STTI) { const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr); if (!IncConst || !isAddressUse(UserInst, Operand)) return false; @@ -2691,7 +2691,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, int64_t IncOffset = IncConst->getValue()->getSExtValue(); if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HasBaseReg=*/false, - LSRUse::Address, getAccessType(UserInst), TLI) + LSRUse::Address, getAccessType(UserInst), STTI)) return false; return true; @@ -2762,7 +2762,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // If an IV increment can't be folded, use it as the next IV value. if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand, - TLI)) { + STTI)) { assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); IVSrc = IVOper; LeftOverExpr = 0; @@ -3108,7 +3108,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // into an immediate field. if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, Base.getNumRegs() > 1, - LU.Kind, LU.AccessTy, TLI, SE)) + LU.Kind, LU.AccessTy, STTI, SE)) continue; // Collect all operands except *J. @@ -3122,7 +3122,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, if (InnerAddOps.size() == 1 && isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset, Base.getNumRegs() > 1, - LU.Kind, LU.AccessTy, TLI, SE)) + LU.Kind, LU.AccessTy, STTI, SE)) continue; const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); @@ -3132,9 +3132,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Add the remaining pieces of the add back into the new formula. const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); - if (TLI && InnerSumSC && + if (STTI && InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && - TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue())) { F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue(); @@ -3144,8 +3144,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); - if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && - TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + if (STTI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue())) F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue(); @@ -3205,7 +3205,7 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula F = Base; F.AM.BaseGV = GV; if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + LU.Kind, LU.AccessTy, STTI)) continue; F.BaseRegs[i] = G; (void)InsertFormula(LU, LUIdx, F); @@ -3230,7 +3230,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula F = Base; F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, - LU.Kind, LU.AccessTy, TLI)) { + LU.Kind, LU.AccessTy, STTI)) { // Add the offset to the base register. const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); // If it cancelled out, drop the base register, otherwise update it. @@ -3250,7 +3250,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula F = Base; F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm; if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + LU.Kind, LU.AccessTy, STTI)) continue; F.BaseRegs[i] = G; (void)InsertFormula(LU, LUIdx, F); @@ -3297,7 +3297,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, F.AM.BaseOffs = NewBaseOffs; // Check that this scale is legal. - if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) + if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, STTI)) continue; // Compensate for the use having MinOffset built into it. @@ -3353,12 +3353,12 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { Base.AM.HasBaseReg = Base.BaseRegs.size() > 1; // Check whether this scale is going to be legal. if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) { + LU.Kind, LU.AccessTy, STTI)) { // As a special-case, handle special out-of-loop Basic users specially. // TODO: Reconsider this special case. if (LU.Kind == LSRUse::Basic && isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, - LSRUse::Special, LU.AccessTy, TLI) && + LSRUse::Special, LU.AccessTy, STTI) && LU.AllFixupsOutsideLoop) LU.Kind = LSRUse::Special; else @@ -3391,8 +3391,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { /// GenerateTruncates - Generate reuse formulae from different IV types. void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { - // This requires TargetLowering to tell us which truncates are free. - if (!TLI) return; + // This requires ScalarTargetTransformInfo to tell us which truncates are free. + if (!STTI) return; // Don't bother truncating symbolic values. 
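In the GenerateReassociations hunks above, a constant term only migrates into the formula's UnfoldedOffset after STTI->isLegalAddImmediate approves the combined immediate. The guard, sketched with a hypothetical 12-bit add field standing in for the real target hook:

    #include <cstdint>

    // Hypothetical stand-in for STTI->isLegalAddImmediate(Imm).
    static bool isLegalAddImmediate(int64_t Imm) {
      return Imm >= -(1 << 11) && Imm < (1 << 11);
    }

    struct Formula { int64_t UnfoldedOffset; };

    // Fold Imm into F only when the running sum stays a legal add-immediate;
    // otherwise the constant has to occupy a register of its own.
    static bool tryFoldImmediate(Formula &F, int64_t Imm) {
      int64_t Combined = (int64_t)((uint64_t)F.UnfoldedOffset + (uint64_t)Imm);
      if (!isLegalAddImmediate(Combined))
        return false;
      F.UnfoldedOffset = Combined;
      return true;
    }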
if (Base.AM.BaseGV) return; @@ -3405,7 +3405,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { for (SmallSetVector<Type *, 4>::const_iterator I = Types.begin(), E = Types.end(); I != E; ++I) { Type *SrcTy = *I; - if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) { + if (SrcTy != DstTy && STTI->isTruncateFree(SrcTy, DstTy)) { Formula F = Base; if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I); @@ -3561,7 +3561,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { Formula NewF = F; NewF.AM.BaseOffs = Offs; if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + LU.Kind, LU.AccessTy, STTI)) continue; NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg); @@ -3586,9 +3586,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { Formula NewF = F; NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm; if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) { - if (!TLI || - !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) + LU.Kind, LU.AccessTy, STTI)) { + if (!STTI || + !STTI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) continue; NewF = F; NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm; @@ -3900,7 +3900,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { Formula &F = LUThatHas->Formulae[i]; if (!isLegalUse(F.AM, LUThatHas->MinOffset, LUThatHas->MaxOffset, - LUThatHas->Kind, LUThatHas->AccessTy, TLI)) { + LUThatHas->Kind, LUThatHas->AccessTy, STTI)) { DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); LUThatHas->DeleteFormula(F); @@ -4589,12 +4589,12 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Changed |= DeleteTriviallyDeadInstructions(DeadInsts); } -LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) +LSRInstance::LSRInstance(const ScalarTargetTransformInfo *stti, Loop *l, Pass *P) : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()), DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()), - TLI(tli), L(l), Changed(false), IVIncInsertPos(0) { + STTI(stti), L(l), Changed(false), IVIncInsertPos(0) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) @@ -4684,7 +4684,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), JE = LU.Formulae.end(); J != JE; ++J) assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI) && + LU.Kind, LU.AccessTy, STTI) && "Illegal formula generated!"); }; #endif @@ -4757,13 +4757,13 @@ void LSRInstance::dump() const { namespace { class LoopStrengthReduce : public LoopPass { - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. - const TargetLowering *const TLI; + /// ScalarTargetTransformInfo provides target information that is needed + /// for strength reducing loops. 
+ const ScalarTargetTransformInfo *STTI; public: static char ID; // Pass ID, replacement for typeid - explicit LoopStrengthReduce(const TargetLowering *tli = 0); + LoopStrengthReduce(); private: bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -4783,13 +4783,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", "Loop Strength Reduction", false, false) - -Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { - return new LoopStrengthReduce(TLI); +Pass *llvm::createLoopStrengthReducePass() { + return new LoopStrengthReduce(); } -LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) - : LoopPass(ID), TLI(tli) { +LoopStrengthReduce::LoopStrengthReduce() + : LoopPass(ID), STTI(0) { initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); } @@ -4815,8 +4814,13 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { bool Changed = false; + TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>(); + + if (TTI) + STTI = TTI->getScalarTargetTransformInfo(); + // Run the main LSR transformation. - Changed |= LSRInstance(TLI, L, this).getChanged(); + Changed |= LSRInstance(STTI, L, this).getChanged(); // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); @@ -4827,7 +4831,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { Rewriter.setDebugType(DEBUG_TYPE); #endif unsigned numFolded = Rewriter. - replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI); + replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, STTI); if (numFolded) { Changed = true; DeleteTriviallyDeadInstructions(DeadInsts); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 517657cf526..97fff9edd68 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -174,10 +174,11 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const { // this width can be stored. If so, check to see whether we will end up // actually reducing the number of stores used. unsigned Bytes = unsigned(End-Start); - unsigned NumPointerStores = Bytes/TD.getPointerSize(); + unsigned AS = cast<StoreInst>(TheStores[0])->getPointerAddressSpace(); + unsigned NumPointerStores = Bytes/TD.getPointerSize(AS); // Assume the remaining bytes if any are done a byte at a time. - unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(); + unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(AS); // If we will reduce the # stores (according to this heuristic), do the // transformation. 
This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 629f9d2ff57..dfdf50549da 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -1788,9 +1788,9 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - Attributes::Builder B; - B.addAttribute(Attributes::NoUnwind); - AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B)); + AttrListPtr Attributes = + AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::get(C, Attributes::NoUnwind)); RetainRVCallee = M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, Attributes); @@ -1804,9 +1804,9 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - Attributes::Builder B; - B.addAttribute(Attributes::NoUnwind); - AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B)); + AttrListPtr Attributes = + AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::get(C, Attributes::NoUnwind)); AutoreleaseRVCallee = M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, Attributes); @@ -1818,9 +1818,9 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) { if (!ReleaseCallee) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - Attributes::Builder B; - B.addAttribute(Attributes::NoUnwind); - AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B)); + AttrListPtr Attributes = + AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::get(C, Attributes::NoUnwind)); ReleaseCallee = M->getOrInsertFunction( "objc_release", @@ -1834,9 +1834,9 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) { if (!RetainCallee) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - Attributes::Builder B; - B.addAttribute(Attributes::NoUnwind); - AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B)); + AttrListPtr Attributes = + AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::get(C, Attributes::NoUnwind)); RetainCallee = M->getOrInsertFunction( "objc_retain", @@ -1865,9 +1865,9 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { if (!AutoreleaseCallee) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - Attributes::Builder B; - B.addAttribute(Attributes::NoUnwind); - AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B)); + AttrListPtr Attributes = + AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::get(C, Attributes::NoUnwind)); AutoreleaseCallee = M->getOrInsertFunction( "objc_autorelease", @@ -3840,13 +3840,10 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { Type *I8XX = PointerType::getUnqual(I8X); Type *Params[] = { I8XX, I8X }; - Attributes::Builder BNoUnwind; - BNoUnwind.addAttribute(Attributes::NoUnwind); - Attributes::Builder BNoCapture; - BNoCapture.addAttribute(Attributes::NoCapture); AttrListPtr Attributes = AttrListPtr() - .addAttr(~0u, Attributes::get(BNoUnwind)) - .addAttr(1, Attributes::get(BNoCapture)); + 
.addAttr(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::get(C, Attributes::NoUnwind)) + .addAttr(M->getContext(), 1, Attributes::get(C, Attributes::NoCapture)); StoreStrongCallee = M->getOrInsertFunction( @@ -3863,9 +3860,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - Attributes::Builder B; - B.addAttribute(Attributes::NoUnwind); - AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B)); + AttrListPtr Attributes = + AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::get(C, Attributes::NoUnwind)); RetainAutoreleaseCallee = M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes); } @@ -3878,9 +3875,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - Attributes::Builder B; - B.addAttribute(Attributes::NoUnwind); - AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B)); + AttrListPtr Attributes = + AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::get(C, Attributes::NoUnwind)); RetainAutoreleaseRVCallee = M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, Attributes); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index ca762514929..3e84a91c1db 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -447,6 +447,7 @@ protected: bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) { GEPOffset = Offset; + unsigned int AS = GEPI.getPointerAddressSpace(); for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI); GTI != GTE; ++GTI) { ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); @@ -476,7 +477,7 @@ protected: continue; } - APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits()); + APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits(AS)); Index *= APInt(Index.getBitWidth(), TD.getTypeAllocSize(GTI.getIndexedType())); Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset, @@ -1784,7 +1785,9 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD, break; if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) { ElementTy = SeqTy->getElementType(); - Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0))); + Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits( + ElementTy->isPointerTy() ? + cast<PointerType>(ElementTy)->getAddressSpace(): 0), 0))); } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) { if (STy->element_begin() == STy->element_end()) break; // Nothing left to descend into. @@ -2004,6 +2007,51 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, return Ptr; } +/// \brief Test whether we can convert a value from the old to the new type. +/// +/// This predicate should be used to guard calls to convertValue in order to +/// ensure that we only try to convert viable values. The strategy is that we +/// will peel off single element struct and array wrappings to get to an +/// underlying value, and convert that value. 
+static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { + if (OldTy == NewTy) + return true; + if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy)) + return false; + if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) + return false; + + if (NewTy->isPointerTy() || OldTy->isPointerTy()) { + if (NewTy->isPointerTy() && OldTy->isPointerTy()) + return true; + if (NewTy->isIntegerTy() || OldTy->isIntegerTy()) + return true; + return false; + } + + return true; +} + +/// \brief Generic routine to convert an SSA value to a value of a different +/// type. +/// +/// This will try various different casting techniques, such as bitcasts, +/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test +/// two types for viability with this routine. +static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V, + Type *Ty) { + assert(canConvertValue(DL, V->getType(), Ty) && + "Value not convertible to type"); + if (V->getType() == Ty) + return V; + if (V->getType()->isIntegerTy() && Ty->isPointerTy()) + return IRB.CreateIntToPtr(V, Ty); + if (V->getType()->isPointerTy() && Ty->isIntegerTy()) + return IRB.CreatePtrToInt(V, Ty); + + return IRB.CreateBitCast(V, Ty); +} + /// \brief Test whether the given alloca partition can be promoted to a vector. /// /// This is a quick test to check whether we can rewrite a particular alloca @@ -2075,47 +2123,74 @@ static bool isVectorPromotionViable(const DataLayout &TD, return true; } -/// \brief Test whether the given alloca partition can be promoted to an int. +/// \brief Test whether the given alloca partition's integer operations can be +/// widened to promotable ones. /// -/// This is a quick test to check whether we can rewrite a particular alloca -/// partition (and its newly formed alloca) into an integer alloca suitable for -/// promotion to an SSA value. We only can ensure this for a limited set of -/// operations, and we don't want to do the rewrites unless we are confident -/// that the result will be promotable, so we have an early test here. -static bool isIntegerPromotionViable(const DataLayout &TD, - Type *AllocaTy, - uint64_t AllocBeginOffset, - AllocaPartitioning &P, - AllocaPartitioning::const_use_iterator I, - AllocaPartitioning::const_use_iterator E) { - IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy); - if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth()) +/// This is a quick test to check whether we can rewrite the integer loads and +/// stores to a particular alloca into wider loads and stores and be able to +/// promote the resulting alloca. +static bool isIntegerWideningViable(const DataLayout &TD, + Type *AllocaTy, + uint64_t AllocBeginOffset, + AllocaPartitioning &P, + AllocaPartitioning::const_use_iterator I, + AllocaPartitioning::const_use_iterator E) { + uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy); + + // Don't try to handle allocas with bit-padding. + if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy)) return false; + uint64_t Size = TD.getTypeStoreSize(AllocaTy); + // Check the uses to ensure the uses are (likely) promotable integer uses. // Also ensure that the alloca has a covering load or store. We don't want - promote because of some other unsplittable entry (which we may make - splittable later) and lose the ability to promote each element access. + to widen the integer operations only to fail to promote due to some other + unsplittable entry (which we may make splittable later).
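Callers are expected to pair the two helpers defined above: test with canConvertValue, then convert. A usage sketch mirroring the load-rewriting path later in this patch (the surrounding rewriter state of TD, IRB, and NewAI is assumed from context):

    // Load the whole new alloca and coerce the value to the type the original
    // load produced; returns null when the caller must keep the narrow load.
    static Value *emitConvertedLoad(const DataLayout &TD, IRBuilder<> &IRB,
                                    AllocaInst &NewAI, Type *WantedTy) {
      Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "widened.load");
      if (!canConvertValue(TD, V->getType(), WantedTy))
        return 0;
      return convertValue(TD, IRB, V, WantedTy);
    }

convertValue picks inttoptr/ptrtoint for integer/pointer pairs and falls back to a plain bitcast otherwise.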
bool WholeAllocaOp = false; for (; I != E; ++I) { if (!I->U) continue; // Skip dead use. + uint64_t RelBegin = I->BeginOffset - AllocBeginOffset; + uint64_t RelEnd = I->EndOffset - AllocBeginOffset; + // We can't reasonably handle cases where the load or store extends past // the end of the alloca's type and into its padding. - if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty)) + if (RelEnd > Size) return false; if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { - if (LI->isVolatile() || !LI->getType()->isIntegerTy()) + if (LI->isVolatile()) return false; - if (LI->getType() == Ty) + if (RelBegin == 0 && RelEnd == Size) WholeAllocaOp = true; + if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) { + if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy)) + return false; + continue; + } + // Non-integer loads need to be convertible from the alloca type so that + // they are promotable. + if (RelBegin != 0 || RelEnd != Size || + !canConvertValue(TD, AllocaTy, LI->getType())) + return false; } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { - if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy()) + Type *ValueTy = SI->getValueOperand()->getType(); + if (SI->isVolatile()) return false; - if (SI->getValueOperand()->getType() == Ty) + if (RelBegin == 0 && RelEnd == Size) WholeAllocaOp = true; + if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) { + if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy)) + return false; + continue; + } + // Non-integer stores need to be convertible to the alloca type so that + // they are promotable. + if (RelBegin != 0 || RelEnd != Size || + !canConvertValue(TD, ValueTy, AllocaTy)) + return false; } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { if (MI->isVolatile()) return false; @@ -2125,6 +2200,10 @@ static bool isIntegerPromotionViable(const DataLayout &TD, if (!MTO.IsSplittable) return false; } + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return false; } else { return false; } @@ -2149,6 +2228,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, SROA &Pass; AllocaInst &OldAI, &NewAI; const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset; + Type *NewAllocaTy; // If we are rewriting an alloca partition which can be written as pure // vector operations, we stash extra information here. When VecTy is @@ -2164,10 +2244,10 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, uint64_t ElementSize; // This is a convenience and flag variable that will be null unless the new - // alloca has a promotion-targeted integer type due to passing - // isIntegerPromotionViable above. If it is non-null does, the desired + // alloca's integer operations should be widened to this integer type due to + // passing isIntegerWideningViable above. If it is non-null, the desired // integer type will be stored here for easy access during rewriting. - IntegerType *IntPromotionTy; + IntegerType *IntTy; // The offset of the partition user currently being rewritten.
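The RelBegin/RelEnd rewrite above reduces every use to offsets relative to the alloca, so the interesting cases fall out of two comparisons. A self-contained restatement with a worked example (names are descriptive, not the patch's):

    #include <cstdint>

    // Classify one use spanning [BeginOffset, EndOffset) against an alloca
    // that starts at AllocBegin and stores StoreSize bytes.
    static bool classifyUse(uint64_t BeginOffset, uint64_t EndOffset,
                            uint64_t AllocBegin, uint64_t StoreSize,
                            bool &WholeAllocaOp) {
      uint64_t RelBegin = BeginOffset - AllocBegin;
      uint64_t RelEnd = EndOffset - AllocBegin;
      if (RelEnd > StoreSize)
        return false;            // extends into padding: widening is off
      if (RelBegin == 0 && RelEnd == StoreSize)
        WholeAllocaOp = true;    // a covering access anchors promotion
      return true;
    }

For an i64 alloca (StoreSize 8): a load of bytes [0,8) sets WholeAllocaOp, a load of [4,8) is accepted without setting it, and a load of [4,12) rejects widening outright.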
uint64_t BeginOffset, EndOffset; @@ -2186,7 +2266,8 @@ public: OldAI(OldAI), NewAI(NewAI), NewAllocaBeginOffset(NewBeginOffset), NewAllocaEndOffset(NewEndOffset), - VecTy(), ElementTy(), ElementSize(), IntPromotionTy(), + NewAllocaTy(NewAI.getAllocatedType()), + VecTy(), ElementTy(), ElementSize(), IntTy(), BeginOffset(), EndOffset() { } @@ -2202,9 +2283,10 @@ public: assert((VecTy->getScalarSizeInBits() % 8) == 0 && "Only multiple-of-8 sized vector elements are viable"); ElementSize = VecTy->getScalarSizeInBits() / 8; - } else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(), - NewAllocaBeginOffset, P, I, E)) { - IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType()); + } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(), + NewAllocaBeginOffset, P, I, E)) { + IntTy = Type::getIntNTy(NewAI.getContext(), + TD.getTypeSizeInBits(NewAI.getAllocatedType())); } bool CanSROA = true; for (; I != E; ++I) { @@ -2223,6 +2305,10 @@ public: ElementTy = 0; ElementSize = 0; } + if (IntTy) { + assert(CanSROA); + IntTy = 0; + } return CanSROA; } @@ -2239,7 +2325,8 @@ private: Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) { assert(BeginOffset >= NewAllocaBeginOffset); - APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset); + unsigned AS = cast<PointerType>(PointerTy)->getAddressSpace(); + APInt Offset(TD.getPointerSizeInBits(AS), BeginOffset - NewAllocaBeginOffset); return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName("")); } @@ -2286,55 +2373,56 @@ private: Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy, uint64_t Offset) { - assert(IntPromotionTy && "Alloca is not an integer we can extract from"); + assert(IntTy && "We cannot extract an integer from the alloca"); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")); + V = convertValue(TD, IRB, V, IntTy); assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t RelOffset = Offset - NewAllocaBeginOffset; assert(TD.getTypeStoreSize(TargetTy) + RelOffset <= - TD.getTypeStoreSize(IntPromotionTy) && + TD.getTypeStoreSize(IntTy) && "Element load outside of alloca store"); uint64_t ShAmt = 8*RelOffset; if (TD.isBigEndian()) - ShAmt = 8*(TD.getTypeStoreSize(IntPromotionTy) - + ShAmt = 8*(TD.getTypeStoreSize(IntTy) - TD.getTypeStoreSize(TargetTy) - RelOffset); if (ShAmt) V = IRB.CreateLShr(V, ShAmt, getName(".shift")); - if (TargetTy != IntPromotionTy) { - assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() && - "Cannot extract to a larger integer!"); + assert(TargetTy->getBitWidth() <= IntTy->getBitWidth() && + "Cannot extract to a larger integer!"); + if (TargetTy != IntTy) V = IRB.CreateTrunc(V, TargetTy, getName(".trunc")); - } return V; } StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) { IntegerType *Ty = cast<IntegerType>(V->getType()); - if (Ty == IntPromotionTy) - return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); - - assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() && + assert(Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot insert a larger integer!"); - V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext")); + if (Ty != IntTy) + V = IRB.CreateZExt(V, IntTy, getName(".ext")); assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t RelOffset = Offset - NewAllocaBeginOffset; assert(TD.getTypeStoreSize(Ty) + RelOffset <= - TD.getTypeStoreSize(IntPromotionTy) && + TD.getTypeStoreSize(IntTy) && "Element store outside of alloca store"); uint64_t 
ShAmt = 8*RelOffset; if (TD.isBigEndian()) - ShAmt = 8*(TD.getTypeStoreSize(IntPromotionTy) - TD.getTypeStoreSize(Ty) + ShAmt = 8*(TD.getTypeStoreSize(IntTy) - TD.getTypeStoreSize(Ty) - RelOffset); if (ShAmt) V = IRB.CreateShl(V, ShAmt, getName(".shift")); - APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth()).shl(ShAmt); - Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI, - NewAI.getAlignment(), - getName(".oldload")), - Mask, getName(".mask")); - return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")), - &NewAI, NewAI.getAlignment()); + if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { + APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Old = convertValue(TD, IRB, Old, IntTy); + Old = IRB.CreateAnd(Old, Mask, getName(".mask")); + V = IRB.CreateOr(Old, V, getName(".insert")); + } + V = convertValue(TD, IRB, V, NewAllocaTy); + return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); } void deleteIfTriviallyDead(Value *V) { @@ -2343,15 +2431,6 @@ private: Pass.DeadInsts.push_back(I); } - Value *getValueCast(IRBuilder<> &IRB, Value *V, Type *Ty) { - if (V->getType()->isIntegerTy() && Ty->isPointerTy()) - return IRB.CreateIntToPtr(V, Ty); - if (V->getType()->isPointerTy() && Ty->isIntegerTy()) - return IRB.CreatePtrToInt(V, Ty); - - return IRB.CreateBitCast(V, Ty); - } - bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { Value *Result; if (LI.getType() == VecTy->getElementType() || @@ -2364,7 +2443,7 @@ private: getName(".load")); } if (Result->getType() != LI.getType()) - Result = getValueCast(IRB, Result, LI.getType()); + Result = convertValue(TD, IRB, Result, LI.getType()); LI.replaceAllUsesWith(Result); Pass.DeadInsts.push_back(&LI); @@ -2390,9 +2469,23 @@ private: if (VecTy) return rewriteVectorizedLoadInst(IRB, LI, OldOp); - if (IntPromotionTy) + if (IntTy && LI.getType()->isIntegerTy()) return rewriteIntegerLoad(IRB, LI); + if (BeginOffset == NewAllocaBeginOffset && + canConvertValue(TD, NewAllocaTy, LI.getType())) { + Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + LI.isVolatile(), getName(".load")); + Value *NewV = convertValue(TD, IRB, NewLI, LI.getType()); + LI.replaceAllUsesWith(NewV); + Pass.DeadInsts.push_back(&LI); + + DEBUG(dbgs() << " to: " << *NewLI << "\n"); + return !LI.isVolatile(); + } + + assert(!IntTy && "Invalid load found with int-op widening enabled"); + Value *NewPtr = getAdjustedAllocaPtr(IRB, LI.getPointerOperand()->getType()); LI.setOperand(0, NewPtr); @@ -2409,13 +2502,13 @@ private: if (V->getType() == ElementTy || BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { if (V->getType() != ElementTy) - V = getValueCast(IRB, V, ElementTy); + V = convertValue(TD, IRB, V, ElementTy); LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")); V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset), getName(".insert")); } else if (V->getType() != VecTy) { - V = getValueCast(IRB, V, VecTy); + V = convertValue(TD, IRB, V, VecTy); } StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.push_back(&SI); @@ -2442,16 +2535,31 @@ private: if (VecTy) return rewriteVectorizedStoreInst(IRB, SI, OldOp); - if (IntPromotionTy) + Type *ValueTy = SI.getValueOperand()->getType(); + if (IntTy && ValueTy->isIntegerTy()) return rewriteIntegerStore(IRB, SI); // Strip all inbounds GEPs and pointer 
casts to try to dig out any root // alloca that should be re-examined after promoting this alloca. - if (SI.getValueOperand()->getType()->isPointerTy()) + if (ValueTy->isPointerTy()) if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand() ->stripInBoundsOffsets())) Pass.PostPromotionWorklist.insert(AI); + if (BeginOffset == NewAllocaBeginOffset && + canConvertValue(TD, ValueTy, NewAllocaTy)) { + Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy); + StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(), + SI.isVolatile()); + (void)NewSI; + Pass.DeadInsts.push_back(&SI); + + DEBUG(dbgs() << " to: " << *NewSI << "\n"); + return !SI.isVolatile(); + } + + assert(!IntTy && "Invalid store found with int-op widening enabled"); + Value *NewPtr = getAdjustedAllocaPtr(IRB, SI.getPointerOperand()->getType()); SI.setOperand(1, NewPtr); @@ -2487,10 +2595,11 @@ private: // If this doesn't map cleanly onto the alloca type, and that type isn't // a single value type, just emit a memset. - if (!VecTy && (BeginOffset != NewAllocaBeginOffset || - EndOffset != NewAllocaEndOffset || - !AllocaTy->isSingleValueType() || - !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { + if (!VecTy && !IntTy && + (BeginOffset != NewAllocaBeginOffset || + EndOffset != NewAllocaEndOffset || + !AllocaTy->isSingleValueType() || + !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); CallInst *New @@ -2508,32 +2617,24 @@ private: // a sensible representation for the alloca type. This is essentially // splatting the byte to a sufficiently wide integer, bitcasting to the // desired scalar type, and splatting it across any desired vector type. + uint64_t Size = EndOffset - BeginOffset; Value *V = II.getValue(); IntegerType *VTy = cast<IntegerType>(V->getType()); - Type *IntTy = Type::getIntNTy(VTy->getContext(), - TD.getTypeSizeInBits(ScalarTy)); - if (TD.getTypeSizeInBits(ScalarTy) > VTy->getBitWidth()) - V = IRB.CreateMul(IRB.CreateZExt(V, IntTy, getName(".zext")), + Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); + if (Size*8 > VTy->getBitWidth()) + V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), ConstantExpr::getUDiv( - Constant::getAllOnesValue(IntTy), + Constant::getAllOnesValue(SplatIntTy), ConstantExpr::getZExt( Constant::getAllOnesValue(V->getType()), - IntTy)), + SplatIntTy)), getName(".isplat")); - if (V->getType() != ScalarTy) { - if (ScalarTy->isPointerTy()) - V = IRB.CreateIntToPtr(V, ScalarTy); - else if (ScalarTy->isPrimitiveType() || ScalarTy->isVectorTy()) - V = IRB.CreateBitCast(V, ScalarTy); - else if (ScalarTy->isIntegerTy()) - llvm_unreachable("Computed different integer types with equal widths"); - else - llvm_unreachable("Invalid scalar type"); - } // If this is an element-wide memset of a vectorizable alloca, insert it. if (VecTy && (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)) { + if (V->getType() != ScalarTy) + V = convertValue(TD, IRB, V, ScalarTy); StoreInst *Store = IRB.CreateAlignedStore( IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), @@ -2546,18 +2647,20 @@ private: return true; } - // Splat to a vector if needed. 
- if (VectorType *VecTy = dyn_cast<VectorType>(AllocaTy)) { - VectorType *SplatSourceTy = VectorType::get(V->getType(), 1); - V = IRB.CreateShuffleVector( - IRB.CreateInsertElement(UndefValue::get(SplatSourceTy), V, - IRB.getInt32(0), getName(".vsplat.insert")), - UndefValue::get(SplatSourceTy), - ConstantVector::getSplat(VecTy->getNumElements(), IRB.getInt32(0)), - getName(".vsplat.shuffle")); - assert(V->getType() == VecTy); + // If this is a memset on an alloca where we can widen stores, insert the + // set integer. + if (IntTy && (BeginOffset > NewAllocaBeginOffset || + EndOffset < NewAllocaEndOffset)) { + assert(!II.isVolatile()); + StoreInst *Store = insertInteger(IRB, V, BeginOffset); + (void)Store; + DEBUG(dbgs() << " to: " << *Store << "\n"); + return true; } + if (V->getType() != AllocaTy) + V = convertValue(TD, IRB, V, AllocaTy); + Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), II.isVolatile()); (void)New; @@ -2578,8 +2681,10 @@ private: const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(II); + assert(OldPtr->getType()->isPointerTy() && "Must be a pointer type!"); + unsigned AS = cast<PointerType>(OldPtr->getType())->getAddressSpace(); // Compute the relative offset within the transfer. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); + unsigned IntPtrWidth = TD.getPointerSizeInBits(AS); APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin : MTO.SourceBegin)); @@ -2618,9 +2723,9 @@ private: // If this doesn't map cleanly onto the alloca type, and that type isn't // a single value type, just emit a memcpy. bool EmitMemCpy - = !VecTy && (BeginOffset != NewAllocaBeginOffset || - EndOffset != NewAllocaEndOffset || - !NewAI.getAllocatedType()->isSingleValueType()); + = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset || + EndOffset != NewAllocaEndOffset || + !NewAI.getAllocatedType()->isSingleValueType()); // If we're just going to emit a memcpy, the alloca hasn't changed, and the // size hasn't been shrunk based on analysis of the viable range, this is @@ -2642,14 +2747,23 @@ private: if (Pass.DeadSplitInsts.insert(&II)) Pass.DeadInsts.push_back(&II); - bool IsVectorElement = VecTy && (BeginOffset > NewAllocaBeginOffset || - EndOffset < NewAllocaEndOffset); + bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && + EndOffset == NewAllocaEndOffset; + bool IsVectorElement = VecTy && !IsWholeAlloca; + uint64_t Size = EndOffset - BeginOffset; + IntegerType *SubIntTy + = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() : II.getRawDest()->getType(); - if (!EmitMemCpy) - OtherPtrTy = IsVectorElement ? VecTy->getElementType()->getPointerTo() - : NewAI.getType(); + if (!EmitMemCpy) { + if (IsVectorElement) + OtherPtrTy = VecTy->getElementType()->getPointerTo(); + else if (IntTy && !IsWholeAlloca) + OtherPtrTy = SubIntTy->getPointerTo(); + else + OtherPtrTy = NewAI.getType(); + } // Compute the other pointer, folding as much as possible to produce // a single, simple GEP in most cases. 
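The memset rewrite above widens the set byte to Size bytes by multiplying it with the division constant allOnes(iN)/zext(allOnes(i8)), i.e. 0x0101...01, which replicates the byte into every lane of the wider integer. The arithmetic, checked on a plain 32-bit value:

    #include <cassert>
    #include <cstdint>

    // 0xFFFFFFFF / 0xFF == 0x01010101, so V * 0x01010101 splats V's byte.
    static uint32_t splatByte32(uint8_t V) {
      const uint32_t Splat = UINT32_C(0xFFFFFFFF) / 0xFF;
      return (uint32_t)V * Splat;
    }

    int main() {
      assert(splatByte32(0xAB) == 0xABABABABu);
      assert(splatByte32(0x00) == 0u);
      return 0;
    }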
@@ -2696,11 +2810,20 @@ private: IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), getIndex(IRB, BeginOffset), getName(".copyextract")); + } else if (IntTy && !IsWholeAlloca && !IsDest) { + Src = extractInteger(IRB, SubIntTy, BeginOffset); } else { Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), getName(".copyload")); } + if (IntTy && !IsWholeAlloca && IsDest) { + StoreInst *Store = insertInteger(IRB, Src, BeginOffset); + (void)Store; + DEBUG(dbgs() << " to: " << *Store << "\n"); + return true; + } + if (IsVectorElement && IsDest) { // We have to insert into a loaded copy before storing. Src = IRB.CreateInsertElement( @@ -2993,6 +3116,36 @@ private: }; } +/// \brief Strip aggregate type wrapping. +/// +/// This removes no-op aggregate types wrapping an underlying type. It will +/// strip as many layers of types as it can without changing either the type +/// size or the allocated size. +static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) { + if (Ty->isSingleValueType()) + return Ty; + + uint64_t AllocSize = DL.getTypeAllocSize(Ty); + uint64_t TypeSize = DL.getTypeSizeInBits(Ty); + + Type *InnerTy; + if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { + InnerTy = ArrTy->getElementType(); + } else if (StructType *STy = dyn_cast<StructType>(Ty)) { + const StructLayout *SL = DL.getStructLayout(STy); + unsigned Index = SL->getElementContainingOffset(0); + InnerTy = STy->getElementType(Index); + } else { + return Ty; + } + + if (AllocSize > DL.getTypeAllocSize(InnerTy) || + TypeSize > DL.getTypeSizeInBits(InnerTy)) + return Ty; + + return stripAggregateTypeWrapping(DL, InnerTy); +} + /// \brief Try to find a partition of the aggregate type passed in for a given /// offset and size. /// @@ -3009,7 +3162,7 @@ private: static Type *getTypePartition(const DataLayout &TD, Type *Ty, uint64_t Offset, uint64_t Size) { if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size) - return Ty; + return stripAggregateTypeWrapping(TD, Ty); if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) { // We can't partition pointers... @@ -3038,7 +3191,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty, assert(Offset == 0); if (Size == ElementSize) - return ElementTy; + return stripAggregateTypeWrapping(TD, ElementTy); assert(Size > ElementSize); uint64_t NumElements = Size / ElementSize; if (NumElements * ElementSize != Size) @@ -3074,7 +3227,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty, assert(Offset == 0); if (Size == ElementSize) - return ElementTy; + return stripAggregateTypeWrapping(TD, ElementTy); StructType::element_iterator EI = STy->element_begin() + Index, EE = STy->element_end(); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 73f53b7cecc..d86c4cbc9f6 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -133,295 +133,7 @@ static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) { // String and Memory LibCall Optimizations //===----------------------------------------------------------------------===// -//===---------------------------------------===// -// 'strcat' Optimizations namespace { -struct StrCatOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Verify the "strcat" function prototype. 
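stripAggregateTypeWrapping above only peels a shell when neither the allocated size nor the bit size shrinks. So a partition typed { [1 x i64] } unwraps through [1 x i64] down to i64 (every layer is 8 bytes, 64 bits), while { i64, i8 } is returned unchanged because the struct's alloc size exceeds its offset-0 element's, and [2 x i64] stays because its element covers only half the size.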
- FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - FT->getReturnType() != B.getInt8PtrTy() || - FT->getParamType(0) != FT->getReturnType() || - FT->getParamType(1) != FT->getReturnType()) - return 0; - - // Extract some information from the instruction - Value *Dst = CI->getArgOperand(0); - Value *Src = CI->getArgOperand(1); - - // See if we can get the length of the input string. - uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; - --Len; // Unbias length. - - // Handle the simple, do-nothing case: strcat(x, "") -> x - if (Len == 0) - return Dst; - - // These optimizations require DataLayout. - if (!TD) return 0; - - return EmitStrLenMemCpy(Src, Dst, Len, B); - } - - Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { - // We need to find the end of the destination string. That's where the - // memory is to be moved to. We just generate a call to strlen. - Value *DstLen = EmitStrLen(Dst, B, TD, TLI); - if (!DstLen) - return 0; - - // Now that we have the destination's length, we must index into the - // destination's pointer to get the actual memcpy destination (end of - // the string .. we're concatenating). - Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr"); - - // We have enough information to now generate the memcpy call to do the - // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(CpyDst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); - return Dst; - } -}; - -//===---------------------------------------===// -// 'strncat' Optimizations - -struct StrNCatOpt : public StrCatOpt { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Verify the "strncat" function prototype. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || - FT->getReturnType() != B.getInt8PtrTy() || - FT->getParamType(0) != FT->getReturnType() || - FT->getParamType(1) != FT->getReturnType() || - !FT->getParamType(2)->isIntegerTy()) - return 0; - - // Extract some information from the instruction - Value *Dst = CI->getArgOperand(0); - Value *Src = CI->getArgOperand(1); - uint64_t Len; - - // We don't do anything if length is not constant - if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) - Len = LengthArg->getZExtValue(); - else - return 0; - - // See if we can get the length of the input string. - uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) return 0; - --SrcLen; // Unbias length. - - // Handle the simple, do-nothing cases: - // strncat(x, "", c) -> x - // strncat(x, c, 0) -> x - if (SrcLen == 0 || Len == 0) return Dst; - - // These optimizations require DataLayout. - if (!TD) return 0; - - // We don't optimize this case - if (Len < SrcLen) return 0; - - // strncat(x, s, c) -> strcat(x, s) - // s is constant so the strcat can be optimized further - return EmitStrLenMemCpy(Src, Dst, SrcLen, B); - } -}; - -//===---------------------------------------===// -// 'strchr' Optimizations - -struct StrChrOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Verify the "strchr" function prototype. 
- FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - FT->getReturnType() != B.getInt8PtrTy() || - FT->getParamType(0) != FT->getReturnType() || - !FT->getParamType(1)->isIntegerTy(32)) - return 0; - - Value *SrcStr = CI->getArgOperand(0); - - // If the second operand is non-constant, see if we can compute the length - // of the input string and turn this into memchr. - ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - if (CharC == 0) { - // These optimizations require DataLayout. - if (!TD) return 0; - - uint64_t Len = GetStringLength(SrcStr); - if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. - return 0; - - return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. - ConstantInt::get(TD->getIntPtrType(*Context), Len), - B, TD, TLI); - } - - // Otherwise, the character is a constant, see if the first argument is - // a string literal. If so, we can constant fold. - StringRef Str; - if (!getConstantStringInfo(SrcStr, Str)) - return 0; - - // Compute the offset, make sure to handle the case when we're searching for - // zero (a weird way to spell strlen). - size_t I = CharC->getSExtValue() == 0 ? - Str.size() : Str.find(CharC->getSExtValue()); - if (I == StringRef::npos) // Didn't find the char. strchr returns null. - return Constant::getNullValue(CI->getType()); - - // strchr(s+n,c) -> gep(s+n+i,c) - return B.CreateGEP(SrcStr, B.getInt64(I), "strchr"); - } -}; - -//===---------------------------------------===// -// 'strrchr' Optimizations - -struct StrRChrOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Verify the "strrchr" function prototype. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - FT->getReturnType() != B.getInt8PtrTy() || - FT->getParamType(0) != FT->getReturnType() || - !FT->getParamType(1)->isIntegerTy(32)) - return 0; - - Value *SrcStr = CI->getArgOperand(0); - ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - - // Cannot fold anything if we're not looking for a constant. - if (!CharC) - return 0; - - StringRef Str; - if (!getConstantStringInfo(SrcStr, Str)) { - // strrchr(s, 0) -> strchr(s, 0) - if (TD && CharC->isZero()) - return EmitStrChr(SrcStr, '\0', B, TD, TLI); - return 0; - } - - // Compute the offset. - size_t I = CharC->getSExtValue() == 0 ? - Str.size() : Str.rfind(CharC->getSExtValue()); - if (I == StringRef::npos) // Didn't find the char. Return null. - return Constant::getNullValue(CI->getType()); - - // strrchr(s+n,c) -> gep(s+n+i,c) - return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr"); - } -}; - -//===---------------------------------------===// -// 'strcmp' Optimizations - -struct StrCmpOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Verify the "strcmp" function prototype. 
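The strchr folding being removed above (this batch of string optimizations is migrating into the new Transforms/Utils/SimplifyLibCalls.cpp added below) reduces a constant-string search to a GEP at the first match, treating a search for '\0' as strlen. A plain-C++ restatement of the index computation:

    #include <cstddef>
    #include <string>

    // Mirror of the fold: returns the GEP index, or -1 for "no match" (the
    // IR version returns a null pointer constant in that case).
    static ptrdiff_t foldStrChrIndex(const std::string &S, char C) {
      std::string::size_type I = (C == 0) ? S.size() : S.find(C);
      return I == std::string::npos ? -1 : (ptrdiff_t)I;
    }

foldStrChrIndex("hello", 'l') is 2, so strchr(s, 'l') becomes gep(s, 2), exactly as the comment in the removed code describes.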
- FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - !FT->getReturnType()->isIntegerTy(32) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != B.getInt8PtrTy()) - return 0; - - Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); - if (Str1P == Str2P) // strcmp(x,x) -> 0 - return ConstantInt::get(CI->getType(), 0); - - StringRef Str1, Str2; - bool HasStr1 = getConstantStringInfo(Str1P, Str1); - bool HasStr2 = getConstantStringInfo(Str2P, Str2); - - // strcmp(x, y) -> cnst (if both x and y are constant strings) - if (HasStr1 && HasStr2) - return ConstantInt::get(CI->getType(), Str1.compare(Str2)); - - if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x - return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), - CI->getType())); - - if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x - return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - - // strcmp(P, "x") -> memcmp(P, "x", 2) - uint64_t Len1 = GetStringLength(Str1P); - uint64_t Len2 = GetStringLength(Str2P); - if (Len1 && Len2) { - // These optimizations require DataLayout. - if (!TD) return 0; - - return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(TD->getIntPtrType(*Context), - std::min(Len1, Len2)), B, TD, TLI); - } - - return 0; - } -}; - -//===---------------------------------------===// -// 'strncmp' Optimizations - -struct StrNCmpOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Verify the "strncmp" function prototype. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || - !FT->getReturnType()->isIntegerTy(32) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != B.getInt8PtrTy() || - !FT->getParamType(2)->isIntegerTy()) - return 0; - - Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); - if (Str1P == Str2P) // strncmp(x,x,n) -> 0 - return ConstantInt::get(CI->getType(), 0); - - // Get the length argument if it is constant. - uint64_t Length; - if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) - Length = LengthArg->getZExtValue(); - else - return 0; - - if (Length == 0) // strncmp(x,y,0) -> 0 - return ConstantInt::get(CI->getType(), 0); - - if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI); - - StringRef Str1, Str2; - bool HasStr1 = getConstantStringInfo(Str1P, Str1); - bool HasStr2 = getConstantStringInfo(Str2P, Str2); - - // strncmp(x, y) -> cnst (if both x and y are constant strings) - if (HasStr1 && HasStr2) { - StringRef SubStr1 = Str1.substr(0, Length); - StringRef SubStr2 = Str2.substr(0, Length); - return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2)); - } - - if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x - return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), - CI->getType())); - - if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x - return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - - return 0; - } -}; - - //===---------------------------------------===// // 'strcpy' Optimizations @@ -636,9 +348,8 @@ struct StrToOpt : public LibCallOptimization { if (isa<ConstantPointerNull>(EndPtr)) { // With a null EndPtr, this function won't capture the main argument. // It would be readonly too, except that it still may write to errno. 
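The strcmp/strncmp folds removed above follow the same migration; the empty-string cases collapse to a single load and negate. Restated:

    // strcmp("", x) -> -*x and strcmp(x, "") -> *x, as in the removed code.
    static int foldStrCmpEmptyLHS(const unsigned char *X) {
      return -(int)X[0];
    }

When both operand lengths are known, the call instead becomes memcmp(P, Q, min(Len1, Len2)) via EmitMemCmp, and strncmp(x, y, 1) becomes memcmp(x, y, 1).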
- Attributes::Builder B; - B.addAttribute(Attributes::NoCapture); - CI->addAttribute(1, Attributes::get(B)); + CI->addAttribute(1, Attributes::get(Callee->getContext(), + Attributes::NoCapture)); } return 0; @@ -1564,8 +1275,6 @@ namespace { StringMap<LibCallOptimization*> Optimizations; // String and Memory LibCall Optimizations - StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr; - StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk; StpCpyOpt StpCpy; StpCpyOpt StpCpyChk; StrNCpyOpt StrNCpy; @@ -1639,12 +1348,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, /// we know. void SimplifyLibCalls::InitOptimizations() { // String and Memory LibCall Optimizations - Optimizations["strcat"] = &StrCat; - Optimizations["strncat"] = &StrNCat; - Optimizations["strchr"] = &StrChr; - Optimizations["strrchr"] = &StrRChr; - Optimizations["strcmp"] = &StrCmp; - Optimizations["strncmp"] = &StrNCmp; Optimizations["strcpy"] = &StrCpy; Optimizations["strncpy"] = &StrNCpy; Optimizations["stpcpy"] = &StpCpy; diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 26240d4dfe4..fa2faa2dad8 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -41,9 +41,10 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[1] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2)); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI), @@ -67,9 +68,10 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[1] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2)); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI), @@ -95,7 +97,8 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; AttributeWithIndex AWI = - AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2)); + AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); Type *I8Ptr = B.getInt8PtrTy(); Type *I32Ty = B.getInt32Ty(); @@ -117,10 +120,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture); + AWI[0] = 
AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[2] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2)); + AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI), @@ -147,8 +151,9 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), I8Ptr, I8Ptr, I8Ptr, NULL); @@ -169,8 +174,9 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), I8Ptr, I8Ptr, I8Ptr, @@ -193,7 +199,8 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(~0u, Attributes::NoUnwind); + AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", AttrListPtr::get(AWI), @@ -221,7 +228,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2)); + AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI), B.getInt8PtrTy(), @@ -246,10 +254,11 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; - AWI[2] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2)); + AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = 
B.GetInsertBlock()->getContext(); Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI), @@ -325,8 +334,9 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI), B.getInt32Ty(), @@ -347,8 +357,9 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); Constant *F; if (File->getType()->isPointerTy()) F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI), @@ -378,9 +389,10 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attributes::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); + AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) @@ -409,9 +421,10 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture); - AWI[1] = AttributeWithIndex::get(4, Attributes::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attributes::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture); + AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index c3f72b13afc..620209bccbc 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -28,6 +28,7 @@ add_llvm_library(LLVMTransformUtils SimplifyCFG.cpp SimplifyIndVar.cpp SimplifyInstructions.cpp + SimplifyLibCalls.cpp UnifyFunctionExitNodes.cpp Utils.cpp ValueMapper.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index e2932501f31..7ba9f6d9d25 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -98,10 +98,14 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Anew->addAttr( OldFunc->getAttributes() .getParamAttributes(I->getArgNo() + 1)); 
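Every BuildLibCalls.cpp hunk above makes the same two substitutions: AttributeWithIndex::get grows an LLVMContext parameter, and the magic ~0u function slot is now spelled AttrListPtr::FunctionIndex. Pulled out of the diff, the new declaration pattern looks like this; a sketch modeled on EmitStrLen, with the helper name and parameters being illustrative:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Attributes.h"
    #include "llvm/Module.h"

    using namespace llvm;

    // Declare strlen as 'size_t strlen(i8* nocapture) readonly nounwind':
    // index 1 is the first parameter, FunctionIndex the function itself.
    static Constant *declareStrLen(Module *M, Type *IntPtrTy, Type *I8PtrTy) {
      AttributeWithIndex AWI[2];
      AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
      Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
      AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
                                       ArrayRef<Attributes::AttrVal>(AVs, 2));
      return M->getOrInsertFunction("strlen", AttrListPtr::get(AWI), IntPtrTy,
                                    I8PtrTy, NULL);
    }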
NewFunc->setAttributes(NewFunc->getAttributes() - .addAttr(0, OldFunc->getAttributes() + .addAttr(NewFunc->getContext(), + AttrListPtr::ReturnIndex, + OldFunc->getAttributes() .getRetAttributes())); NewFunc->setAttributes(NewFunc->getAttributes() - .addAttr(~0, OldFunc->getAttributes() + .addAttr(NewFunc->getContext(), + AttrListPtr::FunctionIndex, + OldFunc->getAttributes() .getFnAttributes())); } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index a954d82c05b..9729687a83e 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -806,7 +806,8 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout *TD) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; + unsigned AS = cast<PointerType>(V->getType())->getAddressSpace(); + unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 64; APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); ComputeMaskedBits(V, KnownZero, KnownOne, TD); unsigned TrailZ = KnownZero.countTrailingOnes(); diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 930555424de..f35cbbdde5e 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -45,10 +45,10 @@ #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/TargetTransformInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" #include <csetjmp> #include <set> using namespace llvm; @@ -70,15 +70,14 @@ namespace { Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn; bool useExpensiveEHSupport; - // We peek in TLI to grab the target's jmp_buf size and alignment - const TargetLowering *TLI; + // We peek in STTI to grab the target's jmp_buf size and alignment + const ScalarTargetTransformInfo *STTI; public: static char ID; // Pass identification, replacement for typeid - explicit LowerInvoke(const TargetLowering *tli = NULL, - bool useExpensiveEHSupport = ExpensiveEHSupport) + explicit LowerInvoke(bool useExpensiveEHSupport = ExpensiveEHSupport) : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport), - TLI(tli) { + STTI(0) { initializeLowerInvokePass(*PassRegistry::getPassRegistry()); } bool doInitialization(Module &M); @@ -108,21 +107,24 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke", char &llvm::LowerInvokePassID = LowerInvoke::ID; // Public Interface To the LowerInvoke pass. -FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { - return new LowerInvoke(TLI, ExpensiveEHSupport); +FunctionPass *llvm::createLowerInvokePass() { + return new LowerInvoke(ExpensiveEHSupport); } -FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI, - bool useExpensiveEHSupport) { - return new LowerInvoke(TLI, useExpensiveEHSupport); +FunctionPass *llvm::createLowerInvokePass(bool useExpensiveEHSupport) { + return new LowerInvoke(useExpensiveEHSupport); } // doInitialization - Make sure that there is a prototype for abort in the // current module. 
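The Local.cpp hunk above belongs to a wider cleanup in this patch: any query for "the" pointer width must now say which address space it means. The whole pattern fits in a few lines; a sketch whose 64-bit fallback mirrors getOrEnforceKnownAlignment:

    #include "llvm/DataLayout.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Value.h"

    using namespace llvm;

    static unsigned pointerWidthInBits(const Value *V, const DataLayout *TD) {
      // Pointers in different address spaces may have different sizes.
      unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
      return TD ? TD->getPointerSizeInBits(AS) : 64; // conservative default
    }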
bool LowerInvoke::doInitialization(Module &M) { + TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>(); + if (TTI) + STTI = TTI->getScalarTargetTransformInfo(); + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); if (useExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. - unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; + unsigned JBSize = STTI ? STTI->getJumpBufSize() : 0; JBSize = JBSize ? JBSize : 200; Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); @@ -430,7 +432,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an // alloca because the value needs to be live across invokes. - unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0; + unsigned Align = STTI ? STTI->getJumpBufAlignment() : 0; AllocaInst *JmpBuf = new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin()); @@ -575,6 +577,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { } bool LowerInvoke::runOnFunction(Function &F) { + TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>(); + if (TTI) + STTI = TTI->getScalarTargetTransformInfo(); + if (useExpensiveEHSupport) return insertExpensiveEHSupport(F); else diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index af8d1128523..a008da67e92 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -76,6 +76,8 @@ namespace { // Comparing pointers is ok as we only rely on the order for uniquing. return Value < RHS.Value; } + + bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } }; class SimplifyCFGOpt { @@ -564,11 +566,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, /// in the list that match the specified block. static void EliminateBlockCases(BasicBlock *BB, std::vector<ValueEqualityComparisonCase> &Cases) { - for (unsigned i = 0, e = Cases.size(); i != e; ++i) - if (Cases[i].Dest == BB) { - Cases.erase(Cases.begin()+i); - --i; --e; - } + Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end()); } /// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as @@ -695,7 +693,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, SI->removeCase(i); } } - if (HasWeight && Weights.size() >= 2) SI->setMetadata(LLVMContext::MD_prof, MDBuilder(SI->getParent()->getContext()). createBranchWeights(Weights)); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp new file mode 100644 index 00000000000..bd28ec35273 --- /dev/null +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -0,0 +1,579 @@ +//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the library call simplifier. It does not implement +// any pass, but is a utility used by other passes (such as instcombine) to +// fold library calls into simpler, faster equivalents.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/DataLayout.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/LLVMContext.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" + +using namespace llvm; + +/// This class is the abstract base class for the set of optimizations that +/// corresponds to one library call. +namespace { +class LibCallOptimization { +protected: + Function *Caller; + const DataLayout *TD; + const TargetLibraryInfo *TLI; + LLVMContext* Context; +public: + LibCallOptimization() { } + virtual ~LibCallOptimization() {} + + /// callOptimizer - This pure virtual method is implemented by subclasses to + /// do various optimizations. If this returns null then no transformation was + /// performed. If it returns CI, then it transformed the call and CI is to be + /// deleted. If it returns something else, replace CI with the new value and + /// delete CI. + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) + =0; + + Value *optimizeCall(CallInst *CI, const DataLayout *TD, + const TargetLibraryInfo *TLI, IRBuilder<> &B) { + Caller = CI->getParent()->getParent(); + this->TD = TD; + this->TLI = TLI; + if (CI->getCalledFunction()) + Context = &CI->getCalledFunction()->getContext(); + + // We never change the calling convention. + if (CI->getCallingConv() != llvm::CallingConv::C) + return NULL; + + return callOptimizer(CI->getCalledFunction(), CI, B); + } +}; + +//===----------------------------------------------------------------------===// +// Fortified Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct FortifiedLibCallOptimization : public LibCallOptimization { +protected: + virtual bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, + bool isString) const = 0; +}; + +struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization { + CallInst *CI; + + bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { + if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) + return true; + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { + if (SizeCI->isAllOnesValue()) + return true; + if (isString) { + uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. + if (Len == 0) return false; + return SizeCI->getZExtValue() >= Len; + } + if (ConstantInt *Arg = dyn_cast<ConstantInt>( + CI->getArgOperand(SizeArgOp))) + return SizeCI->getZExtValue() >= Arg->getZExtValue(); + } + return false; + } +}; + +struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature.
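The callOptimizer contract above is easiest to read from the caller's side: null means the call was left alone, and any other value (including CI itself) means the original call is dead. A hedged sketch of a driver honoring that contract, in the context of the file above and not code from this patch:

    // LCO is a registered LibCallOptimization; CI is the call being visited.
    static bool applyLibCallOpt(LibCallOptimization *LCO, CallInst *CI,
                                const DataLayout *TD,
                                const TargetLibraryInfo *TLI) {
      IRBuilder<> Builder(CI);
      Value *Res = LCO->optimizeCall(CI, TD, TLI, Builder);
      if (!Res)
        return false;                // null: nothing was transformed
      if (Res != CI)
        CI->replaceAllUsesWith(Res); // a new value: rewrite the call's uses
      CI->eraseFromParent();         // in either case the original call dies
      return true;
    }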
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct MemSetChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), + false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(Context)) + return 0; + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our + // st[rp]cpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFoldable(2, 1, true)) { + Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, + TLI, Name.substr(2, 6)); + return Ret; + } + return 0; + } +}; + +struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. 
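All five *_chk optimizations funnel into the isFoldable test defined above. Reduced to its two constant cases (dropping the identical-operand shortcut and the string-length variant), the condition is simply the following; a restatement for clarity, not new logic:

    // SizeCI is the guarded object size (e.g. operand 3 of __memcpy_chk),
    // SizeArg the requested copy size (operand 2).
    static bool chkIsFoldable(ConstantInt *SizeCI, ConstantInt *SizeArg) {
      if (!SizeCI)
        return false;                 // unknown object size: keep the check
      if (SizeCI->isAllOnesValue())
        return true;                  // (size_t)-1 means checking is disabled
      return SizeArg && SizeCI->getZExtValue() >= SizeArg->getZExtValue();
    }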
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + !FT->getParamType(2)->isIntegerTy() || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TD, TLI, + Name.substr(2, 7)); + return Ret; + } + return 0; + } +}; + +//===----------------------------------------------------------------------===// +// String and Memory Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct StrCatOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcat" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + FT->getParamType(1) != FT->getReturnType()) + return 0; + + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + --Len; // Unbias length. + + // Handle the simple, do-nothing case: strcat(x, "") -> x + if (Len == 0) + return Dst; + + // These optimizations require DataLayout. + if (!TD) return 0; + + return emitStrLenMemCpy(Src, Dst, Len, B); + } + + Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, + IRBuilder<> &B) { + // We need to find the end of the destination string. That's where the + // memory is to be moved to. We just generate a call to strlen. + Value *DstLen = EmitStrLen(Dst, B, TD, TLI); + if (!DstLen) + return 0; + + // Now that we have the destination's length, we must index into the + // destination's pointer to get the actual memcpy destination (end of + // the string .. we're concatenating). + Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr"); + + // We have enough information to now generate the memcpy call to do the + // concatenation for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(CpyDst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + return Dst; + } +}; + +struct StrNCatOpt : public StrCatOpt { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strncat" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + FT->getParamType(1) != FT->getReturnType() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + uint64_t Len; + + // We don't do anything if length is not constant + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Len = LengthArg->getZExtValue(); + else + return 0; + + // See if we can get the length of the input string. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) return 0; + --SrcLen; // Unbias length. + + // Handle the simple, do-nothing cases: + // strncat(x, "", c) -> x + // strncat(x, c, 0) -> x + if (SrcLen == 0 || Len == 0) return Dst; + + // These optimizations require DataLayout. 
+ if (!TD) return 0; + + // We don't optimize this case + if (Len < SrcLen) return 0; + + // strncat(x, s, c) -> strcat(x, s) + // s is constant so the strcat can be optimized further + return emitStrLenMemCpy(Src, Dst, SrcLen, B); + } +}; + +struct StrChrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strchr" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) + return 0; + + Value *SrcStr = CI->getArgOperand(0); + + // If the second operand is non-constant, see if we can compute the length + // of the input string and turn this into memchr. + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + if (CharC == 0) { + // These optimizations require DataLayout. + if (!TD) return 0; + + uint64_t Len = GetStringLength(SrcStr); + if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. + return 0; + + return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. + ConstantInt::get(TD->getIntPtrType(*Context), Len), + B, TD, TLI); + } + + // Otherwise, the character is a constant, see if the first argument is + // a string literal. If so, we can constant fold. + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) + return 0; + + // Compute the offset, make sure to handle the case when we're searching for + // zero (a weird way to spell strlen). + size_t I = CharC->getSExtValue() == 0 ? + Str.size() : Str.find(CharC->getSExtValue()); + if (I == StringRef::npos) // Didn't find the char. strchr returns null. + return Constant::getNullValue(CI->getType()); + + // strchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "strchr"); + } +}; + +struct StrRChrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strrchr" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) + return 0; + + Value *SrcStr = CI->getArgOperand(0); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + + // Cannot fold anything if we're not looking for a constant. + if (!CharC) + return 0; + + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) { + // strrchr(s, 0) -> strchr(s, 0) + if (TD && CharC->isZero()) + return EmitStrChr(SrcStr, '\0', B, TD, TLI); + return 0; + } + + // Compute the offset. + size_t I = CharC->getSExtValue() == 0 ? + Str.size() : Str.rfind(CharC->getSExtValue()); + if (I == StringRef::npos) // Didn't find the char. Return null. + return Constant::getNullValue(CI->getType()); + + // strrchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr"); + } +}; + +struct StrCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcmp" function prototype. 
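When both arguments are known, StrChrOpt and StrRChrOpt above collapse the whole search at compile time: the result is either a constant offset into the string or null. Two illustrative foldings, with hypothetical inputs and the same IRBuilder calls as the code above:

    // strchr("hello", 'l'): StringRef("hello").find('l') == 2, so the call
    // becomes a GEP to the matching byte of the string constant.
    Value *Hit = B.CreateGEP(SrcStr, B.getInt64(2), "strchr");

    // strchr("hello", 'z'): find() returns npos, so the result is null.
    Value *Miss = Constant::getNullValue(CI->getType());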
+ FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getReturnType()->isIntegerTy(32) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strcmp(x,x) -> 0 + return ConstantInt::get(CI->getType(), 0); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strcmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) + return ConstantInt::get(CI->getType(), Str1.compare(Str2)); + + if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + // strcmp(P, "x") -> memcmp(P, "x", 2) + uint64_t Len1 = GetStringLength(Str1P); + uint64_t Len2 = GetStringLength(Str2P); + if (Len1 && Len2) { + // These optimizations require DataLayout. + if (!TD) return 0; + + return EmitMemCmp(Str1P, Str2P, + ConstantInt::get(TD->getIntPtrType(*Context), + std::min(Len1, Len2)), B, TD, TLI); + } + + return 0; + } +}; + +struct StrNCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strncmp" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || + !FT->getReturnType()->isIntegerTy(32) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strncmp(x,x,n) -> 0 + return ConstantInt::get(CI->getType(), 0); + + // Get the length argument if it is constant. + uint64_t Length; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Length = LengthArg->getZExtValue(); + else + return 0; + + if (Length == 0) // strncmp(x,y,0) -> 0 + return ConstantInt::get(CI->getType(), 0); + + if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strncmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) { + StringRef SubStr1 = Str1.substr(0, Length); + StringRef SubStr2 = Str2.substr(0, Length); + return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2)); + } + + if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + return 0; + } +}; + +} // End anonymous namespace. + +namespace llvm { + +class LibCallSimplifierImpl { + const DataLayout *TD; + const TargetLibraryInfo *TLI; + StringMap<LibCallOptimization*> Optimizations; + + // Fortified library call optimizations. + MemCpyChkOpt MemCpyChk; + MemMoveChkOpt MemMoveChk; + MemSetChkOpt MemSetChk; + StrCpyChkOpt StrCpyChk; + StrNCpyChkOpt StrNCpyChk; + + // String and memory library call optimizations. 
+ StrCatOpt StrCat; + StrNCatOpt StrNCat; + StrChrOpt StrChr; + StrRChrOpt StrRChr; + StrCmpOpt StrCmp; + StrNCmpOpt StrNCmp; + + void initOptimizations(); +public: + LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI) { + this->TD = TD; + this->TLI = TLI; + } + + Value *optimizeCall(CallInst *CI); +}; + +void LibCallSimplifierImpl::initOptimizations() { + // Fortified library call optimizations. + Optimizations["__memcpy_chk"] = &MemCpyChk; + Optimizations["__memmove_chk"] = &MemMoveChk; + Optimizations["__memset_chk"] = &MemSetChk; + Optimizations["__strcpy_chk"] = &StrCpyChk; + Optimizations["__stpcpy_chk"] = &StrCpyChk; + Optimizations["__strncpy_chk"] = &StrNCpyChk; + Optimizations["__stpncpy_chk"] = &StrNCpyChk; + + // String and memory library call optimizations. + Optimizations["strcat"] = &StrCat; + Optimizations["strncat"] = &StrNCat; + Optimizations["strchr"] = &StrChr; + Optimizations["strrchr"] = &StrRChr; + Optimizations["strcmp"] = &StrCmp; + Optimizations["strncmp"] = &StrNCmp; +} + +Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { + if (Optimizations.empty()) + initOptimizations(); + + Function *Callee = CI->getCalledFunction(); + LibCallOptimization *LCO = Optimizations.lookup(Callee->getName()); + if (LCO) { + IRBuilder<> Builder(CI); + return LCO->optimizeCall(CI, TD, TLI, Builder); + } + return 0; +} + +LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, + const TargetLibraryInfo *TLI) { + Impl = new LibCallSimplifierImpl(TD, TLI); +} + +LibCallSimplifier::~LibCallSimplifier() { + delete Impl; +} + +Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + return Impl->optimizeCall(CI); +} + +} diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index 0625ef3f09c..4f55fb203bd 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -7,11 +7,13 @@ // //===----------------------------------------------------------------------===// // -// This file implements the AttributesList class and Attribute utilities. +// This file implements the Attributes, AttributeImpl, AttrBuilder, +// AttributeListImpl, and AttrListPtr classes. // //===----------------------------------------------------------------------===// #include "llvm/Attributes.h" +#include "AttributesImpl.h" #include "LLVMContextImpl.h" #include "llvm/Type.h" #include "llvm/ADT/StringExtras.h" @@ -27,27 +29,23 @@ using namespace llvm; // Attributes Implementation //===----------------------------------------------------------------------===// -Attributes::Attributes(uint64_t Val) : Attrs(Val) {} - -Attributes::Attributes(AttributesImpl *A) : Attrs(A->Bits) {} - -Attributes::Attributes(const Attributes &A) : Attrs(A.Attrs) {} - -// FIXME: This is temporary until we have implemented the uniquified version of -// AttributesImpl. -Attributes Attributes::get(Attributes::Builder &B) { - return Attributes(B.Bits); +Attributes Attributes::get(LLVMContext &Context, ArrayRef<AttrVal> Vals) { + AttrBuilder B; + for (ArrayRef<AttrVal>::iterator I = Vals.begin(), E = Vals.end(); + I != E; ++I) + B.addAttribute(*I); + return Attributes::get(Context, B); } -Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) { +Attributes Attributes::get(LLVMContext &Context, AttrBuilder &B) { // If there are no attributes, return an empty Attributes class. - if (B.Bits == 0) + if (!B.hasAttributes()) return Attributes(); // Otherwise, build a key to look up the existing attributes.
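The pimpl split just added keeps the optimization classes private: clients see only the small LibCallSimplifier facade. A sketch of how a transform could drive it over a function; the loop is illustrative (this patch adds only the utility), and note that optimizeCall expects a direct callee for its name lookup:

    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/SimplifyLibCalls.h"

    using namespace llvm;

    static bool simplifyLibCallsIn(Function &F, const DataLayout *TD,
                                   const TargetLibraryInfo *TLI) {
      LibCallSimplifier Simplifier(TD, TLI);
      bool Changed = false;
      for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
        for (BasicBlock::iterator I = BB->begin(); I != BB->end();) {
          Instruction *Inst = I++;     // advance first: Inst may be erased
          CallInst *CI = dyn_cast<CallInst>(Inst);
          if (!CI || !CI->getCalledFunction())
            continue;                  // indirect calls have no name to match
          if (Value *V = Simplifier.optimizeCall(CI)) {
            if (V != CI)
              CI->replaceAllUsesWith(V);
            CI->eraseFromParent();
            Changed = true;
          }
        }
      return Changed;
    }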
LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; - ID.AddInteger(B.Bits); + ID.AddInteger(B.Raw()); void *InsertPoint; AttributesImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint); @@ -55,7 +53,7 @@ Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) { if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = new AttributesImpl(B.Bits); + PA = new AttributesImpl(B.Raw()); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -64,18 +62,22 @@ Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) { } bool Attributes::hasAttribute(AttrVal Val) const { - return Attrs.hasAttribute(Val); + return Attrs && Attrs->hasAttribute(Val); +} + +bool Attributes::hasAttributes() const { + return Attrs && Attrs->hasAttributes(); } bool Attributes::hasAttributes(const Attributes &A) const { - return Attrs.hasAttributes(A); + return Attrs && Attrs->hasAttributes(A); } /// This returns the alignment field of an attribute as a byte alignment value. unsigned Attributes::getAlignment() const { if (!hasAttribute(Attributes::Alignment)) return 0; - return 1U << ((Attrs.getAlignment() >> 16) - 1); + return 1U << ((Attrs->getAlignment() >> 16) - 1); } /// This returns the stack alignment field of an attribute as a byte alignment @@ -83,46 +85,21 @@ unsigned Attributes::getAlignment() const { unsigned Attributes::getStackAlignment() const { if (!hasAttribute(Attributes::StackAlignment)) return 0; - return 1U << ((Attrs.getStackAlignment() >> 26) - 1); -} - -bool Attributes::isEmptyOrSingleton() const { - return Attrs.isEmptyOrSingleton(); -} - -Attributes Attributes::operator | (const Attributes &A) const { - return Attributes(Raw() | A.Raw()); -} -Attributes Attributes::operator & (const Attributes &A) const { - return Attributes(Raw() & A.Raw()); -} -Attributes Attributes::operator ^ (const Attributes &A) const { - return Attributes(Raw() ^ A.Raw()); -} -Attributes &Attributes::operator |= (const Attributes &A) { - Attrs.Bits |= A.Raw(); - return *this; -} -Attributes &Attributes::operator &= (const Attributes &A) { - Attrs.Bits &= A.Raw(); - return *this; -} -Attributes Attributes::operator ~ () const { - return Attributes(~Raw()); + return 1U << ((Attrs->getStackAlignment() >> 26) - 1); } uint64_t Attributes::Raw() const { - return Attrs.Bits; + return Attrs ? Attrs->Raw() : 0; } Attributes Attributes::typeIncompatible(Type *Ty) { - Attributes::Builder Incompatible; - + AttrBuilder Incompatible; + if (!Ty->isIntegerTy()) // Attributes that only apply to integers. Incompatible.addAttribute(Attributes::SExt) .addAttribute(Attributes::ZExt); - + if (!Ty->isPointerTy()) // Attributes that only apply to pointers. Incompatible.addAttribute(Attributes::ByVal) @@ -130,8 +107,46 @@ Attributes Attributes::typeIncompatible(Type *Ty) { .addAttribute(Attributes::NoAlias) .addAttribute(Attributes::NoCapture) .addAttribute(Attributes::StructRet); - - return Attributes(Incompatible.Bits); // FIXME: Use Attributes::get(). + + return Attributes::get(Ty->getContext(), Incompatible); +} + +/// encodeLLVMAttributesForBitcode - This returns an integer containing an +/// encoding of all the LLVM attributes found in the given attribute bitset. +/// Any change to this encoding is a breaking change to bitcode compatibility. 
+uint64_t Attributes::encodeLLVMAttributesForBitcode(Attributes Attrs) { + // FIXME: It doesn't make sense to store the alignment information as an + // expanded out value, we should store it as a log2 value. However, we can't + // just change that here without breaking bitcode compatibility. If this ever + // becomes a problem in practice, we should introduce new tag numbers in the + // bitcode file and have those tags use a more efficiently encoded alignment + // field. + + // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit + // log2 encoded value. Shift the bits above the alignment up by 11 bits. + uint64_t EncodedAttrs = Attrs.Raw() & 0xffff; + if (Attrs.hasAttribute(Attributes::Alignment)) + EncodedAttrs |= Attrs.getAlignment() << 16; + EncodedAttrs |= (Attrs.Raw() & (0xfffULL << 21)) << 11; + return EncodedAttrs; +} + +/// decodeLLVMAttributesForBitcode - This returns an attribute bitset containing +/// the LLVM attributes that have been decoded from the given integer. This +/// function must stay in sync with 'encodeLLVMAttributesForBitcode'. +Attributes Attributes::decodeLLVMAttributesForBitcode(LLVMContext &C, + uint64_t EncodedAttrs) { + // The alignment is stored as a 16-bit raw value from bits 31--16. We shift + // the bits above 31 down by 11 bits. + unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; + assert((!Alignment || isPowerOf2_32(Alignment)) && + "Alignment must be a power of two."); + + AttrBuilder B(EncodedAttrs & 0xffff); + if (Alignment) + B.addAlignmentAttr(Alignment); + B.addRawValue((EncodedAttrs & (0xfffULL << 32)) >> 11); + return Attributes::get(C, B); } std::string Attributes::getAsString() const { @@ -203,61 +218,72 @@ std::string Attributes::getAsString() const { } //===----------------------------------------------------------------------===// -// Attributes::Builder Implementation +// AttrBuilder Implementation //===----------------------------------------------------------------------===// -Attributes::Builder &Attributes::Builder:: -addAttribute(Attributes::AttrVal Val) { +AttrBuilder &AttrBuilder::addAttribute(Attributes::AttrVal Val){ Bits |= AttributesImpl::getAttrMask(Val); return *this; } -void Attributes::Builder::addAlignmentAttr(unsigned Align) { - if (Align == 0) return; +AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { + Bits |= Val; + return *this; +} + +AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) { + if (Align == 0) return *this; assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); assert(Align <= 0x40000000 && "Alignment too large."); Bits |= (Log2_32(Align) + 1) << 16; + return *this; } -void Attributes::Builder::addStackAlignmentAttr(unsigned Align) { +AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align){ // Default alignment, allow the target to define how to align it. 
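encodeLLVMAttributesForBitcode and its decoder above must remain bit-for-bit inverses of each other, since the result is persisted in bitcode. The invariant, stated as code; a sketch that assumes Attributes comparison is available, with C as the owning LLVMContext:

    uint64_t Encoded = Attributes::encodeLLVMAttributesForBitcode(Attrs);
    Attributes RoundTripped =
        Attributes::decodeLLVMAttributesForBitcode(C, Encoded);
    assert(RoundTripped == Attrs && "bitcode attribute encoding must round-trip");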
- if (Align == 0) return; + if (Align == 0) return *this; assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); assert(Align <= 0x100 && "Alignment too large."); Bits |= (Log2_32(Align) + 1) << 26; + return *this; } -Attributes::Builder &Attributes::Builder:: -removeAttribute(Attributes::AttrVal Val) { +AttrBuilder &AttrBuilder::removeAttribute(Attributes::AttrVal Val) { Bits &= ~AttributesImpl::getAttrMask(Val); return *this; } -void Attributes::Builder::removeAttributes(const Attributes &A) { +AttrBuilder &AttrBuilder::addAttributes(const Attributes &A) { + Bits |= A.Raw(); + return *this; +} + +AttrBuilder &AttrBuilder::removeAttributes(const Attributes &A){ Bits &= ~A.Raw(); + return *this; } -bool Attributes::Builder::hasAttribute(Attributes::AttrVal A) const { +bool AttrBuilder::hasAttribute(Attributes::AttrVal A) const { return Bits & AttributesImpl::getAttrMask(A); } -bool Attributes::Builder::hasAttributes() const { +bool AttrBuilder::hasAttributes() const { return Bits != 0; } -bool Attributes::Builder::hasAttributes(const Attributes &A) const { +bool AttrBuilder::hasAttributes(const Attributes &A) const { return Bits & A.Raw(); } -bool Attributes::Builder::hasAlignmentAttr() const { +bool AttrBuilder::hasAlignmentAttr() const { return Bits & AttributesImpl::getAttrMask(Attributes::Alignment); } -uint64_t Attributes::Builder::getAlignment() const { +uint64_t AttrBuilder::getAlignment() const { if (!hasAlignmentAttr()) return 0; return 1U << (((Bits & AttributesImpl::getAttrMask(Attributes::Alignment)) >> 16) - 1); } -uint64_t Attributes::Builder::getStackAlignment() const { +uint64_t AttrBuilder::getStackAlignment() const { if (!hasAlignmentAttr()) return 0; return 1U << @@ -322,10 +348,6 @@ uint64_t AttributesImpl::getStackAlignment() const { return Bits & getAttrMask(Attributes::StackAlignment); } -bool AttributesImpl::isEmptyOrSingleton() const { - return (Bits & (Bits - 1)) == 0; -} - //===----------------------------------------------------------------------===// // AttributeListImpl Definition //===----------------------------------------------------------------------===// @@ -341,19 +363,19 @@ static ManagedStatic<sys::SmartMutex<true> > ALMutex; class AttributeListImpl : public FoldingSetNode { sys::cas_flag RefCount; - + // AttributesList is uniqued, these should not be publicly available. void operator=(const AttributeListImpl &) LLVM_DELETED_FUNCTION; AttributeListImpl(const AttributeListImpl &) LLVM_DELETED_FUNCTION; ~AttributeListImpl(); // Private implementation public: SmallVector<AttributeWithIndex, 4> Attrs; - + AttributeListImpl(ArrayRef<AttributeWithIndex> attrs) : Attrs(attrs.begin(), attrs.end()) { RefCount = 0; } - + void AddRef() { sys::SmartScopedLock<true> Lock(*ALMutex); ++RefCount; @@ -366,7 +388,7 @@ public: if (new_val == 0) delete this; } - + void Profile(FoldingSetNodeID &ID) const { Profile(ID, Attrs); } @@ -377,50 +399,49 @@ public: } } }; -} + +} // end llvm namespace AttributeListImpl::~AttributeListImpl() { // NOTE: Lock must be acquired by caller. AttributesLists->RemoveNode(this); } - AttrListPtr AttrListPtr::get(ArrayRef<AttributeWithIndex> Attrs) { // If there are no attributes then return a null AttributesList pointer. 
if (Attrs.empty()) return AttrListPtr(); - + #ifndef NDEBUG for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { - assert(Attrs[i].Attrs.hasAttributes() && + assert(Attrs[i].Attrs.hasAttributes() && "Pointless attribute!"); assert((!i || Attrs[i-1].Index < Attrs[i].Index) && "Misordered AttributesList!"); } #endif - + // Otherwise, build a key to look up the existing attributes. FoldingSetNodeID ID; AttributeListImpl::Profile(ID, Attrs); void *InsertPos; - + sys::SmartScopedLock<true> Lock(*ALMutex); - + AttributeListImpl *PAL = AttributesLists->FindNodeOrInsertPos(ID, InsertPos); - + // If we didn't find any existing attributes of the same shape then // create a new one and insert it. if (!PAL) { PAL = new AttributeListImpl(Attrs); AttributesLists->InsertNode(PAL, InsertPos); } - + // Return the AttributesList that we found or created. return AttrListPtr(PAL); } - //===----------------------------------------------------------------------===// // AttrListPtr Method Implementations //===----------------------------------------------------------------------===// @@ -430,7 +451,7 @@ AttrListPtr::AttrListPtr(AttributeListImpl *LI) : AttrList(LI) { } AttrListPtr::AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) { - if (AttrList) AttrList->AddRef(); + if (AttrList) AttrList->AddRef(); } const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) { @@ -446,7 +467,7 @@ AttrListPtr::~AttrListPtr() { if (AttrList) AttrList->DropRef(); } -/// getNumSlots - Return the number of slots used in this attribute list. +/// getNumSlots - Return the number of slots used in this attribute list. /// This is the number of arguments that have an attribute set on them /// (including the function itself). unsigned AttrListPtr::getNumSlots() const { @@ -460,13 +481,12 @@ const AttributeWithIndex &AttrListPtr::getSlot(unsigned Slot) const { return AttrList->Attrs[Slot]; } - -/// getAttributes - The attributes for the specified index are -/// returned. Attributes for the result are denoted with Idx = 0. -/// Function notes are denoted with idx = ~0. +/// getAttributes - The attributes for the specified index are returned. +/// Attributes for the result are denoted with Idx = 0. Function notes are +/// denoted with idx = ~0. Attributes AttrListPtr::getAttributes(unsigned Idx) const { if (AttrList == 0) return Attributes(); - + const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs; for (unsigned i = 0, e = Attrs.size(); i != e && Attrs[i].Index <= Idx; ++i) if (Attrs[i].Index == Idx) @@ -497,7 +517,8 @@ Attributes &AttrListPtr::getAttributesAtIndex(unsigned i) const { return AttrList->Attrs[i].Attrs; } -AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const { +AttrListPtr AttrListPtr::addAttr(LLVMContext &C, unsigned Idx, + Attributes Attrs) const { Attributes OldAttrs = getAttributes(Idx); #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. 
@@ -507,11 +528,12 @@ AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const { assert((!OldAlign || !NewAlign || OldAlign == NewAlign) && "Attempt to change alignment!"); #endif - - Attributes NewAttrs = OldAttrs | Attrs; - if (NewAttrs == OldAttrs) + + AttrBuilder NewAttrs = + AttrBuilder(OldAttrs).addAttributes(Attrs); + if (NewAttrs == AttrBuilder(OldAttrs)) return *this; - + SmallVector<AttributeWithIndex, 8> NewAttrList; if (AttrList == 0) NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs)); @@ -524,21 +546,24 @@ AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const { // If there are attributes already at this index, merge them in. if (i != e && OldAttrList[i].Index == Idx) { - Attrs |= OldAttrList[i].Attrs; + Attrs = + Attributes::get(C, AttrBuilder(Attrs). + addAttributes(OldAttrList[i].Attrs)); ++i; } - + NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs)); - + // Copy attributes for arguments after this one. - NewAttrList.insert(NewAttrList.end(), + NewAttrList.insert(NewAttrList.end(), OldAttrList.begin()+i, OldAttrList.end()); } - + return get(NewAttrList); } -AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const { +AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx, + Attributes Attrs) const { #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. // For now, say we can't pass in alignment, which no current use does. @@ -546,31 +571,33 @@ AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const { "Attempt to exclude alignment!"); #endif if (AttrList == 0) return AttrListPtr(); - + Attributes OldAttrs = getAttributes(Idx); - Attributes NewAttrs = OldAttrs & ~Attrs; - if (NewAttrs == OldAttrs) + AttrBuilder NewAttrs = + AttrBuilder(OldAttrs).removeAttributes(Attrs); + if (NewAttrs == AttrBuilder(OldAttrs)) return *this; SmallVector<AttributeWithIndex, 8> NewAttrList; const SmallVector<AttributeWithIndex, 4> &OldAttrList = AttrList->Attrs; unsigned i = 0, e = OldAttrList.size(); - + // Copy attributes for arguments before this one. for (; i != e && OldAttrList[i].Index < Idx; ++i) NewAttrList.push_back(OldAttrList[i]); - + // If there are attributes already at this index, merge them in. assert(OldAttrList[i].Index == Idx && "Attribute isn't set?"); - Attrs = OldAttrList[i].Attrs & ~Attrs; + Attrs = Attributes::get(C, AttrBuilder(OldAttrList[i].Attrs). + removeAttributes(Attrs)); ++i; - if (Attrs) // If any attributes left for this parameter, add them. + if (Attrs.hasAttributes()) // If any attributes left for this param, add them. NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs)); - + // Copy attributes for arguments after this one. 
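With addAttr and removeAttr rewritten above in terms of AttrBuilder, client code merges attribute sets instead of OR-ing raw bits. The typical call site now reads as follows, a sketch mirroring the Function::addAttribute update later in this diff:

    // Add 'nounwind' to F's function-level attributes.
    AttrListPtr PAL = F->getAttributes();
    PAL = PAL.addAttr(F->getContext(), AttrListPtr::FunctionIndex,
                      Attributes::get(F->getContext(), Attributes::NoUnwind));
    F->setAttributes(PAL);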
- NewAttrList.insert(NewAttrList.end(), + NewAttrList.insert(NewAttrList.end(), OldAttrList.begin()+i, OldAttrList.end()); - + return get(NewAttrList); } @@ -578,8 +605,8 @@ void AttrListPtr::dump() const { dbgs() << "PAL[ "; for (unsigned i = 0; i < getNumSlots(); ++i) { const AttributeWithIndex &PAWI = getSlot(i); - dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} "; + dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs.getAsString() << "} "; } - + dbgs() << "]\n"; } diff --git a/lib/VMCore/AttributesImpl.h b/lib/VMCore/AttributesImpl.h new file mode 100644 index 00000000000..b4a0f615f36 --- /dev/null +++ b/lib/VMCore/AttributesImpl.h @@ -0,0 +1,51 @@ +//===-- AttributesImpl.h - Attributes Internals -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines various helper methods and classes used by LLVMContextImpl +// for creating and managing attributes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ATTRIBUTESIMPL_H +#define LLVM_ATTRIBUTESIMPL_H + +#include "llvm/ADT/FoldingSet.h" + +namespace llvm { + +class Attributes; + +class AttributesImpl : public FoldingSetNode { + uint64_t Bits; // FIXME: We will be expanding this. +public: + AttributesImpl(uint64_t bits) : Bits(bits) {} + + bool hasAttribute(uint64_t A) const; + + bool hasAttributes() const; + bool hasAttributes(const Attributes &A) const; + + uint64_t getAlignment() const; + uint64_t getStackAlignment() const; + + uint64_t Raw() const { return Bits; } // FIXME: Remove. + + static uint64_t getAttrMask(uint64_t Val); + + void Profile(FoldingSetNodeID &ID) const { + Profile(ID, Bits); + } + static void Profile(FoldingSetNodeID &ID, uint64_t Bits) { + ID.AddInteger(Bits); + } +}; + +} // end llvm namespace + +#endif diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 094ca755132..5fff460e8bc 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -148,7 +148,8 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { if (NewFn) F = NewFn; if (unsigned id = F->getIntrinsicID()) - F->setAttributes(Intrinsic::getAttributes((Intrinsic::ID)id)); + F->setAttributes(Intrinsic::getAttributes(F->getContext(), + (Intrinsic::ID)id)); return Upgraded; } diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 6c309679740..ba807fcacca 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -33,6 +33,7 @@ add_llvm_library(LLVMCore PrintModulePass.cpp Type.cpp TypeFinder.cpp + TargetTransformInfo.cpp Use.cpp User.cpp Value.cpp diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 90ecdaecf41..847bc134ddb 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -1381,14 +1381,20 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) { void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { Function *Func = unwrap<Function>(Fn); const AttrListPtr PAL = Func->getAttributes(); - const AttrListPtr PALnew = PAL.addAttr(~0U, Attributes(PA)); + AttrBuilder B(PA); + const AttrListPtr PALnew = + PAL.addAttr(Func->getContext(), AttrListPtr::FunctionIndex, + Attributes::get(Func->getContext(), B)); Func->setAttributes(PALnew); } void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { Function *Func = unwrap<Function>(Fn); const AttrListPtr PAL 
= Func->getAttributes(); - const AttrListPtr PALnew = PAL.removeAttr(~0U, Attributes(PA)); + AttrBuilder B(PA); + const AttrListPtr PALnew = + PAL.removeAttr(Func->getContext(), AttrListPtr::FunctionIndex, + Attributes::get(Func->getContext(), B)); Func->setAttributes(PALnew); } @@ -1458,11 +1464,15 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) { } void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) { - unwrap<Argument>(Arg)->addAttr(Attributes(PA)); + Argument *A = unwrap<Argument>(Arg); + AttrBuilder B(PA); + A->addAttr(Attributes::get(A->getContext(), B)); } void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) { - unwrap<Argument>(Arg)->removeAttr(Attributes(PA)); + Argument *A = unwrap<Argument>(Arg); + AttrBuilder B(PA); + A->removeAttr(Attributes::get(A->getContext(), B)); } LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) { @@ -1474,8 +1484,10 @@ LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) { void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) { - unwrap<Argument>(Arg)->addAttr( - Attributes::constructAlignmentFromInt(align)); + AttrBuilder B; + B.addAlignmentAttr(align); + unwrap<Argument>(Arg)->addAttr(Attributes:: + get(unwrap<Argument>(Arg)->getContext(), B)); } /*--.. Operations on basic blocks ..........................................--*/ @@ -1664,23 +1676,28 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) { void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index, LLVMAttribute PA) { CallSite Call = CallSite(unwrap<Instruction>(Instr)); + AttrBuilder B(PA); Call.setAttributes( - Call.getAttributes().addAttr(index, Attributes(PA))); + Call.getAttributes().addAttr(Call->getContext(), index, + Attributes::get(Call->getContext(), B))); } void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index, LLVMAttribute PA) { CallSite Call = CallSite(unwrap<Instruction>(Instr)); + AttrBuilder B(PA); Call.setAttributes( - Call.getAttributes().removeAttr(index, Attributes(PA))); + Call.getAttributes().removeAttr(Call->getContext(), index, + Attributes::get(Call->getContext(), B))); } void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, unsigned align) { CallSite Call = CallSite(unwrap<Instruction>(Instr)); - Call.setAttributes( - Call.getAttributes().addAttr(index, - Attributes::constructAlignmentFromInt(align))); + AttrBuilder B; + B.addAlignmentAttr(align); + Call.setAttributes(Call.getAttributes().addAttr(Call->getContext(), index, + Attributes::get(Call->getContext(), B))); } /*--.. Operations on call instructions (only) ..............................--*/ diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index 5c2a03ce091..9c4f2d93995 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -185,7 +185,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, // Ensure intrinsics have the right parameter attributes. 
if (unsigned IID = getIntrinsicID()) - setAttributes(Intrinsic::getAttributes(Intrinsic::ID(IID))); + setAttributes(Intrinsic::getAttributes(getContext(), Intrinsic::ID(IID))); } @@ -249,13 +249,13 @@ void Function::dropAllReferences() { void Function::addAttribute(unsigned i, Attributes attr) { AttrListPtr PAL = getAttributes(); - PAL = PAL.addAttr(i, attr); + PAL = PAL.addAttr(getContext(), i, attr); setAttributes(PAL); } void Function::removeAttribute(unsigned i, Attributes attr) { AttrListPtr PAL = getAttributes(); - PAL = PAL.removeAttr(i, attr); + PAL = PAL.removeAttr(getContext(), i, attr); setAttributes(PAL); } diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 74c0c6e1d97..13c4a5d2574 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -332,21 +332,22 @@ CallInst::CallInst(const CallInst &CI) void CallInst::addAttribute(unsigned i, Attributes attr) { AttrListPtr PAL = getAttributes(); - PAL = PAL.addAttr(i, attr); + PAL = PAL.addAttr(getContext(), i, attr); setAttributes(PAL); } void CallInst::removeAttribute(unsigned i, Attributes attr) { AttrListPtr PAL = getAttributes(); - PAL = PAL.removeAttr(i, attr); + PAL = PAL.removeAttr(getContext(), i, attr); setAttributes(PAL); } bool CallInst::hasFnAttr(Attributes::AttrVal A) const { - if (AttributeList.getParamAttributes(~0U).hasAttribute(A)) + if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex) + .hasAttribute(A)) return true; if (const Function *F = getCalledFunction()) - return F->getParamAttributes(~0U).hasAttribute(A); + return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A); return false; } @@ -571,10 +572,11 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) { } bool InvokeInst::hasFnAttr(Attributes::AttrVal A) const { - if (AttributeList.getParamAttributes(~0U).hasAttribute(A)) + if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex). + hasAttribute(A)) return true; if (const Function *F = getCalledFunction()) - return F->getParamAttributes(~0U).hasAttribute(A); + return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A); return false; } @@ -588,13 +590,13 @@ bool InvokeInst::paramHasAttr(unsigned i, Attributes::AttrVal A) const { void InvokeInst::addAttribute(unsigned i, Attributes attr) { AttrListPtr PAL = getAttributes(); - PAL = PAL.addAttr(i, attr); + PAL = PAL.addAttr(getContext(), i, attr); setAttributes(PAL); } void InvokeInst::removeAttribute(unsigned i, Attributes attr) { AttrListPtr PAL = getAttributes(); - PAL = PAL.removeAttr(i, attr); + PAL = PAL.removeAttr(getContext(), i, attr); setAttributes(PAL); } diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp index a86363b632a..74247bdde13 100644 --- a/lib/VMCore/LLVMContextImpl.cpp +++ b/lib/VMCore/LLVMContextImpl.cpp @@ -97,9 +97,11 @@ LLVMContextImpl::~LLVMContextImpl() { // Destroy attributes. for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(), - E = AttrsSet.end(); I != E; ++I) - delete &*I; - + E = AttrsSet.end(); I != E;) { + FoldingSetIterator<AttributesImpl> Elem = I++; + delete &*Elem; + } + // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet // and the NonUniquedMDNodes sets, so copy the values out first. 
SmallVector<MDNode*, 8> MDNodes; diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index 524f7e54bb4..ee31814c055 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -16,9 +16,9 @@ #define LLVM_LLVMCONTEXT_IMPL_H #include "llvm/LLVMContext.h" +#include "AttributesImpl.h" #include "ConstantsContext.h" #include "LeaksContext.h" -#include "llvm/AttributesImpl.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Metadata.h" diff --git a/lib/VMCore/TargetTransformInfo.cpp b/lib/VMCore/TargetTransformInfo.cpp new file mode 100644 index 00000000000..3af0222a211 --- /dev/null +++ b/lib/VMCore/TargetTransformInfo.cpp @@ -0,0 +1,27 @@ +//===- llvm/VMCore/TargetTransformInfo.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/TargetTransformInfo.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +/// Default ctor. +/// +/// @note This has to exist, because this is a pass, but it should never be +/// used. +TargetTransformInfo::TargetTransformInfo() : ImmutablePass(ID) { + report_fatal_error("Bad TargetTransformInfo ctor used. " + "Tool did not specify a TargetTransformInfo to use?"); +} + +INITIALIZE_PASS(TargetTransformInfo, "TargetTransformInfo", + "Target Transform Info", false, true) +char TargetTransformInfo::ID = 0; + diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 53744b48691..fd629b485aa 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -567,9 +567,10 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty, Attrs.hasAttribute(Attributes::AlwaysInline)), "Attributes " "'noinline and alwaysinline' are incompatible!", V); - Attributes TypeI = Attrs & Attributes::typeIncompatible(Ty); - Assert1(!TypeI, "Wrong type for attribute " + - TypeI.getAsString(), V); + Assert1(!AttrBuilder(Attrs). + hasAttributes(Attributes::typeIncompatible(Ty)), + "Wrong types for attribute: " + + Attributes::typeIncompatible(Ty).getAsString(), V); if (PointerType *PTy = dyn_cast<PointerType>(Ty)) Assert1(!Attrs.hasAttribute(Attributes::ByVal) || @@ -614,10 +615,10 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, } Attributes FAttrs = Attrs.getFnAttributes(); - Attributes::Builder NotFn(FAttrs); + AttrBuilder NotFn(FAttrs); NotFn.removeFunctionOnlyAttrs(); Assert1(!NotFn.hasAttributes(), "Attributes '" + - Attributes::get(NotFn).getAsString() + + Attributes::get(V->getContext(), NotFn).getAsString() + "' do not apply to the function!", V); // Check for mutually incompatible attributes.
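The Verifier hunk that closes this diff shows the end state of the whole attribute refactor: instead of masking one Attributes word against another, checks go through an AttrBuilder. The same idiom applies anywhere the old bitwise operators were used; a final sketch, with Attrs and Ty as in VerifyParameterAttrs:

    // Old: Attributes TypeI = Attrs & Attributes::typeIncompatible(Ty);
    // New: query an AttrBuilder for any type-incompatible attributes.
    bool HasBadAttrs =
        AttrBuilder(Attrs).hasAttributes(Attributes::typeIncompatible(Ty));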