author    tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8>  2012-10-16 17:52:57 +0000
committer tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8>  2012-10-16 17:52:57 +0000
commit    47ca737da5de48d621fa88adb89b6265fe224453 (patch)
tree      7e60638529c99989f22bff7a0f40a71a9c3a7399 /lib
parent    5f65515268b0a646b3e5ccedd85b6e0b5f831ac3 (diff)
Merge master branch
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/R600/@166033 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/Analysis.cpp | 1
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 7
-rw-r--r--  lib/Analysis/CMakeLists.txt | 1
-rw-r--r--  lib/Analysis/CodeMetrics.cpp | 6
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 12
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp | 3781
-rw-r--r--  lib/Analysis/InlineCost.cpp | 15
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 11
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 12
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 6
-rw-r--r--  lib/AsmParser/LLParser.cpp | 60
-rw-r--r--  lib/AsmParser/LLParser.h | 2
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 94
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.h | 67
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 123
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 4
-rw-r--r--  lib/CodeGen/AllocationOrder.cpp | 5
-rw-r--r--  lib/CodeGen/Analysis.cpp | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 8
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 8
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 18
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 2
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 2
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp | 2
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 2
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 8
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 720
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 2
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 6
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 8
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp | 13
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 4
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 14
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 126
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 2
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp | 14
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.h | 2
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 7
-rw-r--r--  lib/CodeGen/Passes.cpp | 4
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 5
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 5
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 8
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 6
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 10
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 86
-rw-r--r--  lib/CodeGen/RegisterPressure.cpp | 12
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 7
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 93
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 81
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 50
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 5
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 2
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 4
-rw-r--r--  lib/CodeGen/TargetSchedule.cpp | 40
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 5
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 12
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 3
-rw-r--r--  lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp | 2
-rw-r--r--  lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 14
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 14
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 22
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 11
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 9
-rw-r--r--  lib/MC/MCELFStreamer.cpp | 8
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 56
-rw-r--r--  lib/MC/MCStreamer.cpp | 4
-rw-r--r--  lib/Support/Triple.cpp | 4
-rw-r--r--  lib/TableGen/CMakeLists.txt | 2
-rw-r--r--  lib/TableGen/Makefile | 2
-rw-r--r--  lib/TableGen/Record.cpp | 150
-rw-r--r--  lib/TableGen/TGParser.cpp | 53
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 5
-rw-r--r--  lib/Target/ARM/ARMELFWriterInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 59
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 2
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 25
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 18
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 18
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 3
-rw-r--r--  lib/Target/CMakeLists.txt | 1
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 3
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 9
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 10
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 5
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h | 11
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 17
-rw-r--r--  lib/Target/MBlaze/MBlazeELFWriterInfo.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.h | 7
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 10
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 18
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.cpp | 23
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td | 41
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 30
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 9
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 8
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h | 10
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 10
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 16
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 39
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 18
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 9
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 9
-rw-r--r--  lib/Target/Target.cpp | 11
-rw-r--r--  lib/Target/TargetTransformImpl.cpp | 43
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 99
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 4
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 3
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 4
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 385
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 20
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 27
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 3
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 8
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 11
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 26
-rw-r--r--  lib/Target/X86/X86MCInstLower.h | 52
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 5
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 1
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 6
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 17
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 9
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 33
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 44
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 20
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 12
-rw-r--r--  lib/Transforms/IPO/IPO.cpp | 2
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp | 6
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 81
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 10
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 9
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 3
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 117
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 63
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 134
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/ObjCARC.cpp | 51
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 371
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 301
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp | 67
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 8
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 3
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 32
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 10
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 579
-rw-r--r--  lib/VMCore/Attributes.cpp | 255
-rw-r--r--  lib/VMCore/AttributesImpl.h | 51
-rw-r--r--  lib/VMCore/AutoUpgrade.cpp | 3
-rw-r--r--  lib/VMCore/CMakeLists.txt | 1
-rw-r--r--  lib/VMCore/Core.cpp | 39
-rw-r--r--  lib/VMCore/Function.cpp | 6
-rw-r--r--  lib/VMCore/Instructions.cpp | 18
-rw-r--r--  lib/VMCore/LLVMContextImpl.cpp | 8
-rw-r--r--  lib/VMCore/LLVMContextImpl.h | 2
-rw-r--r--  lib/VMCore/TargetTransformInfo.cpp | 27
-rw-r--r--  lib/VMCore/Verifier.cpp | 11
178 files changed, 7401 insertions, 2215 deletions
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 87a75fd3b11..588206e9159 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -31,6 +31,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
initializePrintDbgInfoPass(Registry);
+ initializeDependenceAnalysisPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 263bfc031fc..36903f94e25 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -286,7 +286,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
V = GEPOp->getOperand(0);
continue;
}
-
+
+ unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
for (User::const_op_iterator I = GEPOp->op_begin()+1,
@@ -315,7 +316,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
- if (TD->getPointerSizeInBits() > Width)
+ if (TD->getPointerSizeInBits(AS) > Width)
Extension = EK_SignExt;
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
@@ -344,7 +345,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// Make sure that we have a scale that makes sense for this target's
// pointer size.
- if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ if (unsigned ShiftBits = 64-TD->getPointerSizeInBits(AS)) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
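
The ShiftBits pair above sign-extends a scale whose meaningful width is the pointer size, which this hunk makes address-space dependent. A minimal standalone sketch of that arithmetic in plain C++ (the 32-bit width is an invented example, no DataLayout involved):

#include <cstdint>
#include <cstdio>

// Sign-extend the low Width bits of Scale to 64 bits: shift the value up
// against the sign bit, then arithmetic-shift it back down.
uint64_t signExtendLowBits(uint64_t Scale, unsigned Width) {
  unsigned ShiftBits = 64 - Width;  // cf. 64-TD->getPointerSizeInBits(AS)
  Scale <<= ShiftBits;                            // drop bits above Width
  return (uint64_t)((int64_t)Scale >> ShiftBits); // sign bit smears back down
}

int main() {
  // With (hypothetical) 32-bit pointers, the scale 0xFFFFFFFF is really -1.
  std::printf("%lld\n", (long long)signExtendLowBits(0xFFFFFFFFULL, 32));
  return 0;
}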
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index e461848e861..3ce888fefa4 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -13,6 +13,7 @@ add_llvm_library(LLVMAnalysis
CodeMetrics.cpp
ConstantFolding.cpp
DbgInfoPrinter.cpp
+ DependenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
IVUsers.cpp
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 651a54be1b9..d6692684960 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -91,14 +91,16 @@ bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) {
// which doesn't contain values outside the range of a pointer.
if (isa<IntToPtrInst>(CI) && TD &&
TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
- Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits())
+ Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits(
+ cast<IntToPtrInst>(CI)->getAddressSpace()))
return true;
// A ptrtoint cast is free so long as the result is large enough to store
// the pointer, and a legal integer type.
if (isa<PtrToIntInst>(CI) && TD &&
TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
- Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits())
+ Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits(
+ cast<PtrToIntInst>(CI)->getPointerAddressSpace()))
return true;
// trunc to a native type is free (assuming the target has compare and
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index b7bf044a368..146897ad675 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -916,10 +916,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
if (TD && CE->getOpcode() == Instruction::IntToPtr) {
Constant *Input = CE->getOperand(0);
unsigned InWidth = Input->getType()->getScalarSizeInBits();
- if (TD->getPointerSizeInBits() < InWidth) {
+ unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
+ if (TD->getPointerSizeInBits(AS) < InWidth) {
Constant *Mask =
ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
- TD->getPointerSizeInBits()));
+ TD->getPointerSizeInBits(AS)));
Input = ConstantExpr::getAnd(Input, Mask);
}
// Do a zext or trunc to get to the dest size.
@@ -932,9 +933,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
// the int size is >= the ptr size. This requires knowing the width of a
// pointer, so it can't be done in ConstantExpr::getCast.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
- if (TD &&
- TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
- CE->getOpcode() == Instruction::PtrToInt)
+ if (TD && CE->getOpcode() == Instruction::PtrToInt &&
+ TD->getPointerSizeInBits(
+ cast<PointerType>(CE->getOperand(0)->getType())->getAddressSpace())
+ <= CE->getType()->getScalarSizeInBits())
return FoldBitCast(CE->getOperand(0), DestTy, *TD);
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
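
The Mask step in the hunk above mirrors inttoptr semantics: when the integer input is wider than the destination pointer, only the low pointer-size bits survive. A hedged sketch of just the masking, with widths chosen for illustration:

#include <cstdint>
#include <cstdio>

// Keep only the low PtrBits bits of Input, as the Mask computed above does
// before the zext/trunc to the destination type.
uint64_t truncateToPointerWidth(uint64_t Input, unsigned PtrBits) {
  uint64_t Mask = PtrBits >= 64 ? ~0ULL : ((1ULL << PtrBits) - 1);
  return Input & Mask;
}

int main() {
  // A 64-bit constant folded through an inttoptr to a 32-bit pointer
  // keeps only its low 32 bits (prints 0x9abcdef0).
  std::printf("0x%llx\n", (unsigned long long)
              truncateToPointerWidth(0x123456789ABCDEF0ULL, 32));
  return 0;
}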
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
new file mode 100644
index 00000000000..016fe396e7d
--- /dev/null
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -0,0 +1,3781 @@
+//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DependenceAnalysis is an LLVM pass that analyses dependences between memory
+// accesses. Currently, it is an (incomplete) implementation of the approach
+// described in
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+//
+// There's a single entry point that analyzes the dependence between a pair
+// of memory references in a function, returning either NULL, for no dependence,
+// or a more-or-less detailed description of the dependence between them.
+//
+// Currently, the implementation cannot propagate constraints between
+// coupled RDIV subscripts and lacks a multi-subscript MIV test.
+// Both of these are conservative weaknesses;
+// that is, not a source of correctness problems.
+//
+// The implementation depends on the GEP instruction to
+// differentiate subscripts. Since Clang linearizes subscripts
+// for most arrays, we give up some precision (though the existing MIV tests
+// will help). We trust that the GEP instruction will eventually be extended.
+// In the meantime, we should explore Maslov's ideas about delinearization.
+//
+// We should pay some careful attention to the possibility of integer overflow
+// in the implementation of the various tests. This could happen with Add,
+// Subtract, or Multiply, with both APInt's and SCEV's.
+//
+// Some non-linear subscript pairs can be handled by the GCD test
+// (and perhaps other tests).
+// Should explore how often these things occur.
+//
+// Finally, it seems like certain test cases expose weaknesses in the SCEV
+// simplification, especially in the handling of sign and zero extensions.
+// It could be useful to spend time exploring these.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+//===----------------------------------------------------------------------===//
+// //
+// In memory of Ken Kennedy, 1945 - 2007 //
+// //
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "da"
+
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// statistics
+
+STATISTIC(TotalArrayPairs, "Array pairs tested");
+STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
+STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
+STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
+STATISTIC(ZIVapplications, "ZIV applications");
+STATISTIC(ZIVindependence, "ZIV independence");
+STATISTIC(StrongSIVapplications, "Strong SIV applications");
+STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
+STATISTIC(StrongSIVindependence, "Strong SIV independence");
+STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
+STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
+STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
+STATISTIC(ExactSIVapplications, "Exact SIV applications");
+STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
+STATISTIC(ExactSIVindependence, "Exact SIV independence");
+STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
+STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
+STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
+STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
+STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
+STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
+STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
+STATISTIC(DeltaApplications, "Delta applications");
+STATISTIC(DeltaSuccesses, "Delta successes");
+STATISTIC(DeltaIndependence, "Delta independence");
+STATISTIC(DeltaPropagations, "Delta propagations");
+STATISTIC(GCDapplications, "GCD applications");
+STATISTIC(GCDsuccesses, "GCD successes");
+STATISTIC(GCDindependence, "GCD independence");
+STATISTIC(BanerjeeApplications, "Banerjee applications");
+STATISTIC(BanerjeeIndependence, "Banerjee independence");
+STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+
+//===----------------------------------------------------------------------===//
+// basics
+
+INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
+ "Dependence Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DependenceAnalysis, "da",
+ "Dependence Analysis", true, true)
+
+char DependenceAnalysis::ID = 0;
+
+
+FunctionPass *llvm::createDependenceAnalysisPass() {
+ return new DependenceAnalysis();
+}
+
+
+bool DependenceAnalysis::runOnFunction(Function &F) {
+ this->F = &F;
+ AA = &getAnalysis<AliasAnalysis>();
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfo>();
+ return false;
+}
+
+
+void DependenceAnalysis::releaseMemory() {
+}
+
+
+void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.addRequiredTransitive<LoopInfo>();
+}
+
+
+// Used to test the dependence analyzer.
+// Looks through the function, noting the first store instruction
+// and the first load instruction
+// (which always follows the first store in our tests).
+// Calls depends() and prints out the result.
+// Ignores all other instructions.
+static
+void dumpExampleDependence(raw_ostream &OS, Function *F,
+ DependenceAnalysis *DA) {
+ for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F);
+ SrcI != SrcE; ++SrcI) {
+ if (const StoreInst *Src = dyn_cast<StoreInst>(&*SrcI)) {
+ for (inst_iterator DstI = SrcI, DstE = inst_end(F);
+ DstI != DstE; ++DstI) {
+ if (const LoadInst *Dst = dyn_cast<LoadInst>(&*DstI)) {
+ OS << "da analyze - ";
+ if (Dependence *D = DA->depends(Src, Dst, true)) {
+ D->dump(OS);
+ for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
+ if (D->isSplitable(Level)) {
+ OS << "da analyze - split level = " << Level;
+ OS << ", iteration = " << *DA->getSplitIteration(D, Level);
+ OS << "!\n";
+ }
+ }
+ delete D;
+ }
+ else
+ OS << "none!\n";
+ return;
+ }
+ }
+ }
+ }
+}
+
+
+void DependenceAnalysis::print(raw_ostream &OS, const Module*) const {
+ dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this));
+}
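
For context, print() is what runs when the pass is driven through opt's analysis printer; an invocation along these lines exercises dumpExampleDependence (the input file name is a placeholder):

  opt -analyze -basicaa -da input.ll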
+
+//===----------------------------------------------------------------------===//
+// Dependence methods
+
+// Returns true if this is an input dependence.
+bool Dependence::isInput() const {
+ return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an output dependence.
+bool Dependence::isOutput() const {
+ return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if this is a flow (aka true) dependence.
+bool Dependence::isFlow() const {
+ return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an anti dependence.
+bool Dependence::isAnti() const {
+ return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if a particular level is scalar; that is,
+// if no subscript in the source or destination mentions the induction
+// variable associated with the loop at this level.
+// Leave this out of line, so it will serve as a virtual method anchor
+bool Dependence::isScalar(unsigned level) const {
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FullDependence methods
+
+FullDependence::FullDependence(const Instruction *Source,
+ const Instruction *Destination,
+ bool PossiblyLoopIndependent,
+ unsigned CommonLevels) :
+ Dependence(Source, Destination),
+ Levels(CommonLevels),
+ LoopIndependent(PossiblyLoopIndependent) {
+ Consistent = true;
+ DV = CommonLevels ? new DVEntry[CommonLevels] : NULL;
+}
+
+// The rest are simple getters that hide the implementation.
+
+// getDirection - Returns the direction associated with a particular level.
+unsigned FullDependence::getDirection(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Direction;
+}
+
+
+// Returns the distance (or NULL) associated with a particular level.
+const SCEV *FullDependence::getDistance(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Distance;
+}
+
+
+// Returns true if a particular level is scalar; that is,
+// if no subscript in the source or destination mentions the induction
+// variable associated with the loop at this level.
+bool FullDependence::isScalar(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Scalar;
+}
+
+
+// Returns true if peeling the first iteration from this loop
+// will break this dependence.
+bool FullDependence::isPeelFirst(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].PeelFirst;
+}
+
+
+// Returns true if peeling the last iteration from this loop
+// will break this dependence.
+bool FullDependence::isPeelLast(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].PeelLast;
+}
+
+
+// Returns true if splitting this loop will break the dependence.
+bool FullDependence::isSplitable(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Splitable;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DependenceAnalysis::Constraint methods
+
+// If constraint is a point <X, Y>, returns X.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getX() const {
+ assert(Kind == Point && "Kind should be Point");
+ return A;
+}
+
+
+// If constraint is a point <X, Y>, returns Y.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getY() const {
+ assert(Kind == Point && "Kind should be Point");
+ return B;
+}
+
+
+// If constraint is a line AX + BY = C, returns A.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getA() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return A;
+}
+
+
+// If constraint is a line AX + BY = C, returns B.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getB() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return B;
+}
+
+
+// If constraint is a line AX + BY = C, returns C.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getC() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return C;
+}
+
+
+// If constraint is a distance, returns D.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getD() const {
+ assert(Kind == Distance && "Kind should be Distance");
+ return SE->getNegativeSCEV(C);
+}
+
+
+// Returns the loop associated with this constraint.
+const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const {
+ assert((Kind == Distance || Kind == Line || Kind == Point) &&
+ "Kind should be Distance, Line, or Point");
+ return AssociatedLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setPoint(const SCEV *X,
+ const SCEV *Y,
+ const Loop *CurLoop) {
+ Kind = Point;
+ A = X;
+ B = Y;
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setLine(const SCEV *AA,
+ const SCEV *BB,
+ const SCEV *CC,
+ const Loop *CurLoop) {
+ Kind = Line;
+ A = AA;
+ B = BB;
+ C = CC;
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setDistance(const SCEV *D,
+ const Loop *CurLoop) {
+ Kind = Distance;
+ A = SE->getConstant(D->getType(), 1);
+ B = SE->getNegativeSCEV(A);
+ C = SE->getNegativeSCEV(D);
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setEmpty() {
+ Kind = Empty;
+}
+
+
+void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) {
+ SE = NewSE;
+ Kind = Any;
+}
+
+
+// For debugging purposes. Dumps the constraint out to OS.
+void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const {
+ if (isEmpty())
+ OS << " Empty\n";
+ else if (isAny())
+ OS << " Any\n";
+ else if (isPoint())
+ OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
+ else if (isDistance())
+ OS << " Distance is " << *getD() <<
+ " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
+ else if (isLine())
+ OS << " Line is " << *getA() << "*X + " <<
+ *getB() << "*Y = " << *getC() << "\n";
+ else
+ llvm_unreachable("unknown constraint type in Constraint::dump");
+}
+
+
+// Updates X with the intersection
+// of the Constraints X and Y. Returns true if X has changed.
+// Corresponds to Figure 4 from the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+bool DependenceAnalysis::intersectConstraints(Constraint *X,
+ const Constraint *Y) {
+ ++DeltaApplications;
+ DEBUG(dbgs() << "\tintersect constraints\n");
+ DEBUG(dbgs() << "\t X ="; X->dump(dbgs()));
+ DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs()));
+ assert(!Y->isPoint() && "Y must not be a Point");
+ if (X->isAny()) {
+ if (Y->isAny())
+ return false;
+ *X = *Y;
+ return true;
+ }
+ if (X->isEmpty())
+ return false;
+ if (Y->isEmpty()) {
+ X->setEmpty();
+ return true;
+ }
+
+ if (X->isDistance() && Y->isDistance()) {
+ DEBUG(dbgs() << "\t intersect 2 distances\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ // Hmmm, interesting situation.
+ // I guess if either is constant, keep it and ignore the other.
+ if (isa<SCEVConstant>(Y->getD())) {
+ *X = *Y;
+ return true;
+ }
+ return false;
+ }
+
+ // At this point, the pseudo-code in Figure 4 of the paper
+ // checks if (X->isPoint() && Y->isPoint()).
+ // This case can't occur in our implementation,
+ // since a Point can only arise as the result of intersecting
+ // two Line constraints, and the right-hand value, Y, is never
+ // the result of an intersection.
+ assert(!(X->isPoint() && Y->isPoint()) &&
+ "We shouldn't ever see X->isPoint() && Y->isPoint()");
+
+ if (X->isLine() && Y->isLine()) {
+ DEBUG(dbgs() << "\t intersect 2 lines\n");
+ const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
+ const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
+ // slopes are equal, so lines are parallel
+ DEBUG(dbgs() << "\t\tsame slope\n");
+ Prod1 = SE->getMulExpr(X->getC(), Y->getB());
+ Prod2 = SE->getMulExpr(X->getB(), Y->getC());
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
+ // slopes differ, so lines intersect
+ DEBUG(dbgs() << "\t\tdifferent slopes\n");
+ const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
+ const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
+ const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
+ const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
+ const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
+ const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
+ const SCEVConstant *C1A2_C2A1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
+ const SCEVConstant *C1B2_C2B1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
+ const SCEVConstant *A1B2_A2B1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
+ const SCEVConstant *A2B1_A1B2 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
+ if (!C1B2_C2B1 || !C1A2_C2A1 ||
+ !A1B2_A2B1 || !A2B1_A1B2)
+ return false;
+ APInt Xtop = C1B2_C2B1->getValue()->getValue();
+ APInt Xbot = A1B2_A2B1->getValue()->getValue();
+ APInt Ytop = C1A2_C2A1->getValue()->getValue();
+ APInt Ybot = A2B1_A1B2->getValue()->getValue();
+ DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
+ DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
+ DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
+ DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
+ APInt Xq = Xtop; // these need to be initialized, even
+ APInt Xr = Xtop; // though they're just going to be overwritten
+ APInt::sdivrem(Xtop, Xbot, Xq, Xr);
+ APInt Yq = Ytop;
+ APInt Yr = Ytop;
+ APInt::sdivrem(Ytop, Ybot, Yq, Yr);
+ if (Xr != 0 || Yr != 0) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
+ if (Xq.slt(0) || Yq.slt(0)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ if (const SCEVConstant *CUB =
+ collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
+ APInt UpperBound = CUB->getValue()->getValue();
+ DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
+ if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ }
+ X->setPoint(SE->getConstant(Xq),
+ SE->getConstant(Yq),
+ X->getAssociatedLoop());
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+
+ // if (X->isLine() && Y->isPoint()) This case can't occur.
+ assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
+
+ if (X->isPoint() && Y->isLine()) {
+ DEBUG(dbgs() << "\t intersect Point and Line\n");
+ const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
+ const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
+ const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+
+ llvm_unreachable("shouldn't reach the end of Constraint intersection");
+ return false;
+}
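
The different-slopes branch above is integer Cramer's rule. A self-contained sketch with invented line constraints, mirroring the Xtop/Xbot/Ytop/Ybot quantities (plain long in place of APInt, upper-bound check omitted):

#include <cstdio>

// Intersect A1*X + B1*Y = C1 with A2*X + B2*Y = C2 over the integers,
// following the same determinant quantities as intersectConstraints.
int main() {
  long A1 = 1, B1 = 2, C1 = 10;       // X + 2Y = 10
  long A2 = 2, B2 = 1, C2 = 8;        // 2X + Y = 8
  long Xtop = C1*B2 - C2*B1;          // -6
  long Xbot = A1*B2 - A2*B1;          // -3
  long Ytop = C1*A2 - C2*A1;          // 12
  long Ybot = A2*B1 - A1*B2;          // 3
  if (Xtop % Xbot != 0 || Ytop % Ybot != 0) {
    std::puts("non-integer intersection -> constraint becomes Empty");
    return 0;
  }
  long X = Xtop / Xbot, Y = Ytop / Ybot;
  std::printf("Point <%ld, %ld>\n", X, Y);  // Point <2, 4>; a negative X or Y
  return 0;                                 // would also make it Empty
}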
+
+
+//===----------------------------------------------------------------------===//
+// DependenceAnalysis methods
+
+// For debugging purposes. Dumps a dependence to OS.
+void Dependence::dump(raw_ostream &OS) const {
+ bool Splitable = false;
+ if (isConfused())
+ OS << "confused";
+ else {
+ if (isConsistent())
+ OS << "consistent ";
+ if (isFlow())
+ OS << "flow";
+ else if (isOutput())
+ OS << "output";
+ else if (isAnti())
+ OS << "anti";
+ else if (isInput())
+ OS << "input";
+ unsigned Levels = getLevels();
+ if (Levels) {
+ OS << " [";
+ for (unsigned II = 1; II <= Levels; ++II) {
+ if (isSplitable(II))
+ Splitable = true;
+ if (isPeelFirst(II))
+ OS << 'p';
+ const SCEV *Distance = getDistance(II);
+ if (Distance)
+ OS << *Distance;
+ else if (isScalar(II))
+ OS << "S";
+ else {
+ unsigned Direction = getDirection(II);
+ if (Direction == DVEntry::ALL)
+ OS << "*";
+ else {
+ if (Direction & DVEntry::LT)
+ OS << "<";
+ if (Direction & DVEntry::EQ)
+ OS << "=";
+ if (Direction & DVEntry::GT)
+ OS << ">";
+ }
+ }
+ if (isPeelLast(II))
+ OS << 'p';
+ if (II < Levels)
+ OS << " ";
+ }
+ if (isLoopIndependent())
+ OS << "|<";
+ OS << "]";
+ if (Splitable)
+ OS << " splitable";
+ }
+ }
+ OS << "!\n";
+}
+
+
+
+static
+AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
+ const Value *A,
+ const Value *B) {
+ const Value *AObj = GetUnderlyingObject(A);
+ const Value *BObj = GetUnderlyingObject(B);
+ return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
+ BObj, AA->getTypeStoreSize(BObj->getType()));
+}
+
+
+// Returns true if the load or store can be analyzed. Atomic and volatile
+// operations have properties which this analysis does not understand.
+static
+bool isLoadOrStore(const Instruction *I) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isUnordered();
+ else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isUnordered();
+ return false;
+}
+
+
+static
+const Value *getPointerOperand(const Instruction *I) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerOperand();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ llvm_unreachable("Value is not load or store instruction");
+ return 0;
+}
+
+
+// Examines the loop nesting of the Src and Dst
+// instructions and establishes their shared loops. Sets the variables
+// CommonLevels, SrcLevels, and MaxLevels.
+// The source and destination instructions needn't be contained in the same
+// loop. The routine establishNestingLevels finds the level of the most deeply
+// nested loop that contains them both, CommonLevels. An instruction that's
+// not contained in a loop is at level = 0. MaxLevels is equal to the level
+// of the source plus the level of the destination, minus CommonLevels.
+// This lets us allocate vectors MaxLevels in length, with room for every
+// distinct loop referenced in both the source and destination subscripts.
+// The variable SrcLevels is the nesting depth of the source instruction.
+// It's used to help calculate distinct loops referenced by the destination.
+// Here's the map from loops to levels:
+// 0 - unused
+// 1 - outermost common loop
+// ... - other common loops
+// CommonLevels - innermost common loop
+// ... - loops containing Src but not Dst
+// SrcLevels - innermost loop containing Src but not Dst
+// ... - loops containing Dst but not Src
+// MaxLevels - innermost loops containing Dst but not Src
+// Consider the following code fragment:
+// for (a = ...) {
+// for (b = ...) {
+// for (c = ...) {
+// for (d = ...) {
+// A[] = ...;
+// }
+// }
+// for (e = ...) {
+// for (f = ...) {
+// for (g = ...) {
+// ... = A[];
+// }
+// }
+// }
+// }
+// }
+// If we're looking at the possibility of a dependence between the store
+// to A (the Src) and the load from A (the Dst), we'll note that they
+// have 2 loops in common, so CommonLevels will equal 2 and the direction
+// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
+// A map from loop names to loop numbers would look like
+// a - 1
+// b - 2 = CommonLevels
+// c - 3
+// d - 4 = SrcLevels
+// e - 5
+// f - 6
+// g - 7 = MaxLevels
+void DependenceAnalysis::establishNestingLevels(const Instruction *Src,
+ const Instruction *Dst) {
+ const BasicBlock *SrcBlock = Src->getParent();
+ const BasicBlock *DstBlock = Dst->getParent();
+ unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
+ unsigned DstLevel = LI->getLoopDepth(DstBlock);
+ const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
+ const Loop *DstLoop = LI->getLoopFor(DstBlock);
+ SrcLevels = SrcLevel;
+ MaxLevels = SrcLevel + DstLevel;
+ while (SrcLevel > DstLevel) {
+ SrcLoop = SrcLoop->getParentLoop();
+ SrcLevel--;
+ }
+ while (DstLevel > SrcLevel) {
+ DstLoop = DstLoop->getParentLoop();
+ DstLevel--;
+ }
+ while (SrcLoop != DstLoop) {
+ SrcLoop = SrcLoop->getParentLoop();
+ DstLoop = DstLoop->getParentLoop();
+ SrcLevel--;
+ }
+ CommonLevels = SrcLevel;
+ MaxLevels -= CommonLevels;
+}
+
+
+// Given one of the loops containing the source, return
+// its level index in our numbering scheme.
+unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const {
+ return SrcLoop->getLoopDepth();
+}
+
+
+// Given one of the loops containing the destination,
+// return its level index in our numbering scheme.
+unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const {
+ unsigned D = DstLoop->getLoopDepth();
+ if (D > CommonLevels)
+ return D - CommonLevels + SrcLevels;
+ else
+ return D;
+}
+
+
+// Returns true if Expression is loop invariant in LoopNest.
+bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression,
+ const Loop *LoopNest) const {
+ if (!LoopNest)
+ return true;
+ return SE->isLoopInvariant(Expression, LoopNest) &&
+ isLoopInvariant(Expression, LoopNest->getParentLoop());
+}
+
+
+
+// Finds the set of loops from the LoopNest that
+// have a level <= CommonLevels and are referred to by the SCEV Expression.
+void DependenceAnalysis::collectCommonLoops(const SCEV *Expression,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) const {
+ while (LoopNest) {
+ unsigned Level = LoopNest->getLoopDepth();
+ if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
+ Loops.set(Level);
+ LoopNest = LoopNest->getParentLoop();
+ }
+}
+
+
+// removeMatchingExtensions - Examines a subscript pair.
+// If the source and destination are identically sign (or zero)
+// extended, it strips off the extension in an effort to simplify
+// the actual analysis.
+void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) {
+ const SCEV *Src = Pair->Src;
+ const SCEV *Dst = Pair->Dst;
+ if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
+ (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
+ const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
+ const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
+ if (SrcCast->getType() == DstCast->getType()) {
+ Pair->Src = SrcCast->getOperand();
+ Pair->Dst = DstCast->getOperand();
+ }
+ }
+}
+
+
+// Examine the scev and return true iff it's linear.
+// Collect any loops mentioned in the set of "Loops".
+bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ if (!AddRec)
+ return isLoopInvariant(Src, LoopNest);
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ if (!isLoopInvariant(Step, LoopNest))
+ return false;
+ Loops.set(mapSrcLoop(AddRec->getLoop()));
+ return checkSrcSubscript(Start, LoopNest, Loops);
+}
+
+
+
+// Examine the scev and return true iff it's linear.
+// Collect any loops mentioned in the set of "Loops".
+bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (!AddRec)
+ return isLoopInvariant(Dst, LoopNest);
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ if (!isLoopInvariant(Step, LoopNest))
+ return false;
+ Loops.set(mapDstLoop(AddRec->getLoop()));
+ return checkDstSubscript(Start, LoopNest, Loops);
+}
+
+
+// Examines the subscript pair (the Src and Dst SCEVs)
+// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
+// Collects the associated loops in a set.
+DependenceAnalysis::Subscript::ClassificationKind
+DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
+ const SCEV *Dst, const Loop *DstLoopNest,
+ SmallBitVector &Loops) {
+ SmallBitVector SrcLoops(MaxLevels + 1);
+ SmallBitVector DstLoops(MaxLevels + 1);
+ if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
+ return Subscript::NonLinear;
+ if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
+ return Subscript::NonLinear;
+ Loops = SrcLoops;
+ Loops |= DstLoops;
+ unsigned N = Loops.count();
+ if (N == 0)
+ return Subscript::ZIV;
+ if (N == 1)
+ return Subscript::SIV;
+ if (N == 2 && (SrcLoops.count() == 0 ||
+ DstLoops.count() == 0 ||
+ (SrcLoops.count() == 1 && DstLoops.count() == 1)))
+ return Subscript::RDIV;
+ return Subscript::MIV;
+}
+
+
+// A wrapper around SCEV::isKnownPredicate.
+// Looks for cases where we're interested in comparing for equality.
+// If both X and Y have been identically sign or zero extended,
+// it strips off the (confusing) extensions before invoking
+// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
+// will be similarly updated.
+//
+// If SCEV::isKnownPredicate can't prove the predicate,
+// we try simple subtraction, which seems to help in some cases
+// involving symbolics.
+bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *X,
+ const SCEV *Y) const {
+ if (Pred == CmpInst::ICMP_EQ ||
+ Pred == CmpInst::ICMP_NE) {
+ if ((isa<SCEVSignExtendExpr>(X) &&
+ isa<SCEVSignExtendExpr>(Y)) ||
+ (isa<SCEVZeroExtendExpr>(X) &&
+ isa<SCEVZeroExtendExpr>(Y))) {
+ const SCEVCastExpr *CX = cast<SCEVCastExpr>(X);
+ const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y);
+ const SCEV *Xop = CX->getOperand();
+ const SCEV *Yop = CY->getOperand();
+ if (Xop->getType() == Yop->getType()) {
+ X = Xop;
+ Y = Yop;
+ }
+ }
+ }
+ if (SE->isKnownPredicate(Pred, X, Y))
+ return true;
+ // If SE->isKnownPredicate can't prove the condition,
+ // we try the brute-force approach of subtracting
+ // and testing the difference.
+ // By testing with SE->isKnownPredicate first, we avoid
+ // the possibility of overflow when the arguments are constants.
+ const SCEV *Delta = SE->getMinusSCEV(X, Y);
+ switch (Pred) {
+ case CmpInst::ICMP_EQ:
+ return Delta->isZero();
+ case CmpInst::ICMP_NE:
+ return SE->isKnownNonZero(Delta);
+ case CmpInst::ICMP_SGE:
+ return SE->isKnownNonNegative(Delta);
+ case CmpInst::ICMP_SLE:
+ return SE->isKnownNonPositive(Delta);
+ case CmpInst::ICMP_SGT:
+ return SE->isKnownPositive(Delta);
+ case CmpInst::ICMP_SLT:
+ return SE->isKnownNegative(Delta);
+ default:
+ llvm_unreachable("unexpected predicate in isKnownPredicate");
+ }
+}
+
+
+// All subscripts are the same type.
+// Loop bound may be smaller (e.g., a char).
+// Should zero extend loop bound, since it's always >= 0.
+// This routine collects upper bound and extends if needed.
+// Return null if no bound available.
+const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L,
+ Type *T) const {
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ const SCEV *UB = SE->getBackedgeTakenCount(L);
+ return SE->getNoopOrZeroExtend(UB, T);
+ }
+ return NULL;
+}
+
+
+// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
+// If the cast fails, returns NULL.
+const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L,
+ Type *T
+ ) const {
+ if (const SCEV *UB = collectUpperBound(L, T))
+ return dyn_cast<SCEVConstant>(UB);
+ return NULL;
+}
+
+
+// testZIV -
+// When we have a pair of subscripts of the form [c1] and [c2],
+// where c1 and c2 are both loop invariant, we attack it using
+// the ZIV test. Basically, we test by comparing the two values,
+// but there are actually three possible results:
+// 1) the values are equal, so there's a dependence
+// 2) the values are different, so there's no dependence
+// 3) the values might be equal, so we have to assume a dependence.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testZIV(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ ++ZIVapplications;
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
+ DEBUG(dbgs() << " provably dependent\n");
+ return false; // provably dependent
+ }
+ if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
+ DEBUG(dbgs() << " provably independent\n");
+ ++ZIVindependence;
+ return true; // provably independent
+ }
+ DEBUG(dbgs() << " possibly dependent\n");
+ Result.Consistent = false;
+ return false; // possibly dependent
+}
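
A concrete picture of the three ZIV outcomes, as an invented source fragment rather than anything from this patch:

#include <cstdio>

int A[16];

// Both subscripts below are loop invariant, so each pair is ZIV:
//   [5] vs [5] -> provably equal     -> dependence
//   [5] vs [6] -> provably different -> independence
//   [n] vs [m] -> unknown            -> assume dependence (Consistent = false)
int main() {
  for (int i = 0; i < 100; ++i) {
    A[5] = i;       // Src subscript [5]
    A[0] = A[6];    // Dst subscript [6]: ZIV disproves the dependence
  }
  std::printf("%d %d\n", A[5], A[6]);
  return 0;
}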
+
+
+// strongSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.1
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the Strong SIV test.
+//
+// Can prove independence. Failing that, can compute distance (and direction).
+// In the presence of symbolic terms, we can sometimes make progress.
+//
+// If there's a dependence,
+//
+// c1 + a*i = c2 + a*i'
+//
+// The dependence distance is
+//
+// d = i' - i = (c1 - c2)/a
+//
+// A dependence only exists if d is an integer and abs(d) <= U, where U is the
+// loop's upper bound. If a dependence exists, the dependence direction is
+// defined as
+//
+// { < if d > 0
+// direction = { = if d = 0
+// { > if d < 0
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ DEBUG(dbgs() << "\tStrong SIV test\n");
+ DEBUG(dbgs() << "\t Coeff = " << *Coeff);
+ DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
+ DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst);
+ DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
+ ++StrongSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "level out of range");
+ Level--;
+
+ const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta);
+ DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
+
+ // check that |Delta| < iteration count
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
+ DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
+ const SCEV *AbsDelta =
+ SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
+ const SCEV *AbsCoeff =
+ SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
+ const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
+ // Distance greater than trip count - no dependence
+ ++StrongSIVindependence;
+ ++StrongSIVsuccesses;
+ return true;
+ }
+ }
+
+ // Can we compute distance?
+ if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
+ APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue();
+ APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue();
+ APInt Distance = ConstDelta; // these need to be initialized
+ APInt Remainder = ConstDelta;
+ APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
+ DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ // Make sure Coeff divides Delta exactly
+ if (Remainder != 0) {
+ // Coeff doesn't divide Delta, no dependence
+ ++StrongSIVindependence;
+ ++StrongSIVsuccesses;
+ return true;
+ }
+ Result.DV[Level].Distance = SE->getConstant(Distance);
+ NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
+ if (Distance.sgt(0))
+ Result.DV[Level].Direction &= Dependence::DVEntry::LT;
+ else if (Distance.slt(0))
+ Result.DV[Level].Direction &= Dependence::DVEntry::GT;
+ else
+ Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
+ ++StrongSIVsuccesses;
+ }
+ else if (Delta->isZero()) {
+ // since 0/X == 0
+ Result.DV[Level].Distance = Delta;
+ NewConstraint.setDistance(Delta, CurLoop);
+ Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
+ ++StrongSIVsuccesses;
+ }
+ else {
+ if (Coeff->isOne()) {
+ DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
+ Result.DV[Level].Distance = Delta; // since X/1 == X
+ NewConstraint.setDistance(Delta, CurLoop);
+ }
+ else {
+ Result.Consistent = false;
+ NewConstraint.setLine(Coeff,
+ SE->getNegativeSCEV(Coeff),
+ SE->getNegativeSCEV(Delta), CurLoop);
+ }
+
+ // maybe we can get a useful direction
+ bool DeltaMaybeZero = !SE->isKnownNonZero(Delta);
+ bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
+ bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
+ bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
+ bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
+ // The double negatives above are confusing.
+ // It helps to read !SE->isKnownNonZero(Delta)
+ // as "Delta might be Zero"
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if ((DeltaMaybePositive && CoeffMaybePositive) ||
+ (DeltaMaybeNegative && CoeffMaybeNegative))
+ NewDirection = Dependence::DVEntry::LT;
+ if (DeltaMaybeZero)
+ NewDirection |= Dependence::DVEntry::EQ;
+ if ((DeltaMaybeNegative && CoeffMaybePositive) ||
+ (DeltaMaybePositive && CoeffMaybeNegative))
+ NewDirection |= Dependence::DVEntry::GT;
+ if (NewDirection < Result.DV[Level].Direction)
+ ++StrongSIVsuccesses;
+ Result.DV[Level].Direction &= NewDirection;
+ }
+ return false;
+}
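
The test above boils down to d = (c1 - c2)/a plus the integrality and trip-count checks. A standalone sketch with plain 64-bit integers in place of SCEV/APInt (subscripts invented for illustration):

#include <cstdio>
#include <cstdlib>

// Strong SIV over plain integers: Src = c1 + a*i, Dst = c2 + a*i',
// 0 <= i, i' <= ub. A dependence needs d = (c1 - c2)/a integral, |d| <= ub.
bool strongSIV(long a, long c1, long c2, long ub, long &dist) {
  long delta = c1 - c2;
  if (std::labs(delta) > std::labs(a) * ub)
    return false;                      // |Delta| > |Coeff|*UpperBound
  if (delta % a != 0)
    return false;                      // Coeff doesn't divide Delta
  dist = delta / a;
  return true;
}

int main() {
  long d;
  // e.g. A[i + 2] written and A[i] read across 10 iterations:
  if (strongSIV(1, 2, 0, 10, d))
    std::printf("distance %ld, direction %c\n",   // distance 2, direction <
                d, d > 0 ? '<' : d < 0 ? '>' : '=');
  else
    std::puts("independent");
  return 0;
}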
+
+
+// weakCrossingSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Crossing SIV test.
+//
+// Given c1 + a*i = c2 - a*i', we can look for the intersection of
+// the two lines, where i = i', yielding
+//
+// c1 + a*i = c2 - a*i
+// 2a*i = c2 - c1
+// i = (c2 - c1)/2a
+//
+// If i < 0, there is no dependence.
+// If i > upperbound, there is no dependence.
+// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
+// If i = upperbound, there's a dependence with distance = 0.
+// If i is integral, there's a dependence (all directions).
+// If the non-integer part = 1/2, there's a dependence (<> directions).
+// Otherwise, there's no dependence.
+//
+// Can prove independence. Failing that,
+// can sometimes refine the directions.
+// Can determine iteration for splitting.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint,
+ const SCEV *&SplitIter) const {
+ DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
+ DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakCrossingSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
+ if (Delta->isZero()) {
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
+ ++WeakCrossingSIVsuccesses;
+ if (!Result.DV[Level].Direction) {
+ ++WeakCrossingSIVindependence;
+ return true;
+ }
+ Result.DV[Level].Distance = Delta; // = 0
+ return false;
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
+ if (!ConstCoeff)
+ return false;
+
+ Result.DV[Level].Splitable = true;
+ if (SE->isKnownNegative(ConstCoeff)) {
+ ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
+ assert(ConstCoeff &&
+ "dynamic cast of negative of ConstCoeff should yield constant");
+ Delta = SE->getNegativeSCEV(Delta);
+ }
+ assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
+
+ // compute SplitIter for use by DependenceAnalysis::getSplitIteration()
+ SplitIter =
+ SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0),
+ Delta),
+ SE->getMulExpr(SE->getConstant(Delta->getType(), 2),
+ ConstCoeff));
+ DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
+
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ if (!ConstDelta)
+ return false;
+
+ // We're certain that ConstCoeff > 0; therefore,
+ // if Delta < 0, then no dependence.
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
+ if (SE->isKnownNegative(Delta)) {
+ // No dependence, Delta < 0
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+
+ // We're certain that Delta > 0 and ConstCoeff > 0.
+ // Check Delta/(2*ConstCoeff) against upper loop bound
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
+ const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
+ ConstantTwo);
+ DEBUG(dbgs() << "\t ML = " << *ML << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
+ // Delta too big, no dependence
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
+ // i = i' = UB
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
+ ++WeakCrossingSIVsuccesses;
+ if (!Result.DV[Level].Direction) {
+ ++WeakCrossingSIVindependence;
+ return true;
+ }
+ Result.DV[Level].Splitable = false;
+ Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0);
+ return false;
+ }
+ }
+
+ // check that Coeff divides Delta
+ APInt APDelta = ConstDelta->getValue()->getValue();
+ APInt APCoeff = ConstCoeff->getValue()->getValue();
+ APInt Distance = APDelta; // these need to be initialized
+ APInt Remainder = APDelta;
+ APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ // Coeff doesn't divide Delta, no dependence
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+ DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
+
+ // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
+ APInt Two = APInt(Distance.getBitWidth(), 2, true);
+ Remainder = Distance.srem(Two);
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ // Equal direction isn't possible
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
+ ++WeakCrossingSIVsuccesses;
+ }
+ return false;
+}
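
A worked instance of the crossing-point arithmetic, with invented subscripts A[i] and A[10 - i] (a = 1, c1 = 0, c2 = 10), a assumed positive, and the upper-bound checks omitted:

#include <cstdio>

// Weak-crossing SIV: Src = c1 + a*i and Dst = c2 - a*i cross at
// i = (c2 - c1)/(2a). Here the crossing point is i = 5, an integer, so a
// dependence exists and splitting the loop there would break it.
int main() {
  long a = 1, c1 = 0, c2 = 10;
  long delta = c2 - c1;
  long twoA = 2 * a;
  if (delta % twoA == 0)      // integral crossing point
    std::printf("crossing at i = %ld -> dependence, splitable there\n",
                delta / twoA);
  else if (delta % a == 0)    // non-integer part is exactly 1/2
    std::puts("dependence with <> directions only");
  else
    std::puts("no dependence");
  return 0;
}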
+
+
+// Kirch's algorithm, from
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+// MIT Press, 1989
+//
+// Program 2.1, page 29.
+// Computes the GCD of AM and BM.
+// Also finds a solution to the equation ax - by = gcd(a, b).
+// Returns true iff the gcd does not divide Delta (i.e., dependence disproved).
+static
+bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta,
+ APInt &G, APInt &X, APInt &Y) {
+ APInt A0(Bits, 1, true), A1(Bits, 0, true);
+ APInt B0(Bits, 0, true), B1(Bits, 1, true);
+ APInt G0 = AM.abs();
+ APInt G1 = BM.abs();
+ APInt Q = G0; // these need to be initialized
+ APInt R = G0;
+ APInt::sdivrem(G0, G1, Q, R);
+ while (R != 0) {
+ APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
+ APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
+ G0 = G1; G1 = R;
+ APInt::sdivrem(G0, G1, Q, R);
+ }
+ G = G1;
+ DEBUG(dbgs() << "\t GCD = " << G << "\n");
+ X = AM.slt(0) ? -A1 : A1;
+ Y = BM.slt(0) ? B1 : -B1;
+
+ // make sure gcd divides Delta
+ R = Delta.srem(G);
+ if (R != 0)
+ return true; // gcd doesn't divide Delta, no dependence
+ Q = Delta.sdiv(G);
+ X *= Q;
+ Y *= Q;
+ return false;
+}
+
+
+static
+APInt floorOfQuotient(APInt A, APInt B) {
+ APInt Q = A; // these need to be initialized
+ APInt R = A;
+ APInt::sdivrem(A, B, Q, R);
+ if (R == 0)
+ return Q;
+ if ((A.sgt(0) && B.sgt(0)) ||
+ (A.slt(0) && B.slt(0)))
+ return Q;
+ else
+ return Q - 1;
+}
+
+
+static
+APInt ceilingOfQuotient(APInt A, APInt B) {
+ APInt Q = A; // these need to be initialized
+ APInt R = A;
+ APInt::sdivrem(A, B, Q, R);
+ if (R == 0)
+ return Q;
+ if ((A.sgt(0) && B.sgt(0)) ||
+ (A.slt(0) && B.slt(0)))
+ return Q + 1;
+ else
+ return Q;
+}
+
+
+static
+APInt maxAPInt(APInt A, APInt B) {
+ return A.sgt(B) ? A : B;
+}
+
+
+static
+APInt minAPInt(APInt A, APInt B) {
+ return A.slt(B) ? A : B;
+}
+
+
+// exactSIVtest -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
+// where i is an induction variable, c1 and c2 are loop invariant, and a1
+// and a2 are constant, we can solve it exactly using an algorithm developed
+// by Banerjee and Wolfe. See Section 2.5.3 in
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+// MIT Press, 1989
+//
+// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
+// so use them if possible. They're also a bit better with symbolics and,
+// in the case of the strong SIV test, can compute Distances.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
+ const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ DEBUG(dbgs() << "\tExact SIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++ExactSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
+ Delta, CurLoop);
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
+ return false;
+
+ // find gcd
+ APInt G, X, Y;
+ APInt AM = ConstSrcCoeff->getValue()->getValue();
+ APInt BM = ConstDstCoeff->getValue()->getValue();
+ unsigned Bits = AM.getBitWidth();
+ if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+ // gcd doesn't divide Delta, no dependence
+ ++ExactSIVindependence;
+ ++ExactSIVsuccesses;
+ return true;
+ }
+
+ DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+
+ // since SCEV construction normalizes, LM = 0
+ APInt UM(Bits, 1, true);
+ bool UMvalid = false;
+ // UM is perhaps unavailable, let's check
+ if (const SCEVConstant *CUB =
+ collectConstantUpperBound(CurLoop, Delta->getType())) {
+ UM = CUB->getValue()->getValue();
+ DEBUG(dbgs() << "\t UM = " << UM << "\n");
+ UMvalid = true;
+ }
+
+ APInt TU(APInt::getSignedMaxValue(Bits));
+ APInt TL(APInt::getSignedMinValue(Bits));
+
+ // test(BM/G, LM-X) and test(-BM/G, X-UM)
+ APInt TMUL = BM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (UMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (UMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+
+ // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
+ TMUL = AM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (UMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (UMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+ if (TL.sgt(TU)) {
+ ++ExactSIVindependence;
+ ++ExactSIVsuccesses;
+ return true;
+ }
+
+ // explore directions
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+
+ // less than
+ APInt SaveTU(TU); // save these
+ APInt SaveTL(TL);
+ DEBUG(dbgs() << "\t exploring LT direction\n");
+ TMUL = AM - BM;
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::LT;
+ ++ExactSIVsuccesses;
+ }
+
+ // equal
+ TU = SaveTU; // restore
+ TL = SaveTL;
+ DEBUG(dbgs() << "\t exploring EQ direction\n");
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ TMUL = BM - AM;
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::EQ;
+ ++ExactSIVsuccesses;
+ }
+
+ // greater than
+ TU = SaveTU; // restore
+ TL = SaveTL;
+ DEBUG(dbgs() << "\t exploring GT direction\n");
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::GT;
+ ++ExactSIVsuccesses;
+ }
+
+ // finished
+ Result.DV[Level].Direction &= NewDirection;
+ if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
+ ++ExactSIVindependence;
+ return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
+}
+
+
+
+// Return true if the divisor evenly divides the dividend.
+static
+bool isRemainderZero(const SCEVConstant *Dividend,
+ const SCEVConstant *Divisor) {
+ APInt ConstDividend = Dividend->getValue()->getValue();
+ APInt ConstDivisor = Divisor->getValue()->getValue();
+ return ConstDividend.srem(ConstDivisor) == 0;
+}
+
+
+// weakZeroSrcSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Zero SIV test.
+//
+// Given
+//
+// c1 = c2 + a*i
+//
+// we get
+//
+// (c1 - c2)/a = i
+//
+// If i is not an integer, there's no dependence.
+// If i < 0 or > UB, there's no dependence.
+// If i = 0, the direction is <= and peeling the
+// 1st iteration will break the dependence.
+// If i = UB, the direction is >= and peeling the
+// last iteration will break the dependence.
+// Otherwise, the direction is *.
+//
+// Can prove independence. Failing that, we can sometimes refine
+// the directions. Can sometimes show that first or last
+// iteration carries all the dependences (so worth peeling).
+//
+// (see also weakZeroDstSIVtest)
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ // For the WeakSIV test, it's possible the loop isn't common to
+ // the Src and Dst loops. If it isn't, then there's no need to
+ // record a direction.
+ DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakZeroSIVapplications;
+ assert(0 < Level && Level <= MaxLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+ NewConstraint.setLine(SE->getConstant(Delta->getType(), 0),
+ DstCoeff, Delta, CurLoop);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+ Result.DV[Level].PeelFirst = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false; // dependences caused by first iteration
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstCoeff)
+ return false;
+ const SCEV *AbsCoeff =
+ SE->isKnownNegative(ConstCoeff) ?
+ SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
+ const SCEV *NewDelta =
+ SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
+
+ // check that Delta/SrcCoeff < iteration count
+ // really check NewDelta < count*AbsCoeff
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
+ // dependences caused by last iteration
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::GE;
+ Result.DV[Level].PeelLast = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false;
+ }
+ }
+
+ // check that Delta/SrcCoeff >= 0
+ // really check that NewDelta >= 0
+ if (SE->isKnownNegative(NewDelta)) {
+ // No dependence, newDelta < 0
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+
+ // if SrcCoeff doesn't divide Delta, then no dependence
+ if (isa<SCEVConstant>(Delta) &&
+ !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ return false;
+}
+
+
+// weakZeroDstSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Zero SIV test.
+//
+// Given
+//
+// c1 + a*i = c2
+//
+// we get
+//
+// i = (c2 - c1)/a
+//
+// If i is not an integer, there's no dependence.
+// If i < 0 or > UB, there's no dependence.
+// If i = 0, the direction is <= and peeling the
+// 1st iteration will break the dependence.
+// If i = UB, the direction is >= and peeling the
+// last iteration will break the dependence.
+// Otherwise, the direction is *.
+//
+// Can prove independence. Failing that, we can sometimes refine
+// the directions. Can sometimes show that first or last
+// iteration carries all the dependences (so worth peeling).
+//
+// (see also weakZeroSrcSIVtest)
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ // For the WeakSIV test, it's possible the loop isn't common to the
+ // Src and Dst loops. If it isn't, then there's no need to record a direction.
+ DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakZeroSIVapplications;
+ assert(0 < Level && Level <= SrcLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0),
+ Delta, CurLoop);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+ Result.DV[Level].PeelFirst = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false; // dependences caused by first iteration
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ if (!ConstCoeff)
+ return false;
+ const SCEV *AbsCoeff =
+ SE->isKnownNegative(ConstCoeff) ?
+ SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
+ const SCEV *NewDelta =
+ SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
+
+ // check that Delta/SrcCoeff < iteration count
+ // really check NewDelta < count*AbsCoeff
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
+ // dependences caused by last iteration
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::GE;
+ Result.DV[Level].PeelLast = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false;
+ }
+ }
+
+ // check that Delta/SrcCoeff >= 0
+ // really check that NewDelta >= 0
+ if (SE->isKnownNegative(NewDelta)) {
+ // No dependence, newDelta < 0
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+
+ // if SrcCoeff doesn't divide Delta, then no dependence
+ if (isa<SCEVConstant>(Delta) &&
+ !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ return false;
+}
+
+
+// exactRDIVtest - Tests the RDIV subscript pair for dependence.
+// Things of the form [c1 + a*i] and [c2 + b*j],
+// where i and j are induction variable, c1 and c2 are loop invariant,
+// and a and b are constants.
+// Returns true if any possible dependence is disproved.
+// Marks the result as inconsistant.
+// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
+bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
+ const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *SrcLoop,
+ const Loop *DstLoop,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "\tExact RDIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++ExactRDIVapplications;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
+ return false;
+
+ // find gcd
+ APInt G, X, Y;
+ APInt AM = ConstSrcCoeff->getValue()->getValue();
+ APInt BM = ConstDstCoeff->getValue()->getValue();
+ unsigned Bits = AM.getBitWidth();
+ if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+ // gcd doesn't divide Delta, no dependence
+ ++ExactRDIVindependence;
+ return true;
+ }
+
+ DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+
+ // since SCEV construction seems to normalize, LM = 0
+ APInt SrcUM(Bits, 1, true);
+ bool SrcUMvalid = false;
+ // SrcUM is perhaps unavailable, let's check
+ if (const SCEVConstant *UpperBound =
+ collectConstantUpperBound(SrcLoop, Delta->getType())) {
+ SrcUM = UpperBound->getValue()->getValue();
+ DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
+ SrcUMvalid = true;
+ }
+
+ APInt DstUM(Bits, 1, true);
+ bool DstUMvalid = false;
+ // UM is perhaps unavailable, let's check
+ if (const SCEVConstant *UpperBound =
+ collectConstantUpperBound(DstLoop, Delta->getType())) {
+ DstUM = UpperBound->getValue()->getValue();
+ DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
+ DstUMvalid = true;
+ }
+
+ APInt TU(APInt::getSignedMaxValue(Bits));
+ APInt TL(APInt::getSignedMinValue(Bits));
+
+ // test(BM/G, LM-X) and test(-BM/G, X-UM)
+ APInt TMUL = BM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (SrcUMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (SrcUMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+
+ // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
+ TMUL = AM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (DstUMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (DstUMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+ if (TL.sgt(TU))
+ ++ExactRDIVindependence;
+ return TL.sgt(TU);
+}
+
+
+// symbolicRDIVtest -
+// In Section 4.5 of the Practical Dependence Testing paper,the authors
+// introduce a special case of Banerjee's Inequalities (also called the
+// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
+// particularly cases with symbolics. Since it's only able to disprove
+// dependence (not compute distances or directions), we'll use it as a
+// fall back for the other tests.
+//
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
+// where i and j are induction variables and c1 and c2 are loop invariants,
+// we can use the symbolic tests to disprove some dependences, serving as a
+// backup for the RDIV test. Note that i and j can be the same variable,
+// letting this test serve as a backup for the various SIV tests.
+//
+// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
+// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
+// loop bounds for the i and j loops, respectively. So, ...
+//
+// c1 + a1*i = c2 + a2*j
+// a1*i - a2*j = c2 - c1
+//
+// To test for a dependence, we compute c2 - c1 and make sure it's in the
+// range of the maximum and minimum possible values of a1*i - a2*j.
+// Considering the signs of a1 and a2, we have 4 possible cases:
+//
+// 1) If a1 >= 0 and a2 >= 0, then
+// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
+// -a2*N2 <= c2 - c1 <= a1*N1
+//
+// 2) If a1 >= 0 and a2 <= 0, then
+// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
+// 0 <= c2 - c1 <= a1*N1 - a2*N2
+//
+// 3) If a1 <= 0 and a2 >= 0, then
+// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
+// a1*N1 - a2*N2 <= c2 - c1 <= 0
+//
+// 4) If a1 <= 0 and a2 <= 0, then
+// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
+// a1*N1 <= c2 - c1 <= -a2*N2
+//
+// return true if dependence disproved
+bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1,
+ const SCEV *A2,
+ const SCEV *C1,
+ const SCEV *C2,
+ const Loop *Loop1,
+ const Loop *Loop2) const {
+ ++SymbolicRDIVapplications;
+ DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
+ DEBUG(dbgs() << "\t A1 = " << *A1);
+ DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
+ DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
+ DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
+ DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
+ const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
+ const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
+ DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
+ DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
+ const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
+ const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
+ DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
+ DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
+ if (SE->isKnownNonNegative(A1)) {
+ if (SE->isKnownNonNegative(A2)) {
+ // A1 >= 0 && A2 >= 0
+ if (N1) {
+ // make sure that c2 - c1 <= a1*N1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ if (N2) {
+ // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ else if (SE->isKnownNonPositive(A2)) {
+ // a1 >= 0 && a2 <= 0
+ if (N1 && N2) {
+ // make sure that c2 - c1 <= a1*N1 - a2*N2
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
+ DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ // make sure that 0 <= c2 - c1
+ if (SE->isKnownNegative(C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ else if (SE->isKnownNonPositive(A1)) {
+ if (SE->isKnownNonNegative(A2)) {
+ // a1 <= 0 && a2 >= 0
+ if (N1 && N2) {
+ // make sure that a1*N1 - a2*N2 <= c2 - c1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
+ DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ // make sure that c2 - c1 <= 0
+ if (SE->isKnownPositive(C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ else if (SE->isKnownNonPositive(A2)) {
+ // a1 <= 0 && a2 <= 0
+ if (N1) {
+ // make sure that a1*N1 <= c2 - c1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ if (N2) {
+ // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+
+// testSIV -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
+// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
+// a2 are constant, we attack it with an SIV test. While they can all be
+// solved with the Exact SIV test, it's worthwhile to use simpler tests when
+// they apply; they're cheaper and sometimes more precise.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testSIV(const SCEV *Src,
+ const SCEV *Dst,
+ unsigned &Level,
+ FullDependence &Result,
+ Constraint &NewConstraint,
+ const SCEV *&SplitIter) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (SrcAddRec && DstAddRec) {
+ const SCEV *SrcConst = SrcAddRec->getStart();
+ const SCEV *DstConst = DstAddRec->getStart();
+ const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ const Loop *CurLoop = SrcAddRec->getLoop();
+ assert(CurLoop == DstAddRec->getLoop() &&
+ "both loops in SIV should be same");
+ Level = mapSrcLoop(CurLoop);
+ bool disproven;
+ if (SrcCoeff == DstCoeff)
+ disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint);
+ else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
+ disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint, SplitIter);
+ else
+ disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint);
+ return disproven ||
+ gcdMIVtest(Src, Dst, Result) ||
+ symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
+ }
+ if (SrcAddRec) {
+ const SCEV *SrcConst = SrcAddRec->getStart();
+ const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ const SCEV *DstConst = Dst;
+ const Loop *CurLoop = SrcAddRec->getLoop();
+ Level = mapSrcLoop(CurLoop);
+ return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint) ||
+ gcdMIVtest(Src, Dst, Result);
+ }
+ if (DstAddRec) {
+ const SCEV *DstConst = DstAddRec->getStart();
+ const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ const SCEV *SrcConst = Src;
+ const Loop *CurLoop = DstAddRec->getLoop();
+ Level = mapDstLoop(CurLoop);
+ return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
+ CurLoop, Level, Result, NewConstraint) ||
+ gcdMIVtest(Src, Dst, Result);
+ }
+ llvm_unreachable("SIV test expected at least one AddRec");
+ return false;
+}
+
+
+// testRDIV -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
+// where i and j are induction variables, c1 and c2 are loop invariant,
+// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
+// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
+// It doesn't make sense to talk about distance or direction in this case,
+// so there's no point in making special versions of the Strong SIV test or
+// the Weak-crossing SIV test.
+//
+// With minor algebra, this test can also be used for things like
+// [c1 + a1*i + a2*j][c2].
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testRDIV(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ // we have 3 possible situations here:
+ // 1) [a*i + b] and [c*j + d]
+ // 2) [a*i + c*j + b] and [d]
+ // 3) [b] and [a*i + c*j + d]
+ // We need to find what we've got and get organized
+
+ const SCEV *SrcConst, *DstConst;
+ const SCEV *SrcCoeff, *DstCoeff;
+ const Loop *SrcLoop, *DstLoop;
+
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (SrcAddRec && DstAddRec) {
+ SrcConst = SrcAddRec->getStart();
+ SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ SrcLoop = SrcAddRec->getLoop();
+ DstConst = DstAddRec->getStart();
+ DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ DstLoop = DstAddRec->getLoop();
+ }
+ else if (SrcAddRec) {
+ if (const SCEVAddRecExpr *tmpAddRec =
+ dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
+ SrcConst = tmpAddRec->getStart();
+ SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
+ SrcLoop = tmpAddRec->getLoop();
+ DstConst = Dst;
+ DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
+ DstLoop = SrcAddRec->getLoop();
+ }
+ else
+ llvm_unreachable("RDIV reached by surprising SCEVs");
+ }
+ else if (DstAddRec) {
+ if (const SCEVAddRecExpr *tmpAddRec =
+ dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
+ DstConst = tmpAddRec->getStart();
+ DstCoeff = tmpAddRec->getStepRecurrence(*SE);
+ DstLoop = tmpAddRec->getLoop();
+ SrcConst = Src;
+ SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
+ SrcLoop = DstAddRec->getLoop();
+ }
+ else
+ llvm_unreachable("RDIV reached by surprising SCEVs");
+ }
+ else
+ llvm_unreachable("RDIV expected at least one AddRec");
+ return exactRDIVtest(SrcCoeff, DstCoeff,
+ SrcConst, DstConst,
+ SrcLoop, DstLoop,
+ Result) ||
+ gcdMIVtest(Src, Dst, Result) ||
+ symbolicRDIVtest(SrcCoeff, DstCoeff,
+ SrcConst, DstConst,
+ SrcLoop, DstLoop);
+}
+
+
+// Tests the single-subscript MIV pair (Src and Dst) for dependence.
+// Return true if dependence disproved.
+// Can sometimes refine direction vectors.
+bool DependenceAnalysis::testMIV(const SCEV *Src,
+ const SCEV *Dst,
+ const SmallBitVector &Loops,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ Result.Consistent = false;
+ return gcdMIVtest(Src, Dst, Result) ||
+ banerjeeMIVtest(Src, Dst, Loops, Result);
+}
+
+
+// Given a product, e.g., 10*X*Y, returns the first constant operand,
+// in this case 10. If there is no constant part, returns NULL.
+static
+const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) {
+ for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) {
+ if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op)))
+ return Constant;
+ }
+ return NULL;
+}
+
+
+//===----------------------------------------------------------------------===//
+// gcdMIVtest -
+// Tests an MIV subscript pair for dependence.
+// Returns true if any possible dependence is disproved.
+// Marks the result as inconsistant.
+// Can sometimes disprove the equal direction for 1 or more loops,
+// as discussed in Michael Wolfe's book,
+// High Performance Compilers for Parallel Computing, page 235.
+//
+// We spend some effort (code!) to handle cases like
+// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
+// but M and N are just loop-invariant variables.
+// This should help us handle linearized subscripts;
+// also makes this test a useful backup to the various SIV tests.
+//
+// It occurs to me that the presence of loop-invariant variables
+// changes the nature of the test from "greatest common divisor"
+// to "a common divisor!"
+bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "starting gcd\n");
+ ++GCDapplications;
+ unsigned BitWidth = Src->getType()->getIntegerBitWidth();
+ APInt RunningGCD = APInt::getNullValue(BitWidth);
+
+ // Examine Src coefficients.
+ // Compute running GCD and record source constant.
+ // Because we're looking for the constant at the end of the chain,
+ // we can't quit the loop just because the GCD == 1.
+ const SCEV *Coefficients = Src;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ if (!Constant)
+ return false;
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ Coefficients = AddRec->getStart();
+ }
+ const SCEV *SrcConst = Coefficients;
+
+ // Examine Dst coefficients.
+ // Compute running GCD and record destination constant.
+ // Because we're looking for the constant at the end of the chain,
+ // we can't quit the loop just because the GCD == 1.
+ Coefficients = Dst;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ if (!Constant)
+ return false;
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ Coefficients = AddRec->getStart();
+ }
+ const SCEV *DstConst = Coefficients;
+
+ APInt ExtraGCD = APInt::getNullValue(BitWidth);
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << " Delta = " << *Delta << "\n");
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
+ if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
+ // If Delta is a sum of products, we may be able to make further progress.
+ for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) {
+ const SCEV *Operand = Sum->getOperand(Op);
+ if (isa<SCEVConstant>(Operand)) {
+ assert(!Constant && "Surprised to find multiple constants");
+ Constant = cast<SCEVConstant>(Operand);
+ }
+ else if (isa<SCEVMulExpr>(Operand)) {
+ // Search for constant operand to participate in GCD;
+ // If none found; return false.
+ const SCEVConstant *ConstOp =
+ getConstantPart(cast<SCEVMulExpr>(Operand));
+ APInt ConstOpValue = ConstOp->getValue()->getValue();
+ ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
+ ConstOpValue.abs());
+ }
+ else
+ return false;
+ }
+ }
+ if (!Constant)
+ return false;
+ APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue();
+ DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
+ if (ConstDelta == 0)
+ return false;
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
+ DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
+ APInt Remainder = ConstDelta.srem(RunningGCD);
+ if (Remainder != 0) {
+ ++GCDindependence;
+ return true;
+ }
+
+ // Try to disprove equal directions.
+ // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
+ // the code above can't disprove the dependence because the GCD = 1.
+ // So we consider what happen if i = i' and what happens if j = j'.
+ // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
+ // which is infeasible, so we can disallow the = direction for the i level.
+ // Setting j = j' doesn't help matters, so we end up with a direction vector
+ // of [<>, *]
+ //
+ // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
+ // we need to remember that the constant part is 5 and the RunningGCD should
+ // be initialized to ExtraGCD = 30.
+ DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
+
+ bool Improved = false;
+ Coefficients = Src;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ Coefficients = AddRec->getStart();
+ const Loop *CurLoop = AddRec->getLoop();
+ RunningGCD = ExtraGCD;
+ const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
+ const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
+ const SCEV *Inner = Src;
+ while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
+ AddRec = cast<SCEVAddRecExpr>(Inner);
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ if (CurLoop == AddRec->getLoop())
+ ; // SrcCoeff == Coeff
+ else {
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else
+ Constant = cast<SCEVConstant>(Coeff);
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ }
+ Inner = AddRec->getStart();
+ }
+ Inner = Dst;
+ while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
+ AddRec = cast<SCEVAddRecExpr>(Inner);
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ if (CurLoop == AddRec->getLoop())
+ DstCoeff = Coeff;
+ else {
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else
+ Constant = cast<SCEVConstant>(Coeff);
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ }
+ Inner = AddRec->getStart();
+ }
+ Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else if (isa<SCEVConstant>(Delta))
+ Constant = cast<SCEVConstant>(Delta);
+ else {
+ // The difference of the two coefficients might not be a product
+ // or constant, in which case we give up on this direction.
+ continue;
+ }
+ APInt ConstCoeff = Constant->getValue()->getValue();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
+ if (RunningGCD != 0) {
+ Remainder = ConstDelta.srem(RunningGCD);
+ DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ unsigned Level = mapSrcLoop(CurLoop);
+ Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
+ Improved = true;
+ }
+ }
+ }
+ if (Improved)
+ ++GCDsuccesses;
+ DEBUG(dbgs() << "all done\n");
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// banerjeeMIVtest -
+// Use Banerjee's Inequalities to test an MIV subscript pair.
+// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
+// Generally follows the discussion in Section 2.5.2 of
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+//
+// The inequalities given on page 25 are simplified in that loops are
+// normalized so that the lower bound is always 0 and the stride is always 1.
+// For example, Wolfe gives
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+//
+// where A_k is the coefficient of the kth index in the source subscript,
+// B_k is the coefficient of the kth index in the destination subscript,
+// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
+// index, and N_k is the stride of the kth index. Since all loops are normalized
+// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
+// equation to
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
+// = (A^-_k - B_k)^- (U_k - 1) - B_k
+//
+// Similar simplifications are possible for the other equations.
+//
+// When we can't determine the number of iterations for a loop,
+// we use NULL as an indicator for the worst case, infinity.
+// When computing the upper bound, NULL denotes +inf;
+// for the lower bound, NULL denotes -inf.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src,
+ const SCEV *Dst,
+ const SmallBitVector &Loops,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "starting Banerjee\n");
+ ++BanerjeeApplications;
+ DEBUG(dbgs() << " Src = " << *Src << '\n');
+ const SCEV *A0;
+ CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
+ DEBUG(dbgs() << " Dst = " << *Dst << '\n');
+ const SCEV *B0;
+ CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
+ BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
+ const SCEV *Delta = SE->getMinusSCEV(B0, A0);
+ DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
+
+ // Compute bounds for all the * directions.
+ DEBUG(dbgs() << "\tBounds[*]\n");
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
+ Bound[K].Direction = Dependence::DVEntry::ALL;
+ Bound[K].DirSet = Dependence::DVEntry::NONE;
+ findBoundsALL(A, B, Bound, K);
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\t " << K << '\t');
+ if (Bound[K].Lower[Dependence::DVEntry::ALL])
+ DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[K].Upper[Dependence::DVEntry::ALL])
+ DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+#endif
+ }
+
+ // Test the *, *, *, ... case.
+ bool Disproved = false;
+ if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
+ // Explore the direction vector hierarchy.
+ unsigned DepthExpanded = 0;
+ unsigned NewDeps = exploreDirections(1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+ if (NewDeps > 0) {
+ bool Improved = false;
+ for (unsigned K = 1; K <= CommonLevels; ++K) {
+ if (Loops[K]) {
+ unsigned Old = Result.DV[K - 1].Direction;
+ Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
+ Improved |= Old != Result.DV[K - 1].Direction;
+ if (!Result.DV[K - 1].Direction) {
+ Improved = false;
+ Disproved = true;
+ break;
+ }
+ }
+ }
+ if (Improved)
+ ++BanerjeeSuccesses;
+ }
+ else {
+ ++BanerjeeIndependence;
+ Disproved = true;
+ }
+ }
+ else {
+ ++BanerjeeIndependence;
+ Disproved = true;
+ }
+ delete [] Bound;
+ delete [] A;
+ delete [] B;
+ return Disproved;
+}
+
+
+// Hierarchically expands the direction vector
+// search space, combining the directions of discovered dependences
+// in the DirSet field of Bound. Returns the number of distinct
+// dependences discovered. If the dependence is disproved,
+// it will return 0.
+unsigned DependenceAnalysis::exploreDirections(unsigned Level,
+ CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ const SmallBitVector &Loops,
+ unsigned &DepthExpanded,
+ const SCEV *Delta) const {
+ if (Level > CommonLevels) {
+ // record result
+ DEBUG(dbgs() << "\t[");
+ for (unsigned K = 1; K <= CommonLevels; ++K) {
+ if (Loops[K]) {
+ Bound[K].DirSet |= Bound[K].Direction;
+#ifndef NDEBUG
+ switch (Bound[K].Direction) {
+ case Dependence::DVEntry::LT:
+ DEBUG(dbgs() << " <");
+ break;
+ case Dependence::DVEntry::EQ:
+ DEBUG(dbgs() << " =");
+ break;
+ case Dependence::DVEntry::GT:
+ DEBUG(dbgs() << " >");
+ break;
+ case Dependence::DVEntry::ALL:
+ DEBUG(dbgs() << " *");
+ break;
+ default:
+ llvm_unreachable("unexpected Bound[K].Direction");
+ }
+#endif
+ }
+ }
+ DEBUG(dbgs() << " ]\n");
+ return 1;
+ }
+ if (Loops[Level]) {
+ if (Level > DepthExpanded) {
+ DepthExpanded = Level;
+ // compute bounds for <, =, > at current level
+ findBoundsLT(A, B, Bound, Level);
+ findBoundsGT(A, B, Bound, Level);
+ findBoundsEQ(A, B, Bound, Level);
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
+ DEBUG(dbgs() << "\t <\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::LT])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::LT])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+ DEBUG(dbgs() << "\t =\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::EQ])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::EQ])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+ DEBUG(dbgs() << "\t >\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::GT])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::GT])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+#endif
+ }
+
+ unsigned NewDeps = 0;
+
+ // test bounds for <, *, *, ...
+ if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ // Test bounds for =, *, *, ...
+ if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ // test bounds for >, *, *, ...
+ if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ Bound[Level].Direction = Dependence::DVEntry::ALL;
+ return NewDeps;
+ }
+ else
+ return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
+}
+
+
+// Returns true iff the current bounds are plausible.
+bool DependenceAnalysis::testBounds(unsigned char DirKind,
+ unsigned Level,
+ BoundInfo *Bound,
+ const SCEV *Delta) const {
+ Bound[Level].Direction = DirKind;
+ if (const SCEV *LowerBound = getLowerBound(Bound))
+ if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
+ return false;
+ if (const SCEV *UpperBound = getUpperBound(Bound))
+ if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
+ return false;
+ return true;
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the * direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
+// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^*_k = (A^-_k - B^+_k)U_k
+// UB^*_k = (A^+_k - B^-_k)U_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+// Note that the lower bound is always <= 0
+// and the upper bound is always >= 0.
+void DependenceAnalysis::findBoundsALL(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ Bound[K].Lower[Dependence::DVEntry::ALL] =
+ SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
+ Bound[K].Iterations);
+ Bound[K].Upper[Dependence::DVEntry::ALL] =
+ SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
+ Bound[K].Iterations);
+ }
+ else {
+ // If the difference is 0, we won't need to know the number of iterations.
+ if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
+ Bound[K].Lower[Dependence::DVEntry::ALL] =
+ SE->getConstant(A[K].Coeff->getType(), 0);
+ if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
+ Bound[K].Upper[Dependence::DVEntry::ALL] =
+ SE->getConstant(A[K].Coeff->getType(), 0);
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the = direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
+// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^=_k = (A_k - B_k)^- U_k
+// UB^=_k = (A_k - B_k)^+ U_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+// Note that the lower bound is always <= 0
+// and the upper bound is always >= 0.
+void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
+ const SCEV *NegativePart = getNegativePart(Delta);
+ Bound[K].Lower[Dependence::DVEntry::EQ] =
+ SE->getMulExpr(NegativePart, Bound[K].Iterations);
+ const SCEV *PositivePart = getPositivePart(Delta);
+ Bound[K].Upper[Dependence::DVEntry::EQ] =
+ SE->getMulExpr(PositivePart, Bound[K].Iterations);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
+ const SCEV *NegativePart = getNegativePart(Delta);
+ if (NegativePart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
+ const SCEV *PositivePart = getPositivePart(Delta);
+ if (PositivePart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the < direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
+// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+void DependenceAnalysis::findBoundsLT(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Iter_1 =
+ SE->getMinusSCEV(Bound[K].Iterations,
+ SE->getConstant(Bound[K].Iterations->getType(), 1));
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
+ Bound[K].Lower[Dependence::DVEntry::LT] =
+ SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
+ Bound[K].Upper[Dependence::DVEntry::LT] =
+ SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
+ if (NegPart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
+ if (PosPart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the > direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
+// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
+// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Iter_1 =
+ SE->getMinusSCEV(Bound[K].Iterations,
+ SE->getConstant(Bound[K].Iterations->getType(), 1));
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
+ Bound[K].Lower[Dependence::DVEntry::GT] =
+ SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
+ Bound[K].Upper[Dependence::DVEntry::GT] =
+ SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
+ if (NegPart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
+ const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
+ if (PosPart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
+ }
+}
+
+
+// X^+ = max(X, 0)
+const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const {
+ return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0));
+}
+
+
+// X^- = min(X, 0)
+const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const {
+ return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0));
+}
+
+
+// Walks through the subscript,
+// collecting each coefficient, the associated loop bounds,
+// and recording its positive and negative parts for later use.
+DependenceAnalysis::CoefficientInfo *
+DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript,
+ bool SrcFlag,
+ const SCEV *&Constant) const {
+ const SCEV *Zero = SE->getConstant(Subscript->getType(), 0);
+ CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ CI[K].Coeff = Zero;
+ CI[K].PosPart = Zero;
+ CI[K].NegPart = Zero;
+ CI[K].Iterations = NULL;
+ }
+ while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
+ const Loop *L = AddRec->getLoop();
+ unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
+ CI[K].Coeff = AddRec->getStepRecurrence(*SE);
+ CI[K].PosPart = getPositivePart(CI[K].Coeff);
+ CI[K].NegPart = getNegativePart(CI[K].Coeff);
+ CI[K].Iterations = collectUpperBound(L, Subscript->getType());
+ Subscript = AddRec->getStart();
+ }
+ Constant = Subscript;
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\tCoefficient Info\n");
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
+ DEBUG(dbgs() << "\tPos Part = ");
+ DEBUG(dbgs() << *CI[K].PosPart);
+ DEBUG(dbgs() << "\tNeg Part = ");
+ DEBUG(dbgs() << *CI[K].NegPart);
+ DEBUG(dbgs() << "\tUpper Bound = ");
+ if (CI[K].Iterations)
+ DEBUG(dbgs() << *CI[K].Iterations);
+ else
+ DEBUG(dbgs() << "+inf");
+ DEBUG(dbgs() << '\n');
+ }
+ DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
+#endif
+ return CI;
+}
+
+
+// Looks through all the bounds info and
+// computes the lower bound given the current direction settings
+// at each level. If the lower bound for any level is -inf,
+// the result is -inf.
+const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const {
+ const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
+ for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
+ if (Bound[K].Lower[Bound[K].Direction])
+ Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
+ else
+ Sum = NULL;
+ }
+ return Sum;
+}
+
+
+// Looks through all the bounds info and
+// computes the upper bound given the current direction settings
+// at each level. If the upper bound at any level is +inf,
+// the result is +inf.
+const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const {
+ const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
+ for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
+ if (Bound[K].Upper[Bound[K].Direction])
+ Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
+ else
+ Sum = NULL;
+ }
+ return Sum;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constraint manipulation for Delta test.
+
+// Given a linear SCEV,
+// return the coefficient (the step)
+// corresponding to the specified loop.
+// If there isn't one, return 0.
+// For example, given a*i + b*j + c*k, zeroing the coefficient
+// corresponding to the j loop would yield b.
+const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec)
+ return SE->getConstant(Expr->getType(), 0);
+ if (AddRec->getLoop() == TargetLoop)
+ return AddRec->getStepRecurrence(*SE);
+ return findCoefficient(AddRec->getStart(), TargetLoop);
+}
+
+
+// Given a linear SCEV,
+// return the SCEV given by zeroing out the coefficient
+// corresponding to the specified loop.
+// For example, given a*i + b*j + c*k, zeroing the coefficient
+// corresponding to the j loop would yield a*i + c*k.
+const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec)
+ return Expr; // ignore
+ if (AddRec->getLoop() == TargetLoop)
+ return AddRec->getStart();
+ return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
+ AddRec->getStepRecurrence(*SE),
+ AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+}
+
+
+// Given a linear SCEV Expr,
+// return the SCEV given by adding some Value to the
+// coefficient corresponding to the specified TargetLoop.
+// For example, given a*i + b*j + c*k, adding 1 to the coefficient
+// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
+const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop,
+ const SCEV *Value) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec) // create a new addRec
+ return SE->getAddRecExpr(Expr,
+ Value,
+ TargetLoop,
+ SCEV::FlagAnyWrap); // Worst case, with no info.
+ if (AddRec->getLoop() == TargetLoop) {
+ const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
+ if (Sum->isZero())
+ return AddRec->getStart();
+ return SE->getAddRecExpr(AddRec->getStart(),
+ Sum,
+ AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+ }
+ return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(),
+ TargetLoop, Value),
+ AddRec->getStepRecurrence(*SE),
+ AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+}
+
+
+// Review the constraints, looking for opportunities
+// to simplify a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set Consistent to false.
+// Corresponds to Figure 5 from the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+bool DependenceAnalysis::propagate(const SCEV *&Src,
+ const SCEV *&Dst,
+ SmallBitVector &Loops,
+ SmallVector<Constraint, 4> &Constraints,
+ bool &Consistent) {
+ bool Result = false;
+ for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) {
+ DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
+ DEBUG(Constraints[LI].dump(dbgs()));
+ if (Constraints[LI].isDistance())
+ Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
+ else if (Constraints[LI].isLine())
+ Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
+ else if (Constraints[LI].isPoint())
+ Result |= propagatePoint(Src, Dst, Constraints[LI]);
+ }
+ return Result;
+}
+
+
+// Attempt to propagate a distance
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set Consistent to false.
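+// A sketch of the mechanics: with Src = 2*i + n, Dst = 2*i, and a
+// distance constraint d = 1, A_K = 2, so Src becomes (2*i + n) - 2
+// with the i term zeroed, i.e. n - 2, and Dst's i coefficient becomes
+// 2 + (-2) = 0, leaving Dst = 0. The pair is then a ZIV subscript
+// testing n - 2 against 0.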
+bool DependenceAnalysis::propagateDistance(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint,
+ bool &Consistent) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ if (A_K->isZero())
+ return false;
+ const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
+ Src = SE->getMinusSCEV(Src, DA_K);
+ Src = zeroCoefficient(Src, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
+ Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
+ DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ return true;
+}
+
+
+// Attempt to propagate a line
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set Consistent to false.
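+// The line constraint encodes a*i + b*i' = c, where i is the source
+// index and i' the destination index for the associated loop (a sketch
+// of the intended reading). The cases below substitute accordingly:
+// A == 0 pins i' to C/B, B == 0 pins i to C/A, and A == B rewrites i
+// in terms of i'.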
+bool DependenceAnalysis::propagateLine(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint,
+ bool &Consistent) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ const SCEV *A = CurConstraint.getA();
+ const SCEV *B = CurConstraint.getB();
+ const SCEV *C = CurConstraint.getC();
+ DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n");
+ DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
+ if (A->isZero()) {
+ const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Bconst || !Cconst) return false;
+ APInt Beta = Bconst->getValue()->getValue();
+ APInt Charlie = Cconst->getValue()->getValue();
+ APInt CdivB = Charlie.sdiv(Beta);
+ assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
+ const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+ // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
+ Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
+ Dst = zeroCoefficient(Dst, CurLoop);
+ if (!findCoefficient(Src, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else if (B->isZero()) {
+ const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Aconst || !Cconst) return false;
+ APInt Alpha = Aconst->getValue()->getValue();
+ APInt Charlie = Cconst->getValue()->getValue();
+ APInt CdivA = Charlie.sdiv(Alpha);
+ assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
+ Src = zeroCoefficient(Src, CurLoop);
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
+ const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Aconst || !Cconst) return false;
+ APInt Alpha = Aconst->getValue()->getValue();
+ APInt Charlie = Cconst->getValue()->getValue();
+ APInt CdivA = Charlie.sdiv(Alpha);
+ assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
+ Src = zeroCoefficient(Src, CurLoop);
+ Dst = addToCoefficient(Dst, CurLoop, A_K);
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else {
+ // paper is incorrect here, or perhaps just misleading
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getMulExpr(Src, A);
+ Dst = SE->getMulExpr(Dst, A);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
+ Src = zeroCoefficient(Src, CurLoop);
+ Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
+ return true;
+}
+
+
+// Attempt to propagate a point
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
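+// A point constraint pins the source index to X and the destination
+// index to Y for the associated loop; substituting both eliminates
+// the loop from the pair (a sketch of the intended reading).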
+bool DependenceAnalysis::propagatePoint(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+ const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
+ const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
+ DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
+ Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
+ Src = zeroCoefficient(Src, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
+ Dst = zeroCoefficient(Dst, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
+ return true;
+}
+
+
+// Update direction vector entry based on the current constraint.
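+// For example (a sketch), a distance known to be positive narrows the
+// direction to LT alone, while a distance of unknown sign leaves
+// EQ, LT, and GT all possible.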
+void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
+ const Constraint &CurConstraint
+ ) const {
+ DEBUG(dbgs() << "\tUpdate direction, constraint =");
+ DEBUG(CurConstraint.dump(dbgs()));
+ if (CurConstraint.isAny())
+ ; // use defaults
+ else if (CurConstraint.isDistance()) {
+ // this one is consistent, the others aren't
+ Level.Scalar = false;
+ Level.Distance = CurConstraint.getD();
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
+ NewDirection = Dependence::DVEntry::EQ;
+ if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
+ NewDirection |= Dependence::DVEntry::LT;
+ if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
+ NewDirection |= Dependence::DVEntry::GT;
+ Level.Direction &= NewDirection;
+ }
+ else if (CurConstraint.isLine()) {
+ Level.Scalar = false;
+ Level.Distance = NULL;
+ // direction should be accurate
+ }
+ else if (CurConstraint.isPoint()) {
+ Level.Scalar = false;
+ Level.Distance = NULL;
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if (!isKnownPredicate(CmpInst::ICMP_NE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if X may be = Y
+ NewDirection |= Dependence::DVEntry::EQ;
+ if (!isKnownPredicate(CmpInst::ICMP_SLE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if Y may be > X
+ NewDirection |= Dependence::DVEntry::LT;
+ if (!isKnownPredicate(CmpInst::ICMP_SGE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if Y may be < X
+ NewDirection |= Dependence::DVEntry::GT;
+ Level.Direction &= NewDirection;
+ }
+ else
+ llvm_unreachable("constraint has unexpected kind");
+}
+
+
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+// For debugging purposes, dump a small bit vector to dbgs().
+static void dumpSmallBitVector(SmallBitVector &BV) {
+ dbgs() << "{";
+ for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) {
+ dbgs() << VI;
+ if (BV.find_next(VI) >= 0)
+ dbgs() << ' ';
+ }
+ dbgs() << "}\n";
+}
+#endif
+
+
+// depends -
+// Returns NULL if there is no dependence.
+// Otherwise, return a Dependence with as many details as possible.
+// Corresponds to Section 3.1 in the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+//
+// Care is required to keep getSplitIteration(), below, up to date w.r.t. this routine.
+Dependence *DependenceAnalysis::depends(const Instruction *Src,
+ const Instruction *Dst,
+ bool PossiblyLoopIndependent) {
+ if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
+ (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
+    // if either instruction doesn't reference memory, there's no dependence
+ return NULL;
+
+ if (!isLoadOrStore(Src) || !isLoadOrStore(Dst))
+ // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
+ return new Dependence(Src, Dst);
+
+ const Value *SrcPtr = getPointerOperand(Src);
+ const Value *DstPtr = getPointerOperand(Dst);
+
+ switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+    // cannot analyze objects if we don't understand their aliasing.
+ return new Dependence(Src, Dst);
+ case AliasAnalysis::NoAlias:
+ // If the objects noalias, they are distinct, accesses are independent.
+ return NULL;
+ case AliasAnalysis::MustAlias:
+ break; // The underlying objects alias; test accesses for dependence.
+ }
+
+ const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ if (!SrcGEP || !DstGEP)
+ return new Dependence(Src, Dst); // missing GEP, assume dependence
+
+ if (SrcGEP->getPointerOperandType() != DstGEP->getPointerOperandType())
+ return new Dependence(Src, Dst); // different types, assume dependence
+
+ // establish loop nesting levels
+ establishNestingLevels(Src, Dst);
+ DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
+ DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
+
+ FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
+ ++TotalArrayPairs;
+
+ // classify subscript pairs
+ unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin();
+ SmallVector<Subscript, 4> Pair(Pairs);
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ Pair[SI].Loops.resize(MaxLevels + 1);
+ Pair[SI].GroupLoops.resize(MaxLevels + 1);
+ Pair[SI].Group.resize(Pairs);
+ }
+ Pairs = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin(),
+ DstEnd = DstGEP->idx_end();
+ SrcIdx != SrcEnd && DstIdx != DstEnd;
+ ++SrcIdx, ++DstIdx, ++Pairs) {
+ Pair[Pairs].Src = SE->getSCEV(*SrcIdx);
+ Pair[Pairs].Dst = SE->getSCEV(*DstIdx);
+ removeMatchingExtensions(&Pair[Pairs]);
+ Pair[Pairs].Classification =
+ classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()),
+ Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[Pairs].Loops);
+ Pair[Pairs].GroupLoops = Pair[Pairs].Loops;
+ Pair[Pairs].Group.set(Pairs);
+ DEBUG(dbgs() << " subscript " << Pairs << "\n");
+ DEBUG(dbgs() << "\tsrc = " << *Pair[Pairs].Src << "\n");
+ DEBUG(dbgs() << "\tdst = " << *Pair[Pairs].Dst << "\n");
+ DEBUG(dbgs() << "\tclass = " << Pair[Pairs].Classification << "\n");
+ DEBUG(dbgs() << "\tloops = ");
+ DEBUG(dumpSmallBitVector(Pair[Pairs].Loops));
+ }
+
+ SmallBitVector Separable(Pairs);
+ SmallBitVector Coupled(Pairs);
+
+ // Partition subscripts into separable and minimally-coupled groups
+  // The algorithm in the paper has better asymptotic complexity;
+  // this one may be faster in practice. Check someday.
+ //
+ // Here's an example of how it works. Consider this code:
+ //
+ // for (i = ...) {
+ // for (j = ...) {
+ // for (k = ...) {
+ // for (l = ...) {
+ // for (m = ...) {
+ // A[i][j][k][m] = ...;
+ // ... = A[0][j][l][i + j];
+ // }
+ // }
+ // }
+ // }
+ // }
+ //
+ // There are 4 subscripts here:
+ // 0 [i] and [0]
+ // 1 [j] and [j]
+ // 2 [k] and [l]
+ // 3 [m] and [i + j]
+ //
+ // We've already classified each subscript pair as ZIV, SIV, etc.,
+ // and collected all the loops mentioned by pair P in Pair[P].Loops.
+ // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
+ // and set Pair[P].Group = {P}.
+ //
+ // Src Dst Classification Loops GroupLoops Group
+ // 0 [i] [0] SIV {1} {1} {0}
+ // 1 [j] [j] SIV {2} {2} {1}
+ // 2 [k] [l] RDIV {3,4} {3,4} {2}
+ // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3}
+ //
+ // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
+ // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
+ //
+ // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
+ // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
+  // Next, 0 and 3. The intersection of their GroupLoops = {1}, not empty,
+ // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
+ // to either Separable or Coupled).
+ //
+ // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
+  // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
+ // so Pair[3].Group = {0, 1, 3} and Done = false.
+ //
+ // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
+ // Since Done remains true, we add 2 to the set of Separable pairs.
+ //
+ // Finally, we consider 3. There's nothing to compare it with,
+ // so Done remains true and we add it to the Coupled set.
+ // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
+ //
+ // In the end, we've got 1 separable subscript and 1 coupled group.
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ if (Pair[SI].Classification == Subscript::NonLinear) {
+ // ignore these, but collect loops for later
+ ++NonlinearSubscriptPairs;
+ collectCommonLoops(Pair[SI].Src,
+ LI->getLoopFor(Src->getParent()),
+ Pair[SI].Loops);
+ collectCommonLoops(Pair[SI].Dst,
+ LI->getLoopFor(Dst->getParent()),
+ Pair[SI].Loops);
+ Result.Consistent = false;
+ }
+ else if (Pair[SI].Classification == Subscript::ZIV) {
+ // always separable
+ Separable.set(SI);
+ }
+ else {
+ // SIV, RDIV, or MIV, so check for coupled group
+ bool Done = true;
+ for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
+ SmallBitVector Intersection = Pair[SI].GroupLoops;
+ Intersection &= Pair[SJ].GroupLoops;
+ if (Intersection.any()) {
+ // accumulate set of all the loops in group
+ Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
+ // accumulate set of all subscripts in group
+ Pair[SJ].Group |= Pair[SI].Group;
+ Done = false;
+ }
+ }
+ if (Done) {
+ if (Pair[SI].Group.count() == 1) {
+ Separable.set(SI);
+ ++SeparableSubscriptPairs;
+ }
+ else {
+ Coupled.set(SI);
+ ++CoupledSubscriptPairs;
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << " Separable = ");
+ DEBUG(dumpSmallBitVector(Separable));
+ DEBUG(dbgs() << " Coupled = ");
+ DEBUG(dumpSmallBitVector(Coupled));
+
+ Constraint NewConstraint;
+ NewConstraint.setAny(SE);
+
+ // test separable subscripts
+ for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ DEBUG(dbgs() << "testing subscript " << SI);
+ switch (Pair[SI].Classification) {
+ case Subscript::ZIV:
+ DEBUG(dbgs() << ", ZIV\n");
+ if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
+ return NULL;
+ break;
+ case Subscript::SIV: {
+ DEBUG(dbgs() << ", SIV\n");
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
+ Result, NewConstraint, SplitIter))
+ return NULL;
+ break;
+ }
+ case Subscript::RDIV:
+ DEBUG(dbgs() << ", RDIV\n");
+ if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
+ return NULL;
+ break;
+ case Subscript::MIV:
+ DEBUG(dbgs() << ", MIV\n");
+ if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
+ return NULL;
+ break;
+ default:
+ llvm_unreachable("subscript has unexpected classification");
+ }
+ }
+
+ if (Coupled.count()) {
+ // test coupled subscript groups
+ DEBUG(dbgs() << "starting on coupled subscripts\n");
+ DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
+ SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
+ for (unsigned II = 0; II <= MaxLevels; ++II)
+ Constraints[II].setAny(SE);
+ for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ DEBUG(dbgs() << "testing subscript group " << SI << " { ");
+ SmallBitVector Group(Pair[SI].Group);
+ SmallBitVector Sivs(Pairs);
+ SmallBitVector Mivs(Pairs);
+ SmallBitVector ConstrainedLevels(MaxLevels + 1);
+ for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ DEBUG(dbgs() << SJ << " ");
+ if (Pair[SJ].Classification == Subscript::SIV)
+ Sivs.set(SJ);
+ else
+ Mivs.set(SJ);
+ }
+ DEBUG(dbgs() << "}\n");
+ while (Sivs.any()) {
+ bool Changed = false;
+ for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
+ // SJ is an SIV subscript that's part of the current coupled group
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ DEBUG(dbgs() << "SIV\n");
+ if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
+ Result, NewConstraint, SplitIter))
+ return NULL;
+ ConstrainedLevels.set(Level);
+ if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
+ if (Constraints[Level].isEmpty()) {
+ ++DeltaIndependence;
+ return NULL;
+ }
+ Changed = true;
+ }
+ Sivs.reset(SJ);
+ }
+ if (Changed) {
+ // propagate, possibly creating new SIVs and ZIVs
+ DEBUG(dbgs() << " propagating\n");
+ DEBUG(dbgs() << "\tMivs = ");
+ DEBUG(dumpSmallBitVector(Mivs));
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ // SJ is an MIV subscript that's part of the current coupled group
+ DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
+ if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
+ Constraints, Result.Consistent)) {
+ DEBUG(dbgs() << "\t Changed\n");
+ ++DeltaPropagations;
+ Pair[SJ].Classification =
+ classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
+ Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[SJ].Loops);
+ switch (Pair[SJ].Classification) {
+ case Subscript::ZIV:
+ DEBUG(dbgs() << "ZIV\n");
+ if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
+ return NULL;
+ Mivs.reset(SJ);
+ break;
+ case Subscript::SIV:
+ Sivs.set(SJ);
+ Mivs.reset(SJ);
+ break;
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("bad subscript classification");
+ }
+ }
+ }
+ }
+ }
+
+ // test & propagate remaining RDIVs
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::RDIV) {
+ DEBUG(dbgs() << "RDIV test\n");
+ if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
+ return NULL;
+ // I don't yet understand how to propagate RDIV results
+ Mivs.reset(SJ);
+ }
+ }
+
+ // test remaining MIVs
+ // This code is temporary.
+ // Better to somehow test all remaining subscripts simultaneously.
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::MIV) {
+ DEBUG(dbgs() << "MIV test\n");
+ if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
+ return NULL;
+ }
+ else
+ llvm_unreachable("expected only MIV subscripts at this point");
+ }
+
+ // update Result.DV from constraint vector
+ DEBUG(dbgs() << " updating\n");
+ for (int SJ = ConstrainedLevels.find_first();
+ SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
+ updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
+ if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
+ return NULL;
+ }
+ }
+ }
+
+ // make sure Scalar flags are set correctly
+ SmallBitVector CompleteLoops(MaxLevels + 1);
+ for (unsigned SI = 0; SI < Pairs; ++SI)
+ CompleteLoops |= Pair[SI].Loops;
+ for (unsigned II = 1; II <= CommonLevels; ++II)
+ if (CompleteLoops[II])
+ Result.DV[II - 1].Scalar = false;
+
+  // make sure the LoopIndependent flag is set correctly
+ if (PossiblyLoopIndependent) {
+ for (unsigned II = 1; II <= CommonLevels; ++II) {
+ if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
+ Result.LoopIndependent = false;
+ break;
+ }
+ }
+ }
+
+ FullDependence *Final = new FullDependence(Result);
+ Result.DV = NULL;
+ return Final;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// getSplitIteration -
+// Rather than spend rarely-used space recording the splitting iteration
+// during the Weak-Crossing SIV test, we re-compute it on demand.
+// The re-computation is basically a repeat of the entire dependence test,
+// though simplified since we know that the dependence exists.
+// It's tedious, since we must go through all propagations, etc.
+//
+// Care is required to keep this code up to date w.r.t. the code above.
+//
+// Generally, the dependence analyzer will be used to build
+// a dependence graph for a function (basically a map from instructions
+// to dependences). Looking for cycles in the graph shows us loops
+// that cannot be trivially vectorized/parallelized.
+//
+// We can try to improve the situation by examining all the dependences
+// that make up the cycle, looking for ones we can break.
+// Sometimes, peeling the first or last iteration of a loop will break
+// dependences, and we've got flags for those possibilities.
+// Sometimes, splitting a loop at some other iteration will do the trick,
+// and we've got a flag for that case. Rather than waste the space to
+// record the exact iteration (since we rarely know), we provide
+// a method that calculates the iteration. It's a drag that it must work
+// from scratch, but wonderful in that it's possible.
+//
+// Here's an example:
+//
+// for (i = 0; i < 10; i++)
+// A[i] = ...
+// ... = A[11 - i]
+//
+// There's a loop-carried flow dependence from the store to the load,
+// found by the weak-crossing SIV test. The dependence will have a flag,
+// indicating that the dependence can be broken by splitting the loop.
+// Calling getSplitIteration will return 5.
+// Splitting the loop into two, like so:
+//
+// for (i = 0; i <= 5; i++)
+// A[i] = ...
+// ... = A[11 - i]
+// for (i = 6; i < 10; i++)
+// A[i] = ...
+// ... = A[11 - i]
+//
+// breaks the dependence and allows us to vectorize/parallelize
+// both loops.
+const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
+ unsigned SplitLevel) {
+ assert(Dep && "expected a pointer to a Dependence");
+ assert(Dep->isSplitable(SplitLevel) &&
+ "Dep should be splitable at SplitLevel");
+ const Instruction *Src = Dep->getSrc();
+ const Instruction *Dst = Dep->getDst();
+ assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
+ assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
+ assert(isLoadOrStore(Src));
+ assert(isLoadOrStore(Dst));
+ const Value *SrcPtr = getPointerOperand(Src);
+ const Value *DstPtr = getPointerOperand(Dst);
+ assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
+ AliasAnalysis::MustAlias);
+ const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ assert(SrcGEP);
+ assert(DstGEP);
+ assert(SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType());
+
+ // establish loop nesting levels
+ establishNestingLevels(Src, Dst);
+
+ FullDependence Result(Src, Dst, false, CommonLevels);
+
+ // classify subscript pairs
+ unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin();
+ SmallVector<Subscript, 4> Pair(Pairs);
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ Pair[SI].Loops.resize(MaxLevels + 1);
+ Pair[SI].GroupLoops.resize(MaxLevels + 1);
+ Pair[SI].Group.resize(Pairs);
+ }
+ Pairs = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin(),
+ DstEnd = DstGEP->idx_end();
+ SrcIdx != SrcEnd && DstIdx != DstEnd;
+ ++SrcIdx, ++DstIdx, ++Pairs) {
+ Pair[Pairs].Src = SE->getSCEV(*SrcIdx);
+ Pair[Pairs].Dst = SE->getSCEV(*DstIdx);
+ Pair[Pairs].Classification =
+ classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()),
+ Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[Pairs].Loops);
+ Pair[Pairs].GroupLoops = Pair[Pairs].Loops;
+ Pair[Pairs].Group.set(Pairs);
+ }
+
+ SmallBitVector Separable(Pairs);
+ SmallBitVector Coupled(Pairs);
+
+ // partition subscripts into separable and minimally-coupled groups
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ if (Pair[SI].Classification == Subscript::NonLinear) {
+ // ignore these, but collect loops for later
+ collectCommonLoops(Pair[SI].Src,
+ LI->getLoopFor(Src->getParent()),
+ Pair[SI].Loops);
+ collectCommonLoops(Pair[SI].Dst,
+ LI->getLoopFor(Dst->getParent()),
+ Pair[SI].Loops);
+ Result.Consistent = false;
+ }
+ else if (Pair[SI].Classification == Subscript::ZIV)
+ Separable.set(SI);
+ else {
+ // SIV, RDIV, or MIV, so check for coupled group
+ bool Done = true;
+ for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
+ SmallBitVector Intersection = Pair[SI].GroupLoops;
+ Intersection &= Pair[SJ].GroupLoops;
+ if (Intersection.any()) {
+ // accumulate set of all the loops in group
+ Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
+ // accumulate set of all subscripts in group
+ Pair[SJ].Group |= Pair[SI].Group;
+ Done = false;
+ }
+ }
+ if (Done) {
+ if (Pair[SI].Group.count() == 1)
+ Separable.set(SI);
+ else
+ Coupled.set(SI);
+ }
+ }
+ }
+
+ Constraint NewConstraint;
+ NewConstraint.setAny(SE);
+
+ // test separable subscripts
+ for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ switch (Pair[SI].Classification) {
+ case Subscript::SIV: {
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
+ Result, NewConstraint, SplitIter);
+ if (Level == SplitLevel) {
+ assert(SplitIter != NULL);
+ return SplitIter;
+ }
+ break;
+ }
+ case Subscript::ZIV:
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("subscript has unexpected classification");
+ }
+ }
+
+ if (Coupled.count()) {
+ // test coupled subscript groups
+ SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
+ for (unsigned II = 0; II <= MaxLevels; ++II)
+ Constraints[II].setAny(SE);
+ for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ SmallBitVector Group(Pair[SI].Group);
+ SmallBitVector Sivs(Pairs);
+ SmallBitVector Mivs(Pairs);
+ SmallBitVector ConstrainedLevels(MaxLevels + 1);
+ for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::SIV)
+ Sivs.set(SJ);
+ else
+ Mivs.set(SJ);
+ }
+ while (Sivs.any()) {
+ bool Changed = false;
+ for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ // SJ is an SIV subscript that's part of the current coupled group
+ unsigned Level;
+ const SCEV *SplitIter = NULL;
+ (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
+ Result, NewConstraint, SplitIter);
+ if (Level == SplitLevel && SplitIter)
+ return SplitIter;
+ ConstrainedLevels.set(Level);
+ if (intersectConstraints(&Constraints[Level], &NewConstraint))
+ Changed = true;
+ Sivs.reset(SJ);
+ }
+ if (Changed) {
+ // propagate, possibly creating new SIVs and ZIVs
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ // SJ is an MIV subscript that's part of the current coupled group
+ if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
+ Pair[SJ].Loops, Constraints, Result.Consistent)) {
+ Pair[SJ].Classification =
+ classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
+ Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[SJ].Loops);
+ switch (Pair[SJ].Classification) {
+ case Subscript::ZIV:
+ Mivs.reset(SJ);
+ break;
+ case Subscript::SIV:
+ Sivs.set(SJ);
+ Mivs.reset(SJ);
+ break;
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("bad subscript classification");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ llvm_unreachable("somehow reached end of routine");
+ return NULL;
+}
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 5f51f775f14..95e58022ca1 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -243,7 +243,8 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
if (!TD)
return false;
- unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ unsigned AS = GEP.getPointerAddressSpace();
+ unsigned IntPtrWidth = TD->getPointerSizeInBits(AS);
assert(IntPtrWidth == Offset.getBitWidth());
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
@@ -391,7 +392,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// Track base/offset pairs when converted to a plain integer provided the
// integer is large enough to represent the pointer.
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
- if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
+ unsigned AS = I.getPointerAddressSpace();
+ if (TD && IntegerSize >= TD->getPointerSizeInBits(AS)) {
std::pair<Value *, APInt> BaseAndOffset
= ConstantOffsetPtrs.lookup(I.getOperand(0));
if (BaseAndOffset.first)
@@ -425,7 +427,8 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
// modifications provided the integer is not too large.
Value *Op = I.getOperand(0);
unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
- if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+ unsigned AS = I.getAddressSpace();
+ if (TD && IntegerSize <= TD->getPointerSizeInBits(AS)) {
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
if (BaseAndOffset.first)
ConstantOffsetPtrs[&I] = BaseAndOffset;
@@ -760,7 +763,8 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
if (!TD || !V->getType()->isPointerTy())
return 0;
- unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
+ unsigned IntPtrWidth = TD->getPointerSizeInBits(AS);
APInt Offset = APInt::getNullValue(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
@@ -824,7 +828,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// size of the byval type by the target's pointer size.
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
- unsigned PointerSize = TD->getPointerSizeInBits();
+ unsigned AS = PTy->getAddressSpace();
+ unsigned PointerSize = TD->getPointerSizeInBits(AS);
// Ceiling division.
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index b3d62487fc1..8e326122fa5 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -666,7 +666,8 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// 'Offset' APInt must be the bitwidth of the target's pointer size.
static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP,
APInt &Offset) {
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ unsigned AS = GEP->getPointerAddressSpace();
+ unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
assert(IntPtrWidth == Offset.getBitWidth());
gep_type_iterator GTI = gep_type_begin(GEP);
@@ -696,12 +697,14 @@ static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP,
/// accumulates the total constant offset applied in the returned constant. It
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
+/// FIXME: This function also exists in InlineCost.cpp.
static Constant *stripAndComputeConstantOffsets(const DataLayout &TD,
Value *&V) {
if (!V->getType()->isPointerTy())
return 0;
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
+ unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
APInt Offset = APInt::getNullValue(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
@@ -1877,7 +1880,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) &&
- Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+ Q.TD->getPointerSizeInBits(
+ cast<PtrToIntInst>(LI)->getPointerAddressSpace()) ==
+ DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 5e05f4c8ca1..5c2a49e767f 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -19,8 +19,8 @@
#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/TargetTransformInfo.h"
using namespace llvm;
@@ -1599,15 +1599,15 @@ static bool width_descending(Value *lhs, Value *rhs) {
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
- SmallVectorImpl<WeakVH> &DeadInsts,
- const TargetLowering *TLI) {
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ const ScalarTargetTransformInfo *STTI) {
// Find integer phis in order of increasing width.
SmallVector<PHINode*, 8> Phis;
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
Phis.push_back(Phi);
}
- if (TLI)
+ if (STTI)
std::sort(Phis.begin(), Phis.end(), width_descending);
unsigned NumElim = 0;
@@ -1624,8 +1624,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
if (!OrigPhiRef) {
OrigPhiRef = Phi;
- if (Phi->getType()->isIntegerTy() && TLI
- && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+ if (Phi->getType()->isIntegerTy() && STTI &&
+ STTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
// This phi can be freely truncated to the narrowest phi type. Map the
// truncated expression to it so it will be reused for narrow types.
const SCEV *TruncExpr =
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 951b442b874..1d7f0692cbe 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -40,7 +40,8 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
assert(isa<PointerType>(Ty) && "Expected a pointer type!");
- return TD ? TD->getPointerSizeInBits() : 0;
+ return TD ?
+ TD->getPointerSizeInBits(cast<PointerType>(Ty)->getAddressSpace()) : 0;
}
static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
@@ -1621,7 +1622,8 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
// Re-sign extend from the pointer size if needed to get overflow edge cases
// right.
- unsigned PtrSize = TD.getPointerSizeInBits();
+ unsigned AS = GEP->getPointerAddressSpace();
+ unsigned PtrSize = TD.getPointerSizeInBits(AS);
if (PtrSize < 64)
Offset = SignExtend64(Offset, PtrSize);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 86bc7aced1e..60e1741d694 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -916,7 +916,7 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind
/// indicates what kind of attribute list this is: 0: function arg, 1: result,
/// 2: function attr.
-bool LLParser::ParseOptionalAttrs(Attributes::Builder &B, unsigned AttrKind) {
+bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) {
LocTy AttrLoc = Lex.getLoc();
bool HaveError = false;
@@ -1435,7 +1435,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
// Parse the argument.
LocTy ArgLoc;
Type *ArgTy = 0;
- Attributes::Builder ArgAttrs;
+ AttrBuilder ArgAttrs;
Value *V;
if (ParseType(ArgTy, ArgLoc))
return true;
@@ -1443,7 +1443,8 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
// Otherwise, handle normal operands.
if (ParseOptionalAttrs(ArgAttrs, 0) || ParseValue(ArgTy, V, PFS))
return true;
- ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(ArgAttrs)));
+ ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(V->getContext(),
+ ArgAttrs)));
}
Lex.Lex(); // Lex the ')'.
@@ -1475,7 +1476,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
} else {
LocTy TypeLoc = Lex.getLoc();
Type *ArgTy = 0;
- Attributes::Builder Attrs;
+ AttrBuilder Attrs;
std::string Name;
if (ParseType(ArgTy) ||
@@ -1492,7 +1493,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
if (!FunctionType::isValidArgumentType(ArgTy))
return Error(TypeLoc, "invalid type for function argument");
- ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attributes::get(Attrs), Name));
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
+ Attributes::get(ArgTy->getContext(),
+ Attrs), Name));
while (EatIfPresent(lltok::comma)) {
// Handle ... at end of arg list.
@@ -1518,7 +1521,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
if (!ArgTy->isFirstClassType())
return Error(TypeLoc, "invalid type for function argument");
- ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attributes::get(Attrs), Name));
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
+ Attributes::get(ArgTy->getContext(), Attrs),
+ Name));
}
}
@@ -1542,7 +1547,7 @@ bool LLParser::ParseFunctionType(Type *&Result) {
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
if (!ArgList[i].Name.empty())
return Error(ArgList[i].Loc, "argument name invalid in function type");
- if (ArgList[i].Attrs)
+ if (ArgList[i].Attrs.hasAttributes())
return Error(ArgList[i].Loc,
"argument attributes invalid in function type");
}
@@ -2672,7 +2677,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
unsigned Linkage;
unsigned Visibility;
- Attributes::Builder RetAttrs;
+ AttrBuilder RetAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc = Lex.getLoc();
@@ -2736,7 +2741,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
SmallVector<ArgInfo, 8> ArgList;
bool isVarArg;
- Attributes::Builder FuncAttrs;
+ AttrBuilder FuncAttrs;
std::string Section;
unsigned Alignment;
std::string GC;
@@ -2766,7 +2771,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
SmallVector<AttributeWithIndex, 8> Attrs;
if (RetAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attributes::get(RetType->getContext(),
+ RetAttrs)));
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
ParamTypeList.push_back(ArgList[i].Ty);
@@ -2775,7 +2783,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
}
if (FuncAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FuncAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ Attributes::get(RetType->getContext(),
+ FuncAttrs)));
AttrListPtr PAL = AttrListPtr::get(Attrs);
@@ -2795,6 +2806,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
ForwardRefVals.find(FunctionName);
if (FRVI != ForwardRefVals.end()) {
Fn = M->getFunction(FunctionName);
+ if (!Fn)
+ return Error(FRVI->second.second, "invalid forward reference to "
+ "function as global value!");
if (Fn->getType() != PFT)
return Error(FRVI->second.second, "invalid forward reference to "
"function '" + FunctionName + "' with wrong type!");
@@ -3248,7 +3262,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
LocTy CallLoc = Lex.getLoc();
- Attributes::Builder RetAttrs, FnAttrs;
+ AttrBuilder RetAttrs, FnAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc;
@@ -3294,7 +3308,10 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// Set up the Attributes for the function.
SmallVector<AttributeWithIndex, 8> Attrs;
if (RetAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attributes::get(Callee->getContext(),
+ RetAttrs)));
SmallVector<Value*, 8> Args;
@@ -3322,7 +3339,10 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
return Error(CallLoc, "not enough parameters specified for call");
if (FnAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FnAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ Attributes::get(Callee->getContext(),
+ FnAttrs)));
// Finish off the Attributes and check them
AttrListPtr PAL = AttrListPtr::get(Attrs);
@@ -3647,7 +3667,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
/// ParameterList OptionalAttrs
bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
bool isTail) {
- Attributes::Builder RetAttrs, FnAttrs;
+ AttrBuilder RetAttrs, FnAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc;
@@ -3690,7 +3710,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// Set up the Attributes for the function.
SmallVector<AttributeWithIndex, 8> Attrs;
if (RetAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attributes::get(Callee->getContext(),
+ RetAttrs)));
SmallVector<Value*, 8> Args;
@@ -3718,7 +3741,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
return Error(CallLoc, "not enough parameters specified for call");
if (FnAttrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FnAttrs)));
+ Attrs.push_back(
+ AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ Attributes::get(Callee->getContext(),
+ FnAttrs)));
// Finish off the Attributes and check them
AttrListPtr PAL = AttrListPtr::get(Attrs);
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 671eaf64291..c6bbdb27aee 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -175,7 +175,7 @@ namespace llvm {
bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalAddrSpace(unsigned &AddrSpace);
- bool ParseOptionalAttrs(Attributes::Builder &Attrs, unsigned AttrKind);
+ bool ParseOptionalAttrs(AttrBuilder &Attrs, unsigned AttrKind);
bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
bool ParseOptionalLinkage(unsigned &Linkage) {
bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 8860ef065c6..279343c48c6 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -211,7 +211,6 @@ namespace {
}
/// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
- //static inline bool classof(const ConstantPlaceHolder *) { return true; }
static bool classof(const Value *V) {
return isa<ConstantExpr>(V) &&
cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
@@ -477,14 +476,15 @@ bool BitcodeReader::ParseAttributeBlock() {
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
Attributes ReconstitutedAttr =
- Attributes::decodeLLVMAttributesForBitcode(Record[i+1]);
+ Attributes::decodeLLVMAttributesForBitcode(Context, Record[i+1]);
Record[i+1] = ReconstitutedAttr.Raw();
}
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
- if (Attributes(Record[i+1]).hasAttributes())
+ AttrBuilder B(Record[i+1]);
+ if (B.hasAttributes())
Attrs.push_back(AttributeWithIndex::get(Record[i],
- Attributes(Record[i+1])));
+ Attributes::get(Context, B)));
}
MAttributes.push_back(AttrListPtr::get(Attrs));
@@ -891,9 +891,9 @@ bool BitcodeReader::ParseMetadata() {
}
}
-/// DecodeSignRotatedValue - Decode a signed value stored with the sign bit in
+/// decodeSignRotatedValue - Decode a signed value stored with the sign bit in
/// the LSB for dense VBR encoding.
-static uint64_t DecodeSignRotatedValue(uint64_t V) {
+uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
if ((V & 1) == 0)
return V >> 1;
if (V != 1)
@@ -943,7 +943,7 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() {
static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
SmallVector<uint64_t, 8> Words(Vals.size());
std::transform(Vals.begin(), Vals.end(), Words.begin(),
- DecodeSignRotatedValue);
+ BitcodeReader::decodeSignRotatedValue);
return APInt(TypeBits, Words);
}
@@ -997,7 +997,7 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
if (!CurTy->isIntegerTy() || Record.empty())
return Error("Invalid CST_INTEGER record");
- V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0]));
+ V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0]));
break;
case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
if (!CurTy->isIntegerTy() || Record.empty())
@@ -1524,13 +1524,22 @@ bool BitcodeReader::ParseModule(bool Resume) {
// Read a record.
switch (Stream.ReadRecord(Code, Record)) {
default: break; // Default behavior, ignore unknown content.
- case bitc::MODULE_CODE_VERSION: // VERSION: [version#]
+ case bitc::MODULE_CODE_VERSION: { // VERSION: [version#]
if (Record.size() < 1)
return Error("Malformed MODULE_CODE_VERSION");
- // Only version #0 is supported so far.
- if (Record[0] != 0)
- return Error("Unknown bitstream version!");
+ // Only versions #0 and #1 are supported so far.
+ unsigned module_version = Record[0];
+ switch (module_version) {
+ default: return Error("Unknown bitstream version!");
+ case 0:
+ UseRelativeIDs = false;
+ break;
+ case 1:
+ UseRelativeIDs = true;
+ break;
+ }
break;
+ }
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
@@ -1797,13 +1806,6 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
// Read a record.
switch (Stream.ReadRecord(Code, Record)) {
default: break; // Default behavior, ignore unknown content.
- case bitc::MODULE_CODE_VERSION: // VERSION: [version#]
- if (Record.size() < 1)
- return Error("Malformed MODULE_CODE_VERSION");
- // Only version #0 is supported so far.
- if (Record[0] != 0)
- return Error("Unknown bitstream version!");
- break;
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
@@ -2016,7 +2018,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *LHS, *RHS;
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
- getValue(Record, OpNum, LHS->getType(), RHS) ||
+ popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
OpNum+1 > Record.size())
return Error("Invalid BINOP record");
@@ -2131,8 +2133,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *TrueVal, *FalseVal, *Cond;
if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
- getValue(Record, OpNum, TrueVal->getType(), FalseVal) ||
- getValue(Record, OpNum, Type::getInt1Ty(Context), Cond))
+ popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
+ popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond))
return Error("Invalid SELECT record");
I = SelectInst::Create(Cond, TrueVal, FalseVal);
@@ -2146,7 +2148,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *TrueVal, *FalseVal, *Cond;
if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
- getValue(Record, OpNum, TrueVal->getType(), FalseVal) ||
+ popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
getValueTypePair(Record, OpNum, NextValueNo, Cond))
return Error("Invalid SELECT record");
@@ -2171,7 +2173,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Vec, *Idx;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
- getValue(Record, OpNum, Type::getInt32Ty(Context), Idx))
+ popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
return Error("Invalid EXTRACTELT record");
I = ExtractElementInst::Create(Vec, Idx);
InstructionList.push_back(I);
@@ -2182,9 +2184,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Vec, *Elt, *Idx;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
- getValue(Record, OpNum, Type::getInt32Ty(Context), Idx))
+ popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
return Error("Invalid INSERTELT record");
I = InsertElementInst::Create(Vec, Elt, Idx);
InstructionList.push_back(I);
@@ -2195,7 +2197,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Vec1, *Vec2, *Mask;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
- getValue(Record, OpNum, Vec1->getType(), Vec2))
+ popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2))
return Error("Invalid SHUFFLEVEC record");
if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
@@ -2215,7 +2217,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *LHS, *RHS;
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
- getValue(Record, OpNum, LHS->getType(), RHS) ||
+ popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
OpNum+1 != Record.size())
return Error("Invalid CMP record");
@@ -2260,7 +2262,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
else {
BasicBlock *FalseDest = getBasicBlock(Record[1]);
- Value *Cond = getFnValueByID(Record[2], Type::getInt1Ty(Context));
+ Value *Cond = getValue(Record, 2, NextValueNo,
+ Type::getInt1Ty(Context));
if (FalseDest == 0 || Cond == 0)
return Error("Invalid BR record");
I = BranchInst::Create(TrueDest, FalseDest, Cond);
@@ -2276,7 +2279,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Type *OpTy = getTypeByID(Record[1]);
unsigned ValueBitWidth = cast<IntegerType>(OpTy)->getBitWidth();
- Value *Cond = getFnValueByID(Record[2], OpTy);
+ Value *Cond = getValue(Record, 2, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[3]);
if (OpTy == 0 || Cond == 0 || Default == 0)
return Error("Invalid SWITCH record");
@@ -2331,7 +2334,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (Record.size() < 3 || (Record.size() & 1) == 0)
return Error("Invalid SWITCH record");
Type *OpTy = getTypeByID(Record[0]);
- Value *Cond = getFnValueByID(Record[1], OpTy);
+ Value *Cond = getValue(Record, 1, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[2]);
if (OpTy == 0 || Cond == 0 || Default == 0)
return Error("Invalid SWITCH record");
@@ -2355,7 +2358,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (Record.size() < 2)
return Error("Invalid INDIRECTBR record");
Type *OpTy = getTypeByID(Record[0]);
- Value *Address = getFnValueByID(Record[1], OpTy);
+ Value *Address = getValue(Record, 1, NextValueNo, OpTy);
if (OpTy == 0 || Address == 0)
return Error("Invalid INDIRECTBR record");
unsigned NumDests = Record.size()-2;
@@ -2397,7 +2400,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
SmallVector<Value*, 16> Ops;
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
- Ops.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
+ Ops.push_back(getValue(Record, OpNum, NextValueNo,
+ FTy->getParamType(i)));
if (Ops.back() == 0) return Error("Invalid INVOKE record");
}
@@ -2444,7 +2448,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(PN);
for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
- Value *V = getFnValueByID(Record[1+i], Ty);
+ Value *V;
+ // With the new function encoding, it is possible that operands have
+ // negative IDs (for forward references). Use a signed VBR
+ // representation to keep the encoding small.
+ if (UseRelativeIDs)
+ V = getValueSigned(Record, 1+i, NextValueNo, Ty);
+ else
+ V = getValue(Record, 1+i, NextValueNo, Ty);
BasicBlock *BB = getBasicBlock(Record[2+i]);
if (!V || !BB) return Error("Invalid PHI record");
PN->addIncoming(V, BB);
@@ -2542,7 +2553,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Val, *Ptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+2 != Record.size())
return Error("Invalid STORE record");
@@ -2556,7 +2567,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Val, *Ptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+4 != Record.size())
return Error("Invalid STOREATOMIC record");
@@ -2579,9 +2590,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Ptr, *Cmp, *New;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Cmp) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), New) ||
OpNum+3 != Record.size())
return Error("Invalid CMPXCHG record");
@@ -2599,7 +2610,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Ptr, *Val;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- getValue(Record, OpNum,
+ popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+4 != Record.size())
return Error("Invalid ATOMICRMW record");
@@ -2653,7 +2664,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (FTy->getParamType(i)->isLabelTy())
Args.push_back(getBasicBlock(Record[OpNum]));
else
- Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
+ Args.push_back(getValue(Record, OpNum, NextValueNo,
+ FTy->getParamType(i)));
if (Args.back() == 0) return Error("Invalid CALL record");
}
@@ -2682,7 +2694,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (Record.size() < 3)
return Error("Invalid VAARG record");
Type *OpTy = getTypeByID(Record[0]);
- Value *Op = getFnValueByID(Record[1], OpTy);
+ Value *Op = getValue(Record, 1, NextValueNo, OpTy);
Type *ResTy = getTypeByID(Record[2]);
if (!OpTy || !Op || !ResTy)
return Error("Invalid VAARG record");
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index e7c4e94f785..3d5c0eb4def 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -179,18 +179,27 @@ class BitcodeReader : public GVMaterializer {
typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
+ /// UseRelativeIDs - Indicates that we are using a new encoding for
+ /// instruction operands where most operands in the current
+ /// FUNCTION_BLOCK are encoded relative to the instruction number,
+ /// for a more compact encoding. Some instruction operands are not
+ /// relative to the instruction ID: basic block numbers, and types.
+ /// Once the old-style function blocks have been phased out, we will
+ /// no longer need this flag.
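+ /// For example, with relative IDs an operand that names value #7 from
+ /// instruction #10 is stored as 10 - 7 = 3; the reader recovers it by
+ /// subtracting the stored value from the instruction number.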
+ bool UseRelativeIDs;
+
public:
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
: Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false),
ErrorString(0), ValueList(C), MDValueList(C),
- SeenFirstFunctionBody(false) {
+ SeenFirstFunctionBody(false), UseRelativeIDs(false) {
}
explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
: Context(C), TheModule(0), Buffer(0), BufferOwned(false),
LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false),
ErrorString(0), ValueList(C), MDValueList(C),
- SeenFirstFunctionBody(false) {
+ SeenFirstFunctionBody(false), UseRelativeIDs(false) {
}
~BitcodeReader() {
FreeState();
@@ -223,6 +232,9 @@ public:
/// @brief Cheap mechanism to just extract module triple
/// @returns true if an error occurred.
bool ParseTriple(std::string &Triple);
+
+ static uint64_t decodeSignRotatedValue(uint64_t V);
+
private:
Type *getTypeByID(unsigned ID);
Value *getFnValueByID(unsigned ID, Type *Ty) {
@@ -247,6 +259,9 @@ private:
unsigned InstNum, Value *&ResVal) {
if (Slot == Record.size()) return true;
unsigned ValNo = (unsigned)Record[Slot++];
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
if (ValNo < InstNum) {
// If this is not a forward reference, just return the value we already
// have.
@@ -255,20 +270,54 @@ private:
} else if (Slot == Record.size()) {
return true;
}
-
+
unsigned TypeNo = (unsigned)Record[Slot++];
ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
return ResVal == 0;
}
- bool getValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
- Type *Ty, Value *&ResVal) {
- if (Slot == Record.size()) return true;
- unsigned ValNo = (unsigned)Record[Slot++];
- ResVal = getFnValueByID(ValNo, Ty);
+
+ /// popValue - Read a value out of the specified record from slot 'Slot'.
+ /// Increment Slot past the number of slots used by the value in the record.
+ /// Return true if there is an error.
+ bool popValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+ unsigned InstNum, Type *Ty, Value *&ResVal) {
+ if (getValue(Record, Slot, InstNum, Ty, ResVal))
+ return true;
+ // All values currently take a single record slot.
+ ++Slot;
+ return false;
+ }
+
+ /// getValue -- Like popValue, but does not increment the Slot number.
+ bool getValue(SmallVector<uint64_t, 64> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty, Value *&ResVal) {
+ ResVal = getValue(Record, Slot, InstNum, Ty);
return ResVal == 0;
}
-
+ /// getValue -- Version of getValue that returns ResVal directly,
+ /// or 0 if there is an error.
+ Value *getValue(SmallVector<uint64_t, 64> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty) {
+ if (Slot == Record.size()) return 0;
+ unsigned ValNo = (unsigned)Record[Slot];
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ return getFnValueByID(ValNo, Ty);
+ }
+
+ /// getValueSigned -- Like getValue, but decodes signed VBRs.
+ Value *getValueSigned(SmallVector<uint64_t, 64> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty) {
+ if (Slot == Record.size()) return 0;
+ unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ return getFnValueByID(ValNo, Ty);
+ }
+
bool ParseModule(bool Resume);
bool ParseAttributeBlock();
bool ParseTypeTable();
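
A minimal standalone sketch of the round trip these helpers implement
(hypothetical IDs, plain C++ rather than LLVM code): the writer stores
InstNum - ValNo, the reader re-subtracts, and 32-bit unsigned wraparound
makes the transform self-inverse even for forward references.

    #include <cassert>
    #include <cstdint>

    int main() {
      // Backward reference: operand with value ID 5 used by instruction #9.
      uint32_t InstID = 9, ValID = 5;
      uint32_t Delta = InstID - ValID;   // 4, a short VBR on disk
      assert(InstID - Delta == ValID);   // reader recovers 5

      // Forward reference: value ID 12 used by instruction #9. The unsigned
      // subtraction wraps, and wraps back symmetrically on the read side.
      ValID = 12;
      Delta = InstID - ValID;            // 0xFFFFFFFD
      assert(InstID - Delta == ValID);   // reader recovers 12
      return 0;
    }
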
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index b3f1bb13a9f..60c657ae6dd 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -41,8 +41,6 @@ EnablePreserveUseListOrdering("enable-bc-uselist-preserve",
/// These are manifest constants used by the bitcode writer. They do not need to
/// be kept in sync with the reader, but need to be consistent within this file.
enum {
- CurVersion = 0,
-
// VALUE_SYMTAB_BLOCK abbrev id's.
VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
VST_ENTRY_7_ABBREV,
@@ -722,16 +720,20 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
+static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V) {
+ if ((int64_t)V >= 0)
+ Vals.push_back(V << 1);
+ else
+ Vals.push_back((-V << 1) | 1);
+}
+
static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals,
unsigned &Code, unsigned &AbbrevToUse, const APInt &Val,
bool EmitSizeForWideNumbers = false
) {
if (Val.getBitWidth() <= 64) {
uint64_t V = Val.getSExtValue();
- if ((int64_t)V >= 0)
- Vals.push_back(V << 1);
- else
- Vals.push_back((-V << 1) | 1);
+ emitSignedInt64(Vals, V);
Code = bitc::CST_CODE_INTEGER;
AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
} else {
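
decodeSignRotatedValue, declared in BitcodeReader.h above, is the inverse
of this transform; its body is outside the hunks shown here, so the decoder
below is only a sketch implied by emitSignedInt64, with the INT64_MIN
corner case (which encodes to 1) handled explicitly.

    #include <cassert>
    #include <cstdint>

    static uint64_t encodeSigned(uint64_t V) { // mirrors emitSignedInt64
      if ((int64_t)V >= 0)
        return V << 1;
      return (-V << 1) | 1;
    }

    static uint64_t decodeSigned(uint64_t V) { // assumed inverse, a sketch
      if ((V & 1) == 0)
        return V >> 1;        // even: non-negative value
      if (V != 1)
        return -(V >> 1);     // odd: negated magnitude
      return 1ULL << 63;      // 1 encodes INT64_MIN
    }

    int main() {
      const int64_t Tests[] = {0, 1, -1, 42, -42, INT64_MIN};
      for (unsigned i = 0; i != 6; ++i)
        assert((int64_t)decodeSigned(encodeSigned((uint64_t)Tests[i])) ==
               Tests[i]);
      return 0;
    }
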
@@ -747,11 +749,7 @@ static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals,
const uint64_t *RawWords = Val.getRawData();
for (unsigned i = 0; i != NWords; ++i) {
- int64_t V = RawWords[i];
- if (V >= 0)
- Vals.push_back(V << 1);
- else
- Vals.push_back((-V << 1) | 1);
+ emitSignedInt64(Vals, RawWords[i]);
}
Code = bitc::CST_CODE_WIDE_INTEGER;
}
@@ -1025,12 +1023,13 @@ static void WriteModuleConstants(const ValueEnumerator &VE,
///
/// This function adds V's value ID to Vals. If the value ID is higher than the
/// instruction ID, then it is a forward reference, and it also includes the
-/// type ID.
+/// type ID. The value ID that is written is encoded relative to the InstID.
static bool PushValueAndType(const Value *V, unsigned InstID,
SmallVector<unsigned, 64> &Vals,
ValueEnumerator &VE) {
unsigned ValID = VE.getValueID(V);
- Vals.push_back(ValID);
+ // Make encoding relative to the InstID.
+ Vals.push_back(InstID - ValID);
if (ValID >= InstID) {
Vals.push_back(VE.getTypeID(V->getType()));
return true;
@@ -1038,6 +1037,30 @@ static bool PushValueAndType(const Value *V, unsigned InstID,
return false;
}
+/// pushValue - Like PushValueAndType, but where the type of the value is
+/// omitted (perhaps it was already encoded in an earlier operand).
+static void pushValue(const Value *V, unsigned InstID,
+ SmallVector<unsigned, 64> &Vals,
+ ValueEnumerator &VE) {
+ unsigned ValID = VE.getValueID(V);
+ Vals.push_back(InstID - ValID);
+}
+
+static void pushValue64(const Value *V, unsigned InstID,
+ SmallVector<uint64_t, 128> &Vals,
+ ValueEnumerator &VE) {
+ uint64_t ValID = VE.getValueID(V);
+ Vals.push_back(InstID - ValID);
+}
+
+static void pushValueSigned(const Value *V, unsigned InstID,
+ SmallVector<uint64_t, 128> &Vals,
+ ValueEnumerator &VE) {
+ unsigned ValID = VE.getValueID(V);
+ int64_t diff = ((int32_t)InstID - (int32_t)ValID);
+ emitSignedInt64(Vals, diff);
+}
+
/// WriteInstruction - Emit an instruction to the specified stream.
static void WriteInstruction(const Instruction &I, unsigned InstID,
ValueEnumerator &VE, BitstreamWriter &Stream,
@@ -1058,7 +1081,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Code = bitc::FUNC_CODE_INST_BINOP;
if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
AbbrevToUse = FUNCTION_INST_BINOP_ABBREV;
- Vals.push_back(VE.getValueID(I.getOperand(1)));
+ pushValue(I.getOperand(1), InstID, Vals, VE);
Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode()));
uint64_t Flags = GetOptimizationFlags(&I);
if (Flags != 0) {
@@ -1096,32 +1119,32 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::Select:
Code = bitc::FUNC_CODE_INST_VSELECT;
PushValueAndType(I.getOperand(1), InstID, Vals, VE);
- Vals.push_back(VE.getValueID(I.getOperand(2)));
+ pushValue(I.getOperand(2), InstID, Vals, VE);
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
break;
case Instruction::ExtractElement:
Code = bitc::FUNC_CODE_INST_EXTRACTELT;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
- Vals.push_back(VE.getValueID(I.getOperand(1)));
+ pushValue(I.getOperand(1), InstID, Vals, VE);
break;
case Instruction::InsertElement:
Code = bitc::FUNC_CODE_INST_INSERTELT;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
- Vals.push_back(VE.getValueID(I.getOperand(1)));
- Vals.push_back(VE.getValueID(I.getOperand(2)));
+ pushValue(I.getOperand(1), InstID, Vals, VE);
+ pushValue(I.getOperand(2), InstID, Vals, VE);
break;
case Instruction::ShuffleVector:
Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
- Vals.push_back(VE.getValueID(I.getOperand(1)));
- Vals.push_back(VE.getValueID(I.getOperand(2)));
+ pushValue(I.getOperand(1), InstID, Vals, VE);
+ pushValue(I.getOperand(2), InstID, Vals, VE);
break;
case Instruction::ICmp:
case Instruction::FCmp:
// compare returning Int1Ty or vector of Int1Ty
Code = bitc::FUNC_CODE_INST_CMP2;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
- Vals.push_back(VE.getValueID(I.getOperand(1)));
+ pushValue(I.getOperand(1), InstID, Vals, VE);
Vals.push_back(cast<CmpInst>(I).getPredicate());
break;
@@ -1147,7 +1170,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals.push_back(VE.getValueID(II.getSuccessor(0)));
if (II.isConditional()) {
Vals.push_back(VE.getValueID(II.getSuccessor(1)));
- Vals.push_back(VE.getValueID(II.getCondition()));
+ pushValue(II.getCondition(), InstID, Vals, VE);
}
}
break;
@@ -1164,7 +1187,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals64.push_back(SwitchRecordHeader);
Vals64.push_back(VE.getTypeID(SI.getCondition()->getType()));
- Vals64.push_back(VE.getValueID(SI.getCondition()));
+ pushValue64(SI.getCondition(), InstID, Vals64, VE);
Vals64.push_back(VE.getValueID(SI.getDefaultDest()));
Vals64.push_back(SI.getNumCases());
for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
@@ -1215,7 +1238,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::IndirectBr:
Code = bitc::FUNC_CODE_INST_INDIRECTBR;
Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ // Encode the address operand as relative, but not the basic blocks.
+ pushValue(I.getOperand(0), InstID, Vals, VE);
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i)
Vals.push_back(VE.getValueID(I.getOperand(i)));
break;
@@ -1234,7 +1259,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
// Emit value #'s for the fixed parameters.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Vals.push_back(VE.getValueID(I.getOperand(i))); // fixed param.
+ pushValue(I.getOperand(i), InstID, Vals, VE); // fixed param.
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
@@ -1256,12 +1281,19 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::PHI: {
const PHINode &PN = cast<PHINode>(I);
Code = bitc::FUNC_CODE_INST_PHI;
- Vals.push_back(VE.getTypeID(PN.getType()));
+ // With the newer instruction encoding, forward references could give
+ // negative valued IDs. This is most common for PHIs, so we use
+ // signed VBRs.
+ SmallVector<uint64_t, 128> Vals64;
+ Vals64.push_back(VE.getTypeID(PN.getType()));
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- Vals.push_back(VE.getValueID(PN.getIncomingValue(i)));
- Vals.push_back(VE.getValueID(PN.getIncomingBlock(i)));
+ pushValueSigned(PN.getIncomingValue(i), InstID, Vals64, VE);
+ Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i)));
}
- break;
+ // Emit a Vals64 vector and exit.
+ Stream.EmitRecord(Code, Vals64, AbbrevToUse);
+ Vals64.clear();
+ return;
}
case Instruction::LandingPad: {
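
The size win from signed VBRs is easy to see with concrete, hypothetical
numbers: a PHI at instruction #9 naming a value that will only be defined
at ID 12 yields a delta of -3, which sign-rotates to 7 (one VBR chunk),
whereas the unsigned wraparound encoding would be 0xFFFFFFFD.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t InstID = 9, ValID = 12;                   // forward reference
      int64_t Diff = (int32_t)InstID - (int32_t)ValID;   // as pushValueSigned
      uint64_t Rotated = Diff >= 0 ? (uint64_t)Diff << 1
                                   : ((uint64_t)-Diff << 1) | 1;
      // Prints: delta -3 rotates to 7 (unsigned form would be 0xfffffffd).
      std::printf("delta %lld rotates to %llu (unsigned form would be %#x)\n",
                  (long long)Diff, (unsigned long long)Rotated,
                  InstID - ValID);
      return 0;
    }
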
@@ -1311,7 +1343,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
else
Code = bitc::FUNC_CODE_INST_STORE;
PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr
- Vals.push_back(VE.getValueID(I.getOperand(0))); // val.
+ pushValue(I.getOperand(0), InstID, Vals, VE); // val.
Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
Vals.push_back(cast<StoreInst>(I).isVolatile());
if (cast<StoreInst>(I).isAtomic()) {
@@ -1322,8 +1354,8 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::AtomicCmpXchg:
Code = bitc::FUNC_CODE_INST_CMPXCHG;
PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr
- Vals.push_back(VE.getValueID(I.getOperand(1))); // cmp.
- Vals.push_back(VE.getValueID(I.getOperand(2))); // newval.
+ pushValue(I.getOperand(1), InstID, Vals, VE); // cmp.
+ pushValue(I.getOperand(2), InstID, Vals, VE); // newval.
Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile());
Vals.push_back(GetEncodedOrdering(
cast<AtomicCmpXchgInst>(I).getOrdering()));
@@ -1333,7 +1365,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::AtomicRMW:
Code = bitc::FUNC_CODE_INST_ATOMICRMW;
PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr
- Vals.push_back(VE.getValueID(I.getOperand(1))); // val.
+ pushValue(I.getOperand(1), InstID, Vals, VE); // val.
Vals.push_back(GetEncodedRMWOperation(
cast<AtomicRMWInst>(I).getOperation()));
Vals.push_back(cast<AtomicRMWInst>(I).isVolatile());
@@ -1358,8 +1390,13 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee
// Emit value #'s for the fixed parameters.
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Vals.push_back(VE.getValueID(CI.getArgOperand(i))); // fixed param.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
+ // Check for labels (can happen with asm labels).
+ if (FTy->getParamType(i)->isLabelTy())
+ Vals.push_back(VE.getValueID(CI.getArgOperand(i)));
+ else
+ pushValue(CI.getArgOperand(i), InstID, Vals, VE); // fixed param.
+ }
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
@@ -1372,7 +1409,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::VAArg:
Code = bitc::FUNC_CODE_INST_VAARG;
Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty
- Vals.push_back(VE.getValueID(I.getOperand(0))); // valist.
+ pushValue(I.getOperand(0), InstID, Vals, VE); // valist.
Vals.push_back(VE.getTypeID(I.getType())); // restype.
break;
}
@@ -1514,8 +1551,8 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
// Emit blockinfo, which defines the standard abbreviations etc.
static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
// We only want to emit block info records for blocks that have multiple
- // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. Other
- // blocks can defined their abbrevs inline.
+ // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK.
+ // Other blocks can define their abbrevs inline.
Stream.EnterBlockInfoBlock(2);
{ // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings.
@@ -1773,12 +1810,10 @@ static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
static void WriteModule(const Module *M, BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
- // Emit the version number if it is non-zero.
- if (CurVersion) {
- SmallVector<unsigned, 1> Vals;
- Vals.push_back(CurVersion);
- Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
- }
+ SmallVector<unsigned, 1> Vals;
+ unsigned CurVersion = 1;
+ Vals.push_back(CurVersion);
+ Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
// Analyze the module, enumerating globals, functions, etc.
ValueEnumerator VE(M);
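
Emitting MODULE_CODE_VERSION unconditionally lets readers key the operand
encoding off the version: 0 means the old absolute IDs, 1 means the
relative IDs introduced here, and older readers reject the file outright.
The real handling lives in BitcodeReader::ParseModule, outside the hunks
shown; parseModuleVersion below is a hypothetical stand-in for that logic.

    #include <cstdint>

    // Sketch (assumed, not verbatim): map the version record to the
    // reader's UseRelativeIDs flag.
    static bool parseModuleVersion(uint64_t Version, bool &UseRelativeIDs) {
      switch (Version) {
      case 0: UseRelativeIDs = false; return true; // absolute operand IDs
      case 1: UseRelativeIDs = true;  return true; // relative operand IDs
      default: return false;                       // unknown version: error
      }
    }

    int main() {
      bool Rel = false;
      return parseModuleVersion(1, Rel) && Rel ? 0 : 1;
    }
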
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 205480a4692..7a1c049d522 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -635,7 +635,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
--R;
const unsigned NewSuperReg = Order[R];
// Don't consider non-allocatable registers
- if (!RegClassInfo.isAllocatable(NewSuperReg)) continue;
+ if (!MRI.isAllocatable(NewSuperReg)) continue;
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
@@ -818,7 +818,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!RegClassInfo.isAllocatable(AntiDepReg)) {
+ if (!MRI.isAllocatable(AntiDepReg)) {
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(dbgs() << " (non-allocatable)\n");
continue;
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 32ad34a76d6..7cde136c5ef 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -29,6 +29,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
std::pair<unsigned, unsigned> HintPair =
VRM.getRegInfo().getRegAllocationHint(VirtReg);
+ const MachineRegisterInfo &MRI = VRM.getRegInfo();
// HintPair.second is a register, phys or virt.
Hint = HintPair.second;
@@ -52,7 +53,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
unsigned *P = new unsigned[Order.size()];
Begin = P;
for (unsigned i = 0; i != Order.size(); ++i)
- if (!RCI.isReserved(Order[i]))
+ if (!MRI.isReserved(Order[i]))
*P++ = Order[i];
End = P;
@@ -69,7 +70,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
// The hint must be a valid physreg for allocation.
if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
- !RC->contains(Hint) || RCI.isReserved(Hint)))
+ !RC->contains(Hint) || MRI.isReserved(Hint)))
Hint = 0;
}
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 09e30eba579..5162ad762e7 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -314,8 +314,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// the return. Ignore noalias because it doesn't affect the call sequence.
const Function *F = ExitBB->getParent();
Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if (Attributes::Builder(CalleeRetAttr ^ CallerRetAttr)
- .removeAttribute(Attributes::NoAlias).hasAttributes())
+ if (AttrBuilder(CalleeRetAttr).removeAttribute(Attributes::NoAlias) !=
+ AttrBuilder(CallerRetAttr).removeAttribute(Attributes::NoAlias))
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
@@ -356,7 +356,7 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if (Attributes::Builder(CallerRetAttr)
+ if (AttrBuilder(CallerRetAttr)
.removeAttribute(Attributes::NoAlias).hasAttributes())
return false;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d74a70362a2..4de98da655b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -385,7 +385,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// - __tlv_bootstrap - used to make sure support exists
// - spare pointer, used when mapped by the runtime
// - pointer to mangled symbol above with initializer
- unsigned PtrSize = TD->getPointerSizeInBits()/8;
+ unsigned AS = GV->getType()->getAddressSpace();
+ unsigned PtrSize = TD->getPointerSizeInBits(AS)/8;
OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
PtrSize, 0);
OutStreamer.EmitIntValue(0, PtrSize, 0);
@@ -1299,7 +1300,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
// Emit the function pointers in the target-specific order
const DataLayout *TD = TM.getDataLayout();
- unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment(0));
std::stable_sort(Structors.begin(), Structors.end(), priority_order);
for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
const MCSection *OutputSection =
@@ -1480,8 +1481,9 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
if (Offset == 0)
return Base;
+ unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
// Truncate/sext the offset to the pointer size.
- unsigned Width = TD.getPointerSizeInBits();
+ unsigned Width = TD.getPointerSizeInBits(AS);
if (Width < 64)
Offset = SignExtend64(Offset, Width);
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index d94e1fe61bf..6c17af2e8c8 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -112,7 +112,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
switch (Encoding & 0x07) {
default: llvm_unreachable("Invalid encoded value.");
- case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize();
+ case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(0);
case dwarf::DW_EH_PE_udata2: return 2;
case dwarf::DW_EH_PE_udata4: return 4;
case dwarf::DW_EH_PE_udata8: return 8;
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 4d73b3c2226..73e18cd817b 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -200,7 +200,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
case dwarf::DW_FORM_addr:
- Size = Asm->getDataLayout().getPointerSize(); break;
+ Size = Asm->getDataLayout().getPointerSize(0); break;
default: llvm_unreachable("DIE Value form not supported yet");
}
Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
@@ -222,7 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_data8: return sizeof(int64_t);
case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
- case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
+ case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(0);
default: llvm_unreachable("DIE Value form not supported yet");
}
}
@@ -249,7 +249,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getDataLayout().getPointerSize(0);
}
#ifndef NDEBUG
@@ -273,7 +273,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getDataLayout().getPointerSize(0);
}
#ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index f93ea1b045b..28a96f3b2b6 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -214,9 +214,6 @@ namespace llvm {
///
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *) { return true; }
-
#ifndef NDEBUG
virtual void print(raw_ostream &O) = 0;
void dump();
@@ -257,7 +254,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEInteger *) { return true; }
static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
#ifndef NDEBUG
@@ -286,7 +282,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIELabel *) { return true; }
static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
#ifndef NDEBUG
@@ -313,7 +308,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEDelta *) { return true; }
static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
#ifndef NDEBUG
@@ -343,7 +337,6 @@ namespace llvm {
}
// Implement isa/cast/dyncast.
- static bool classof(const DIEEntry *) { return true; }
static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
#ifndef NDEBUG
@@ -383,7 +376,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEBlock *) { return true; }
static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
#ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 6acf19ee8c4..df162e07a88 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -384,7 +384,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
DebugRangeSymbols.size()
- * Asm->getDataLayout().getPointerSize());
+ * Asm->getDataLayout().getPointerSize(0));
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -450,7 +450,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
DebugRangeSymbols.size()
- * Asm->getDataLayout().getPointerSize());
+ * Asm->getDataLayout().getPointerSize(0));
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -1765,7 +1765,7 @@ void DwarfDebug::emitDebugInfo() {
Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
DwarfAbbrevSectionSym);
Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0));
emitDIE(Die);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
@@ -1811,14 +1811,14 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(0);
Asm->OutStreamer.AddComment("Op size");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0) + 1);
Asm->OutStreamer.AddComment("DW_LNE_set_address");
Asm->EmitInt8(dwarf::DW_LNE_set_address);
Asm->OutStreamer.AddComment("Section end label");
Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
- Asm->getDataLayout().getPointerSize(),
+ Asm->getDataLayout().getPointerSize(0),
0/*AddrSpace*/);
// Mark end of matrix.
@@ -2047,7 +2047,7 @@ void DwarfDebug::emitDebugLoc() {
// Start the dwarf loc section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
- unsigned char Size = Asm->getDataLayout().getPointerSize();
+ unsigned char Size = Asm->getDataLayout().getPointerSize(0);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
unsigned index = 1;
for (SmallVector<DotDebugLocEntry, 4>::iterator
@@ -2144,7 +2144,7 @@ void DwarfDebug::emitDebugRanges() {
// Start the dwarf ranges section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
- unsigned char Size = Asm->getDataLayout().getPointerSize();
+ unsigned char Size = Asm->getDataLayout().getPointerSize(0);
for (SmallVector<const MCSymbol *, 8>::iterator
I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
I != E; ++I) {
@@ -2202,7 +2202,7 @@ void DwarfDebug::emitDebugInlineInfo() {
Asm->OutStreamer.AddComment("Dwarf Version");
Asm->EmitInt16(dwarf::DWARF_VERSION);
Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0));
for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
E = InlinedSPNodes.end(); I != E; ++I) {
@@ -2233,7 +2233,7 @@ void DwarfDebug::emitDebugInlineInfo() {
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
Asm->OutStreamer.EmitSymbolValue(LI->first,
- Asm->getDataLayout().getPointerSize(),0);
+ Asm->getDataLayout().getPointerSize(0),0);
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 08fb6b3f52c..31d07141a1d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -417,7 +417,7 @@ void DwarfException::EmitExceptionTable() {
// that we're omitting that bit.
TTypeEncoding = dwarf::DW_EH_PE_omit;
// dwarf::DW_EH_PE_absptr
- TypeFormatSize = Asm->getDataLayout().getPointerSize();
+ TypeFormatSize = Asm->getDataLayout().getPointerSize(0);
} else {
// Okay, we have actual filters or typeinfos to emit. As such, we need to
// pick a type encoding for them. We're about to emit a list of pointers to
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index f7c011968c2..d0e27d1d04d 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -91,7 +91,7 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
/// either condition is detected in a function which uses the GC.
///
void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
- unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(0);
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(getModule(), AP, "code_end");
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index bc5258ef7d8..dee339a4586 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -164,7 +164,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
continue;
float hweight = Hint[hint] += weight;
if (TargetRegisterInfo::isPhysicalRegister(hint)) {
- if (hweight > bestPhys && LIS.isAllocatable(hint))
+ if (hweight > bestPhys && mri.isAllocatable(hint))
bestPhys = hweight, hintPhys = hint;
} else {
if (hweight > bestVirt)
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index cdae33c2441..22b91409240 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -50,7 +50,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
if (MinAlign > (int)Align)
Align = MinAlign;
MF.getFrameInfo()->ensureMaxAlignment(Align);
- TM.getTargetLowering()->HandleByVal(this, Size);
+ TM.getTargetLowering()->HandleByVal(this, Size, Align);
unsigned Offset = AllocateStack(Size, Align);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index a9de1c7490f..377b4712bea 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -527,7 +527,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (Edge->getKind() == SDep::Anti) {
AntiDepReg = Edge->getReg();
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!RegClassInfo.isAllocatable(AntiDepReg))
+ if (!MRI.isAllocatable(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
else if (KeepRegs.test(AntiDepReg))
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index b4394e8d56e..8964269dde5 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -33,7 +33,6 @@ namespace {
const MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
BitVector LivePhysRegs;
- BitVector ReservedRegs;
public:
static char ID; // Pass identification, replacement for typeid
@@ -70,7 +69,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
// Don't delete live physreg defs, or any reserved register defs.
- if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg))
+ if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
return false;
} else {
if (!MRI->use_nodbg_empty(Reg))
@@ -90,9 +89,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
- // Treat reserved registers as always live.
- ReservedRegs = TRI->getReservedRegs(MF);
-
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
// be cleaned up.
@@ -101,7 +97,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB = &*I;
// Start out assuming that reserved registers are live out of this block.
- LivePhysRegs = ReservedRegs;
+ LivePhysRegs = MRI->getReservedRegs();
// Also add any explicit live-out physregs for this block.
if (!MBB->empty() && MBB->back().isReturn())
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 141f8edc839..65bc4af99e2 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -110,8 +110,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
DomTree = &getAnalysis<MachineDominatorTree>();
if (!LRCalc)
LRCalc = new LiveRangeCalc();
- AllocatableRegs = TRI->getAllocatableSet(fn);
- ReservedRegs = TRI->getReservedRegs(fn);
// Allocate space for all virtual registers.
VirtRegIntervals.resize(MRI->getNumVirtRegs());
@@ -542,11 +540,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
// Ignore uses of reserved registers. We only track defs of those.
for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
unsigned Root = *Roots;
- if (!isReserved(Root) && !MRI->reg_empty(Root))
+ if (!MRI->isReserved(Root) && !MRI->reg_empty(Root))
LRCalc->extendToUses(LI, Root);
for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
unsigned Reg = *Supers;
- if (!isReserved(Reg) && !MRI->reg_empty(Reg))
+ if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
LRCalc->extendToUses(LI, Reg);
}
}
@@ -761,38 +759,41 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
LI->removeRange(Kill, MBBEnd);
if (EndPoints) EndPoints->push_back(MBBEnd);
- // Find all blocks that are reachable from MBB without leaving VNI's live
- // range.
- for (df_iterator<MachineBasicBlock*>
- I = df_begin(KillMBB), E = df_end(KillMBB); I != E;) {
- MachineBasicBlock *MBB = *I;
- // KillMBB itself was already handled.
- if (MBB == KillMBB) {
- ++I;
- continue;
- }
+ // Find all blocks that are reachable from KillMBB without leaving VNI's live
+ // range. It is possible that KillMBB itself is reachable, so start a DFS
+ // from each successor.
+ typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
+ VisitedTy Visited;
+ for (MachineBasicBlock::succ_iterator
+ SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
+ SuccI != SuccE; ++SuccI) {
+ for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
+ I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+ I != E;) {
+ MachineBasicBlock *MBB = *I;
+
+ // Check if VNI is live in to MBB.
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+ LiveRangeQuery LRQ(*LI, MBBStart);
+ if (LRQ.valueIn() != VNI) {
+ // This block isn't part of the VNI live range. Prune the search.
+ I.skipChildren();
+ continue;
+ }
- // Check if VNI is live in to MBB.
- tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
- LiveRangeQuery LRQ(*LI, MBBStart);
- if (LRQ.valueIn() != VNI) {
- // This block isn't part of the VNI live range. Prune the search.
- I.skipChildren();
- continue;
- }
+ // Prune the search if VNI is killed in MBB.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(MBBStart, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ I.skipChildren();
+ continue;
+ }
- // Prune the search if VNI is killed in MBB.
- if (LRQ.endPoint() < MBBEnd) {
- LI->removeRange(MBBStart, LRQ.endPoint());
- if (EndPoints) EndPoints->push_back(LRQ.endPoint());
- I.skipChildren();
- continue;
+ // VNI is live through MBB.
+ LI->removeRange(MBBStart, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+ ++I;
}
-
- // VNI is live through MBB.
- LI->removeRange(MBBStart, MBBEnd);
- if (EndPoints) EndPoints->push_back(MBBEnd);
- ++I;
}
}
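
The rewritten pruneValue relies on df_ext_iterator's skipChildren() to cut
off the search below blocks where the value is dead or killed. A generic
sketch of that pruned-DFS pattern over a toy adjacency list (explicit
stack, not LLVM's ADT; the successor-seeding detail that lets KillMBB be
re-reached is omitted):

    #include <cstdio>
    #include <vector>

    // Visit nodes reachable from Start, but do not descend past nodes where
    // Prune(n) is true -- the shape of the skipChildren() loop above.
    static void prunedDFS(const std::vector<std::vector<int> > &Succ,
                          int Start, bool (*Prune)(int)) {
      std::vector<bool> Visited(Succ.size(), false);
      std::vector<int> Stack(1, Start);
      Visited[Start] = true;
      while (!Stack.empty()) {
        int N = Stack.back();
        Stack.pop_back();
        std::printf("visit %d\n", N);
        if (Prune(N))              // analogous to I.skipChildren()
          continue;
        for (unsigned i = 0; i != Succ[N].size(); ++i)
          if (!Visited[Succ[N][i]]) {
            Visited[Succ[N][i]] = true;
            Stack.push_back(Succ[N][i]);
          }
      }
    }

    static bool pruneAtOne(int N) { return N == 1; }

    int main() {
      // 0 -> {1,2}, 1 -> {3}, 2 -> {0} (back edge), 3 -> {}.
      std::vector<std::vector<int> > Succ(4);
      Succ[0].push_back(1); Succ[0].push_back(2);
      Succ[1].push_back(3); Succ[2].push_back(0);
      prunedDFS(Succ, 0, pruneAtOne); // node 3 is never visited
      return 0;
    }
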
@@ -1007,246 +1008,252 @@ private:
LiveIntervals& LIS;
const MachineRegisterInfo& MRI;
const TargetRegisterInfo& TRI;
+ SlotIndex OldIdx;
SlotIndex NewIdx;
-
- typedef std::pair<LiveInterval*, LiveRange*> IntRangePair;
- typedef DenseSet<IntRangePair> RangeSet;
-
- struct RegRanges {
- LiveRange* Use;
- LiveRange* EC;
- LiveRange* Dead;
- LiveRange* Def;
- RegRanges() : Use(0), EC(0), Dead(0), Def(0) {}
- };
- typedef DenseMap<unsigned, RegRanges> BundleRanges;
+ SmallPtrSet<LiveInterval*, 8> Updated;
+ bool UpdateFlags;
public:
HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
- const TargetRegisterInfo& TRI, SlotIndex NewIdx)
- : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {}
-
- // Update intervals for all operands of MI from OldIdx to NewIdx.
- // This assumes that MI used to be at OldIdx, and now resides at
- // NewIdx.
- void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) {
- assert(NewIdx != OldIdx && "No-op move? That's a bit strange.");
-
- // Collect the operands.
- RangeSet Entering, Internal, Exiting;
- bool hasRegMaskOp = false;
- collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
-
- // To keep the LiveRanges valid within an interval, move the ranges closest
-    // to the destination first. This prevents ranges from overlapping, so that
- // APIs like removeRange still work.
- if (NewIdx < OldIdx) {
- moveAllEnteringFrom(OldIdx, Entering);
- moveAllInternalFrom(OldIdx, Internal);
- moveAllExitingFrom(OldIdx, Exiting);
- }
- else {
- moveAllExitingFrom(OldIdx, Exiting);
- moveAllInternalFrom(OldIdx, Internal);
- moveAllEnteringFrom(OldIdx, Entering);
- }
-
- if (hasRegMaskOp)
- updateRegMaskSlots(OldIdx);
-
-#ifndef NDEBUG
- LIValidator validator;
- validator = std::for_each(Entering.begin(), Entering.end(), validator);
- validator = std::for_each(Internal.begin(), Internal.end(), validator);
- validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
- assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness.");
-#endif
-
+ const TargetRegisterInfo& TRI,
+ SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags)
+ : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx),
+ UpdateFlags(UpdateFlags) {}
+
+ // FIXME: UpdateFlags is a workaround that creates live intervals for all
+ // physregs, even those that aren't needed for regalloc, in order to update
+ // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill
+ // flags, and postRA passes will use a live register utility instead.
+ LiveInterval *getRegUnitLI(unsigned Unit) {
+ if (UpdateFlags)
+ return &LIS.getRegUnit(Unit);
+ return LIS.getCachedRegUnit(Unit);
}
- // Update intervals for all operands of MI to refer to BundleStart's
- // SlotIndex.
- void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) {
- if (MI == BundleStart)
- return; // Bundling instr with itself - nothing to do.
-
- SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI);
- assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI &&
- "SlotIndex <-> Instruction mapping broken for MI");
-
- // Collect all ranges already in the bundle.
- MachineBasicBlock::instr_iterator BII(BundleStart);
- RangeSet Entering, Internal, Exiting;
- bool hasRegMaskOp = false;
- collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
- for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) {
- if (&*BII == MI)
+ /// Update all live ranges touched by MI, assuming a move from OldIdx to
+ /// NewIdx.
+ void updateAllRanges(MachineInstr *MI) {
+ DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI);
+ bool hasRegMask = false;
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (MO->isRegMask())
+ hasRegMask = true;
+ if (!MO->isReg())
continue;
- collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
- }
-
- BundleRanges BR = createBundleRanges(Entering, Internal, Exiting);
-
- Entering.clear();
- Internal.clear();
- Exiting.clear();
- collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
-
- DEBUG(dbgs() << "Entering: " << Entering.size() << "\n");
- DEBUG(dbgs() << "Internal: " << Internal.size() << "\n");
- DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n");
-
- moveAllEnteringFromInto(OldIdx, Entering, BR);
- moveAllInternalFromInto(OldIdx, Internal, BR);
- moveAllExitingFromInto(OldIdx, Exiting, BR);
+ // Aggressively clear all kill flags.
+ // They are reinserted by VirtRegRewriter.
+ if (MO->isUse())
+ MO->setIsKill(false);
+ unsigned Reg = MO->getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ updateRange(LIS.getInterval(Reg));
+ continue;
+ }
-#ifndef NDEBUG
- LIValidator validator;
- validator = std::for_each(Entering.begin(), Entering.end(), validator);
- validator = std::for_each(Internal.begin(), Internal.end(), validator);
- validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
- assert(validator.rangesOk() && "moveAllOperandsInto broke liveness.");
-#endif
+ // For physregs, only update the regunits that actually have a
+ // precomputed live range.
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = getRegUnitLI(*Units))
+ updateRange(*LI);
+ }
+ if (hasRegMask)
+ updateRegMaskSlots();
}
private:
+ /// Update a single live range, assuming an instruction has been moved from
+ /// OldIdx to NewIdx.
+ void updateRange(LiveInterval &LI) {
+ if (!Updated.insert(&LI))
+ return;
+ DEBUG({
+ dbgs() << " ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ dbgs() << PrintReg(LI.reg);
+ else
+ dbgs() << PrintRegUnit(LI.reg, &TRI);
+ dbgs() << ":\t" << LI << '\n';
+ });
+ if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
+ handleMoveDown(LI);
+ else
+ handleMoveUp(LI);
+ DEBUG(dbgs() << " -->\t" << LI << '\n');
+ LI.verify();
+ }
+
+ /// Update LI to reflect an instruction has been moved downwards from OldIdx
+ /// to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Move def to NewIdx, assert endpoint after NewIdx.
+ ///
+ /// 2. Live def at OldIdx, killed at NewIdx:
+ /// Change to dead def at NewIdx.
+ /// (Happens when bundling def+kill together).
+ ///
+ /// 3. Dead def at OldIdx:
+ /// Move def to NewIdx, possibly across another live value.
+ ///
+ /// 4. Def at OldIdx AND at NewIdx:
+ /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value read at OldIdx, killed before NewIdx:
+ /// Extend kill to NewIdx.
+ ///
+ void handleMoveDown(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ return;
-#ifndef NDEBUG
- class LIValidator {
- private:
- DenseSet<const LiveInterval*> Checked, Bogus;
- public:
- void operator()(const IntRangePair& P) {
- const LiveInterval* LI = P.first;
- if (Checked.count(LI))
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ bool isKill = SlotIndex::isSameInstr(OldIdx, I->end);
+ // If the live-in value already extends to NewIdx, there is nothing to do.
+ if (!SlotIndex::isEarlierInstr(I->end, NewIdx))
return;
- Checked.insert(LI);
- if (LI->empty())
+ // Aggressively remove all kill flags from the old kill point.
+      // Kill flags shouldn't be used while live intervals exist; they will be
+ // reinserted by VirtRegRewriter.
+ if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end))
+ for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse())
+ MO->setIsKill(false);
+ // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by
+ // overlapping ranges. Case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ // If this was a kill, there may also be a def. Otherwise we're done.
+ if (!isKill)
return;
- SlotIndex LastEnd = LI->begin()->start;
- for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end();
- LRI != LRE; ++LRI) {
- const LiveRange& LR = *LRI;
- if (LastEnd > LR.start || LR.start >= LR.end)
- Bogus.insert(LI);
- LastEnd = LR.end;
- }
- }
-
- bool rangesOk() const {
- return Bogus.empty();
- }
- };
-#endif
-
- // Collect IntRangePairs for all operands of MI that may need fixing.
-  // Treats MI's index as OldIdx (regardless of what it is in SlotIndexes'
- // maps).
- void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal,
- RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) {
- hasRegMaskOp = false;
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- const MachineOperand& MO = *MOI;
-
- if (MO.isRegMask()) {
- hasRegMaskOp = true;
- continue;
- }
-
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
-
- unsigned Reg = MO.getReg();
-
- // Don't track uses of reserved registers - they're not accurate.
- // Reserved register live ranges look like a set of dead defs.
- bool Resv =
- TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg);
-
- // Collect ranges for register units. These live ranges are computed on
- // demand, so just skip any that haven't been computed yet.
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
- if (LiveInterval *LI = LIS.getCachedRegUnit(*Units))
- collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx, Resv);
- } else {
- // Collect ranges for individual virtual registers.
- collectRanges(MO, &LIS.getInterval(Reg),
- Entering, Internal, Exiting, OldIdx);
- }
+ ++I;
}
- }
- void collectRanges(const MachineOperand &MO, LiveInterval *LI,
- RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting,
- SlotIndex OldIdx, bool IgnoreReads = false) {
- if (!IgnoreReads && MO.readsReg()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
- if (LR != 0)
- Entering.insert(std::make_pair(LI, LR));
+ // Check for a def at OldIdx.
+ if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start))
+ return;
+ // We have a def at OldIdx.
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+ // If the defined value extends beyond NewIdx, just move the def down.
+ // This is case 1 above.
+ if (SlotIndex::isEarlierInstr(NewIdx, I->end)) {
+ I->start = DefVNI->def;
+ return;
}
- if (MO.isDef()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
- assert(LR != 0 && "No live range for def?");
- if (LR->end > OldIdx.getDeadSlot())
- Exiting.insert(std::make_pair(LI, LR));
- else
- Internal.insert(std::make_pair(LI, LR));
+ // The remaining possibilities are now:
+ // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx).
+ // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot().
+ // In either case, it is possible that there is an existing def at NewIdx.
+ assert((I->end == OldIdx.getDeadSlot() ||
+ SlotIndex::isSameInstr(I->end, NewIdx)) &&
+ "Cannot move def below kill");
+ LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot());
+ if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ // There is an existing def at NewIdx, case 4 above. The def at OldIdx is
+ // coalesced into that value.
+ assert(NewI->valno != DefVNI && "Multiple defs of value?");
+ LI.removeValNo(DefVNI);
+ return;
}
+ // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx.
+ // If the def at OldIdx was dead, we allow it to be moved across other LI
+    // values. The new range should be placed immediately before NewI; move any
+ // intermediate ranges up.
+ assert(NewI != I && "Inconsistent iterators");
+ std::copy(llvm::next(I), NewI, I);
+ *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
}
- BundleRanges createBundleRanges(RangeSet& Entering,
- RangeSet& Internal,
- RangeSet& Exiting) {
- BundleRanges BR;
+ /// Update LI to reflect an instruction has been moved upwards from OldIdx
+ /// to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Hoist def to NewIdx.
+ ///
+ /// 2. Dead def at OldIdx:
+ /// Hoist def+end to NewIdx, possibly move across other values.
+ ///
+ /// 3. Dead def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at OldIdx, coalescing it with existing value.
+ ///
+ /// 4. Live def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value killed at OldIdx:
+ /// Hoist kill to NewIdx, then scan for last kill between NewIdx and
+ /// OldIdx.
+ ///
+ void handleMoveUp(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ return;
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI) {
- LiveInterval* LI = EI->first;
- LiveRange* LR = EI->second;
- BR[LI->reg].Use = LR;
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // If the live-in value isn't killed here, there is nothing to do.
+ if (!SlotIndex::isSameInstr(OldIdx, I->end))
+ return;
+ // Adjust I->end to end at NewIdx. If we are hoisting a kill above
+ // another use, we need to search for that use. Case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ ++I;
+ // If OldIdx also defines a value, there couldn't have been another use.
+ if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // No def, search for the new kill.
+ // This can never be an early clobber kill since there is no def.
+ llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot();
+ return;
+ }
}
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II) {
- LiveInterval* LI = II->first;
- LiveRange* LR = II->second;
- if (LR->end.isDead()) {
- BR[LI->reg].Dead = LR;
- } else {
- BR[LI->reg].EC = LR;
+ // Now deal with the def at OldIdx.
+ assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?");
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+
+ // Check for an existing def at NewIdx.
+ LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot());
+ if (SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ assert(NewI->valno != DefVNI && "Same value defined more than once?");
+ // There is an existing def at NewIdx.
+ if (I->end.isDead()) {
+ // Case 3: Remove the dead def at OldIdx.
+ LI.removeValNo(DefVNI);
+ return;
}
+ // Case 4: Replace def at NewIdx with live def at OldIdx.
+ I->start = DefVNI->def;
+ LI.removeValNo(NewI->valno);
+ return;
}
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI) {
- LiveInterval* LI = EI->first;
- LiveRange* LR = EI->second;
- BR[LI->reg].Def = LR;
+ // There is no existing def at NewIdx. Hoist DefVNI.
+ if (!I->end.isDead()) {
+ // Leave the end point of a live def.
+ I->start = DefVNI->def;
+ return;
}
- return BR;
- }
-
- void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) {
- MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx);
- if (!OldKillMI->killsRegister(reg))
- return; // Bail out if we don't have kill flags on the old register.
- MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx);
- assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill.");
- assert(!NewKillMI->killsRegister(reg) &&
- "New kill instr is already a kill.");
- OldKillMI->clearRegisterKills(reg, &TRI);
- NewKillMI->addRegisterKilled(reg, &TRI);
+ // DefVNI is a dead def. It may have been moved across other values in LI,
+ // so move I up to NewI. Slide [NewI;I) down one position.
+ std::copy_backward(NewI, I, llvm::next(I));
+ *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
}
- void updateRegMaskSlots(SlotIndex OldIdx) {
+ void updateRegMaskSlots() {
SmallVectorImpl<SlotIndex>::iterator RI =
std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
OldIdx);
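
In toy form, case 5 of handleMoveDown above (a value read at OldIdx whose
kill moves below NewIdx) is just an end-point extension; plain integers
stand in for SlotIndexes here.

    #include <cassert>

    struct Range { int start, end; }; // half-open [start, end)

    int main() {
      Range R = {4, 10};            // %v defined at slot 4, killed at 10
      int OldIdx = 10, NewIdx = 16; // the killing use moves down to 16
      if (R.end == OldIdx && NewIdx > R.end)
        R.end = NewIdx;             // extend the kill to NewIdx (case 5)
      assert(R.start == 4 && R.end == 16);
      return 0;
    }
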
@@ -1257,7 +1264,7 @@ private:
}
// Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) {
+ SlotIndex findLastUseBefore(unsigned Reg) {
SlotIndex LastUse = NewIdx;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -1291,233 +1298,26 @@ private:
}
return LastUse;
}
-
- void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- bool LiveThrough = LR->end > OldIdx.getRegSlot();
- if (LiveThrough)
- return;
- SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
- if (LastUse != NewIdx)
- moveKillFlags(LI->reg, NewIdx, LastUse);
- LR->end = LastUse.getRegSlot(LR->end.isEarlyClobber());
- }
-
- void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- // Extend the LiveRange if NewIdx is past the end.
- if (NewIdx > LR->end) {
- // Move kill flags if OldIdx was not originally the end
- // (otherwise LR->end points to an invalid slot).
- if (LR->end.getRegSlot() != OldIdx.getRegSlot()) {
- assert(LR->end > OldIdx && "LiveRange does not cover original slot");
- moveKillFlags(LI->reg, LR->end, NewIdx);
- }
- LR->end = NewIdx.getRegSlot(LR->end.isEarlyClobber());
- }
- }
-
- void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) {
- bool GoingUp = NewIdx < OldIdx;
-
- if (GoingUp) {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringUpFrom(OldIdx, *EI);
- } else {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringDownFrom(OldIdx, *EI);
- }
- }
-
- void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
- LR->end <= OldIdx.getDeadSlot() &&
- "Range should be internal to OldIdx.");
- LiveRange Tmp(*LR);
- Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber());
- Tmp.valno->def = Tmp.start;
- Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot();
- LI->removeRange(*LR);
- LI->addRange(Tmp);
- }
-
- void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) {
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II)
- moveInternalFrom(OldIdx, *II);
- }
-
- void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveRange* LR = P.second;
- assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
- "Range should start in OldIdx.");
- assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx.");
- SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber());
- LR->start = NewStart;
- LR->valno->def = NewStart;
- }
-
- void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) {
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI)
- moveExitingFrom(OldIdx, *EI);
- }
-
- void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- bool LiveThrough = LR->end > OldIdx.getRegSlot();
- if (LiveThrough) {
- assert((LR->start < NewIdx || BR[LI->reg].Def == LR) &&
- "Def in bundle should be def range.");
- assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
- "If bundle has use for this reg it should be LR.");
- BR[LI->reg].Use = LR;
- return;
- }
-
- SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
- moveKillFlags(LI->reg, OldIdx, LastUse);
-
- if (LR->start < NewIdx) {
- // Becoming a new entering range.
- assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 &&
- "Bundle shouldn't be re-defining reg mid-range.");
- assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
- "Bundle shouldn't have different use range for same reg.");
- LR->end = LastUse.getRegSlot();
- BR[LI->reg].Use = LR;
- } else {
- // Becoming a new Dead-def.
- assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) &&
- "Live range starting at unexpected slot.");
- assert(BR[LI->reg].Def == LR && "Reg should have def range.");
- assert(BR[LI->reg].Dead == 0 &&
- "Can't have def and dead def of same reg in a bundle.");
- LR->end = LastUse.getDeadSlot();
- BR[LI->reg].Dead = BR[LI->reg].Def;
- BR[LI->reg].Def = 0;
- }
- }
-
- void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- if (NewIdx > LR->end) {
- // Range extended to bundle. Add to bundle uses.
- // Note: Currently adds kill flags to bundle start.
- assert(BR[LI->reg].Use == 0 &&
- "Bundle already has use range for reg.");
- moveKillFlags(LI->reg, LR->end, NewIdx);
- LR->end = NewIdx.getRegSlot();
- BR[LI->reg].Use = LR;
- } else {
- assert(BR[LI->reg].Use != 0 &&
- "Bundle should already have a use range for reg.");
- }
- }
-
- void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering,
- BundleRanges& BR) {
- bool GoingUp = NewIdx < OldIdx;
-
- if (GoingUp) {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringUpFromInto(OldIdx, *EI, BR);
- } else {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringDownFromInto(OldIdx, *EI, BR);
- }
- }
-
- void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- // TODO: Sane rules for moving ranges into bundles.
- }
-
- void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal,
- BundleRanges& BR) {
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II)
- moveInternalFromInto(OldIdx, *II, BR);
- }
-
- void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
-
- assert(LR->start.isRegister() &&
- "Don't know how to merge exiting ECs into bundles yet.");
-
- if (LR->end > NewIdx.getDeadSlot()) {
- // This range is becoming an exiting range on the bundle.
- // If there was an old dead-def of this reg, delete it.
- if (BR[LI->reg].Dead != 0) {
- LI->removeRange(*BR[LI->reg].Dead);
- BR[LI->reg].Dead = 0;
- }
- assert(BR[LI->reg].Def == 0 &&
- "Can't have two defs for the same variable exiting a bundle.");
- LR->start = NewIdx.getRegSlot();
- LR->valno->def = LR->start;
- BR[LI->reg].Def = LR;
- } else {
- // This range is becoming internal to the bundle.
- assert(LR->end == NewIdx.getRegSlot() &&
- "Can't bundle def whose kill is before the bundle");
- if (BR[LI->reg].Dead || BR[LI->reg].Def) {
- // Already have a def for this. Just delete range.
- LI->removeRange(*LR);
- } else {
- // Make range dead, record.
- LR->end = NewIdx.getDeadSlot();
- BR[LI->reg].Dead = LR;
- assert(BR[LI->reg].Use == LR &&
- "Range becoming dead should currently be use.");
- }
- // In both cases the range is no longer a use on the bundle.
- BR[LI->reg].Use = 0;
- }
- }
-
- void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting,
- BundleRanges& BR) {
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI)
- moveExitingFromInto(OldIdx, *EI, BR);
- }
-
};
-void LiveIntervals::handleMove(MachineInstr* MI) {
+void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) {
+ assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
Indexes->removeMachineInstrFromMaps(MI);
- SlotIndex NewIndex = MI->isInsideBundle() ?
- Indexes->getInstructionIndex(MI) :
- Indexes->insertMachineInstrInMaps(MI);
+ SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
OldIndex < getMBBEndIdx(MI->getParent()) &&
"Cannot handle moves across basic block boundaries.");
- assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
- HMEditor HME(*this, *MRI, *TRI, NewIndex);
- HME.moveAllRangesFrom(MI, OldIndex);
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
}
void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
- MachineInstr* BundleStart) {
+ MachineInstr* BundleStart,
+ bool UpdateFlags) {
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
- HMEditor HME(*this, *MRI, *TRI, NewIndex);
- HME.moveAllRangesInto(MI, BundleStart);
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
}
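
The two entry points above now read the move's old index from the slot index map themselves and take an UpdateFlags argument. A usage sketch, mirroring ScheduleDAGMI::moveInstruction further down in this diff (BB, InsertPos, MI, and LIS are assumed to be in scope):

    BB->splice(InsertPos, BB, MI);             // physically move MI first
    LIS->handleMove(MI, /*UpdateFlags=*/true); // then repair live intervals
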
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 82710414b30..0dfb084f1e1 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -249,7 +249,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
unsigned Reg = MOI->getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
// Check if MI reads any unreserved physregs.
- if (Reg && MOI->readsReg() && !LIS.isReserved(Reg))
+ if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
continue;
}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 7359bb92a15..6ea933d4304 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -503,8 +503,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
TRI = MF->getTarget().getRegisterInfo();
- ReservedRegisters = TRI->getReservedRegs(mf);
-
unsigned NumRegs = TRI->getNumRegs();
PhysRegDef = new MachineInstr*[NumRegs];
PhysRegUse = new MachineInstr*[NumRegs];
@@ -588,7 +586,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
unsigned MOReg = UseRegs[i];
if (TargetRegisterInfo::isVirtualRegister(MOReg))
HandleVirtRegUse(MOReg, MBB, MI);
- else if (!ReservedRegisters[MOReg])
+ else if (!MRI->isReserved(MOReg))
HandlePhysRegUse(MOReg, MI);
}
@@ -601,7 +599,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
unsigned MOReg = DefRegs[i];
if (TargetRegisterInfo::isVirtualRegister(MOReg))
HandleVirtRegDef(MOReg, MI);
- else if (!ReservedRegisters[MOReg])
+ else if (!MRI->isReserved(MOReg))
HandlePhysRegDef(MOReg, MI, Defs);
}
UpdatePhysRegDefs(MI, Defs);
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 896461fd194..0f260205df2 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -63,8 +63,6 @@ namespace {
virtual void releaseMemory() {
ScopeMap.clear();
Exps.clear();
- AllocatableRegs.clear();
- ReservedRegs.clear();
}
private:
@@ -78,8 +76,6 @@ namespace {
ScopedHTType VNT;
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
- BitVector AllocatableRegs;
- BitVector ReservedRegs;
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
@@ -242,7 +238,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
return false;
for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
- if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i]))
+ if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
// Avoid extending live range of physical registers if they are
// allocatable or reserved.
return false;
@@ -635,7 +631,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<MachineDominatorTree>();
- AllocatableRegs = TRI->getAllocatableSet(MF);
- ReservedRegs = TRI->getReservedRegs(MF);
return PerformCSE(DT->getRootNode());
}
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index bac3aa2c155..4a793281b2c 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -16,6 +16,7 @@
#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -32,7 +33,7 @@ STATISTIC(NumDeletes, "Number of dead copies deleted");
namespace {
class MachineCopyPropagation : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
- BitVector ReservedRegs;
+ MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification, replacement for typeid
@@ -146,8 +147,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
if (CI != AvailCopyMap.end()) {
MachineInstr *CopyMI = CI->second;
- if (!ReservedRegs.test(Def) &&
- (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
+ if (!MRI->isReserved(Def) &&
+ (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
isNopCopy(CopyMI, Def, Src, TRI)) {
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
@@ -259,7 +260,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
DI != DE; ++DI) {
unsigned Reg = (*DI)->getOperand(0).getReg();
- if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg))
+ if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
continue;
(*DI)->eraseFromParent();
Changed = true;
@@ -296,7 +297,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
for (SmallSetVector<MachineInstr*, 8>::iterator
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
DI != DE; ++DI) {
- if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
+ if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
(*DI)->eraseFromParent();
Changed = true;
++NumDeletes;
@@ -311,7 +312,7 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
TRI = MF.getTarget().getRegisterInfo();
- ReservedRegs = TRI->getReservedRegs(MF);
+ MRI = &MF.getRegInfo();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
Changed |= CopyPropagateBlock(*I);
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 91d52118576..f11785070bb 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -550,7 +550,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
// address of a block, in which case it is the pointer size.
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
- return TD.getPointerSize();
+ return TD.getPointerSize(0);
case MachineJumpTableInfo::EK_GPRel64BlockAddress:
return 8;
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
@@ -570,7 +570,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
// alignment.
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
- return TD.getPointerABIAlignment();
+ return TD.getPointerABIAlignment(0);
case MachineJumpTableInfo::EK_GPRel64BlockAddress:
return TD.getABIIntegerTypeAlignment(64);
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 5fb938f3400..ae7c15be158 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -306,22 +306,18 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const {
void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
ReservedRegs = TRI->getReservedRegs(MF);
+ assert(ReservedRegs.size() == TRI->getNumRegs() &&
+ "Invalid ReservedRegs vector from target");
}
bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
- // Check if any overlapping register is modified.
+ // Check if any overlapping register is modified, or allocatable so it may be
+ // used later.
for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
- if (!def_empty(*AI))
- return false;
-
- // Check if any overlapping register is allocatable so it may be used later.
- if (AllocatableRegs.empty())
- AllocatableRegs = TRI->getAllocatableSet(MF);
- for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
- if (AllocatableRegs.test(*AI))
+ if (!def_empty(*AI) || isAllocatable(*AI))
return false;
return true;
}
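
This hunk, together with the many isReserved()/isAllocatable() call sites below, centralizes the reserved- and allocatable-register sets in MachineRegisterInfo instead of letting each pass cache its own BitVector from TargetRegisterInfo. A minimal sketch of the freeze-once, query-everywhere pattern, with illustrative names rather than the real LLVM API:

    #include <cassert>
    #include <utility>
    #include <vector>

    class RegInfo { // stand-in for MachineRegisterInfo
      std::vector<bool> Reserved; // frozen per-function reserved set
      bool Frozen;
    public:
      RegInfo() : Frozen(false) {}
      // Called once, after frame setup, with the target's answer.
      void freezeReservedRegs(std::vector<bool> FromTarget) {
        Reserved = std::move(FromTarget);
        Frozen = true;
      }
      // All passes query this instead of keeping a private copy.
      bool isReserved(unsigned Reg) const {
        assert(Frozen && "queried before freezeReservedRegs()");
        return Reg < Reserved.size() && Reserved[Reg];
      }
    };
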
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 11a7d4760cb..c55e8b78988 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAGILP.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
@@ -359,7 +360,7 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
BB->splice(InsertPos, BB, MI);
// Update LiveIntervals
- LIS->handleMove(MI);
+ LIS->handleMove(MI, /*UpdateFlags=*/true);
// Recede RegionBegin if an instruction moves above the first.
if (RegionBegin == InsertPos)
@@ -451,26 +452,6 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
}
}
-// Release all DAG roots for scheduling.
-void ScheduleDAGMI::releaseRoots() {
- SmallVector<SUnit*, 16> BotRoots;
-
- for (std::vector<SUnit>::iterator
- I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
- // A SUnit is ready to top schedule if it has no predecessors.
- if (I->Preds.empty())
- SchedImpl->releaseTopNode(&(*I));
- // A SUnit is ready to bottom schedule if it has no successors.
- if (I->Succs.empty())
- BotRoots.push_back(&(*I));
- }
- // Release bottom roots in reverse order so the higher priority nodes appear
- // first. This is more natural and slightly more efficient.
- for (SmallVectorImpl<SUnit*>::const_reverse_iterator
- I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
- SchedImpl->releaseBottomNode(*I);
-}
-
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
@@ -532,8 +513,29 @@ void ScheduleDAGMI::postprocessDAG() {
}
}
+// Release all DAG roots for scheduling.
+void ScheduleDAGMI::releaseRoots() {
+ SmallVector<SUnit*, 16> BotRoots;
+
+ for (std::vector<SUnit>::iterator
+ I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (I->Preds.empty())
+ SchedImpl->releaseTopNode(&(*I));
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (I->Succs.empty())
+ BotRoots.push_back(&(*I));
+ }
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
+ SchedImpl->releaseBottomNode(*I);
+}
+
/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues() {
+
// Initialize the strategy before modifying the DAG.
SchedImpl->initialize(this);
@@ -544,6 +546,8 @@ void ScheduleDAGMI::initQueues() {
// Release all DAG roots for scheduling.
releaseRoots();
+ SchedImpl->registerRoots();
+
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
CurrentBottom = RegionEnd;
}
@@ -1198,6 +1202,86 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.",
createConvergingSched);
//===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Order nodes by the ILP metric.
+struct ILPOrder {
+ ScheduleDAGILP *ILP;
+ bool MaximizeILP;
+
+ ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
+
+ /// \brief Apply a less-than relation on node priority.
+ bool operator()(const SUnit *A, const SUnit *B) const {
+ // Return true if A comes after B in the Q.
+ if (MaximizeILP)
+ return ILP->getILP(A) < ILP->getILP(B);
+ else
+ return ILP->getILP(A) > ILP->getILP(B);
+ }
+};
+
+/// \brief Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+ ScheduleDAGILP ILP;
+ ILPOrder Cmp;
+
+ std::vector<SUnit*> ReadyQ;
+public:
+ ILPScheduler(bool MaximizeILP)
+ : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
+
+ virtual void initialize(ScheduleDAGMI *DAG) {
+ ReadyQ.clear();
+ ILP.resize(DAG->SUnits.size());
+ }
+
+ virtual void registerRoots() {
+ for (std::vector<SUnit*>::const_iterator
+ I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
+ ILP.computeILP(*I);
+ }
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ if (ReadyQ.empty()) return NULL;
+ pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ SUnit *SU = ReadyQ.back();
+ ReadyQ.pop_back();
+ IsTopNode = false;
+ DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
+ << " ILP: " << ILP.getILP(SU) << '\n');
+ return SU;
+ }
+
+ virtual void schedNode(SUnit *, bool) {}
+
+ virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
+
+ virtual void releaseBottomNode(SUnit *SU) {
+ ReadyQ.push_back(SU);
+ std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(true));
+}
+static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(false));
+}
+static MachineSchedRegistry ILPMaxRegistry(
+ "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
+static MachineSchedRegistry ILPMinRegistry(
+ "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
+
+//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//
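
The ILPScheduler above keeps its ready queue as a binary heap via std::push_heap/std::pop_heap, with a comparator whose sense flips between the ilpmax and ilpmin registrations. A self-contained sketch of that discipline (Node and Priority are stand-ins for SUnit and the ILP metric):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Node { int Priority; };

    struct Order { // "less-than" on priority: the heap pops the greatest first
      bool Maximize;
      bool operator()(const Node *A, const Node *B) const {
        return Maximize ? A->Priority < B->Priority
                        : A->Priority > B->Priority;
      }
    };

    int main() {
      Node Ns[] = { {3}, {1}, {2} };
      std::vector<Node*> ReadyQ;
      Order Cmp = { true };               // maximize, as in createILPMaxScheduler
      for (unsigned i = 0; i != 3; ++i) { // releaseBottomNode()
        ReadyQ.push_back(&Ns[i]);
        std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
      }
      while (!ReadyQ.empty()) {           // pickNode()
        std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
        std::printf("%d\n", ReadyQ.back()->Priority); // prints 3, 2, 1
        ReadyQ.pop_back();
      }
      return 0;
    }
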
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index bc383cba455..b117f8c3a20 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -49,7 +49,6 @@ namespace {
MachineDominatorTree *DT; // Machine dominator tree
MachineLoopInfo *LI;
AliasAnalysis *AA;
- BitVector AllocatableSet; // Which physregs are allocatable?
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
@@ -229,7 +228,6 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
- AllocatableSet = TRI->getAllocatableSet(MF);
bool EverMadeChange = false;
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index b3abec76bc9..9686b041329 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -850,14 +850,14 @@ static bool pushDepHeight(const DataDep &Dep,
return false;
}
-/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists
-/// of all the blocks in Trace. Stop when reaching the block that contains
-/// DefMI.
+/// Assuming that the virtual register defined by DefMI:DefOp was used by
+/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
+/// when reaching the block that contains DefMI.
void MachineTraceMetrics::Ensemble::
-addLiveIns(const MachineInstr *DefMI,
+addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace) {
assert(!Trace.empty() && "Trace should contain at least one block");
- unsigned Reg = DefMI->getOperand(0).getReg();
+ unsigned Reg = DefMI->getOperand(DefOp).getReg();
assert(TargetRegisterInfo::isVirtualRegister(Reg));
const MachineBasicBlock *DefMBB = DefMI->getParent();
@@ -950,7 +950,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
if (pushDepHeight(Deps.front(), PHI, Height,
Heights, MTM.SchedModel, MTM.TII))
- addLiveIns(Deps.front().DefMI, Stack);
+ addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
}
}
}
@@ -983,7 +983,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
// Update the required height of any virtual registers read by MI.
for (unsigned i = 0, e = Deps.size(); i != e; ++i)
if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
- addLiveIns(Deps[i].DefMI, Stack);
+ addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack);
InstrCycles &MICycles = Cycles[MI];
MICycles.Height = Cycle;
diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h
index 5f3b1d23e41..460730b0405 100644
--- a/lib/CodeGen/MachineTraceMetrics.h
+++ b/lib/CodeGen/MachineTraceMetrics.h
@@ -279,7 +279,7 @@ public:
unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
void computeInstrDepths(const MachineBasicBlock*);
void computeInstrHeights(const MachineBasicBlock*);
- void addLiveIns(const MachineInstr *DefMI,
+ void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace);
protected:
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 181e09ecc9e..dca68da2f3e 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -80,7 +80,6 @@ namespace {
BlockSet FunctionBlocks;
BitVector regsReserved;
- BitVector regsAllocatable;
RegSet regsLive;
RegVector regsDefined, regsDead, regsKilled;
RegMaskVector regMasks;
@@ -186,7 +185,7 @@ namespace {
}
bool isAllocatable(unsigned Reg) {
- return Reg < regsAllocatable.size() && regsAllocatable.test(Reg);
+ return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg);
}
// Analysis information if available
@@ -427,7 +426,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
void MachineVerifier::visitMachineFunctionBefore() {
lastIndex = SlotIndex();
- regsReserved = TRI->getReservedRegs(*MF);
+ regsReserved = MRI->getReservedRegs();
// A sub-register of a reserved register is also reserved
for (int Reg = regsReserved.find_first(); Reg>=0;
@@ -439,8 +438,6 @@ void MachineVerifier::visitMachineFunctionBefore() {
}
}
- regsAllocatable = TRI->getAllocatableSet(*MF);
-
markReachable(&MF->front());
// Build a set of the basic blocks in the function.
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 4ea21d4ff7b..abd62efc026 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -359,7 +359,7 @@ void TargetPassConfig::addIRPasses() {
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
- addPass(createLoopStrengthReducePass(getTargetLowering()));
+ addPass(createLoopStrengthReducePass());
if (PrintLSR)
addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
@@ -389,7 +389,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::None:
- addPass(createLowerInvokePass(TM->getTargetLowering()));
+ addPass(createLowerInvokePass());
// The lower invoke pass may create unreachable code. Remove it.
addPass(createUnreachableBlockEliminationPass());
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 9099862bd31..a795ac8448f 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -527,6 +527,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SeenMoveImm = true;
} else {
Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
+ // optimizeExtInstr might have created new instructions after MI
+ // and before the already incremented MII. Adjust MII so that the
+ // next iteration sees the new instructions.
+ MII = MI;
+ ++MII;
if (SeenMoveImm)
Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
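
The fix above re-derives the next iterator from MI because optimizeExtInstr may insert instructions between MI and the already-advanced MII. A generic sketch of the re-anchoring idiom on a std::list (transform() is a hypothetical mutation that may insert right after the current element):

    #include <list>

    void visitAll(std::list<int> &L) {
      for (std::list<int>::iterator It = L.begin(); It != L.end(); ) {
        std::list<int>::iterator Cur = It++; // advance eagerly, like MII
        // transform(L, Cur);  // may insert new elements right after Cur
        It = Cur;   // re-anchor so the next iteration
        ++It;       // sees anything inserted after Cur
      }
    }
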
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 32c02bf0f03..d57bc7362de 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -490,7 +490,6 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
BitVector killedRegs(TRI->getNumRegs());
- BitVector ReservedRegs = TRI->getReservedRegs(MF);
StartBlockForKills(MBB);
@@ -531,7 +530,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse()) continue;
unsigned Reg = MO.getReg();
- if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
bool kill = false;
if (!killedRegs.test(Reg)) {
@@ -566,7 +565,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
- if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
LiveRegs.set(Reg);
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index f573d419ea5..e096240e04b 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -509,7 +509,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
// Ignore invalid hints.
if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
- !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint)))
+ !RC->contains(Hint) || !MRI->isAllocatable(Hint)))
Hint = 0;
// Take hint when possible.
@@ -838,7 +838,7 @@ void RAFast::AllocateBasicBlock() {
// Add live-in registers as live.
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I)
- if (RegClassInfo.isAllocatable(*I))
+ if (MRI->isAllocatable(*I))
definePhysReg(MII, *I, regReserved);
SmallVector<unsigned, 8> VirtDead;
@@ -970,7 +970,7 @@ void RAFast::AllocateBasicBlock() {
}
continue;
}
- if (!RegClassInfo.isAllocatable(Reg)) continue;
+ if (!MRI->isAllocatable(Reg)) continue;
if (MO.isUse()) {
usePhysReg(MO);
} else if (MO.isEarlyClobber()) {
@@ -1058,7 +1058,7 @@ void RAFast::AllocateBasicBlock() {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (!RegClassInfo.isAllocatable(Reg)) continue;
+ if (!MRI->isAllocatable(Reg)) continue;
definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
regFree : regReserved);
continue;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 984aab2a7a8..9320993d90a 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -208,8 +208,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
mri->setPhysRegUsed(Reg);
}
- BitVector reservedRegs = tri->getReservedRegs(*mf);
-
// Iterate over vregs.
for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
vregItr != vregEnd; ++vregItr) {
@@ -227,7 +225,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
for (unsigned i = 0; i != rawOrder.size(); ++i) {
unsigned preg = rawOrder[i];
- if (reservedRegs.test(preg))
+ if (mri->isReserved(preg))
continue;
// vregLI crosses a regmask operand that clobbers preg.
@@ -357,7 +355,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
loopInfo->getLoopDepth(mbb));
if (cp.isPhys()) {
- if (!lis->isAllocatable(dst)) {
+ if (!mf->getRegInfo().isAllocatable(dst)) {
continue;
}
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 652bc3015a3..805d2356730 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -15,8 +15,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -57,10 +58,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
CalleeSaved = CSR;
// Different reserved registers?
- BitVector RR = TRI->getReservedRegs(*MF);
- if (RR != Reserved)
+ const BitVector &RR = MF->getRegInfo().getReservedRegs();
+ if (Reserved.size() != RR.size() || RR != Reserved) {
Update = true;
- Reserved = RR;
+ Reserved = RR;
+ }
// Invalidate cached information from previous function.
if (Update)
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 1b46256baf2..ba6b4569a8f 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -895,7 +895,7 @@ bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
/// always beneficial.
- if (!RegClassInfo.isReserved(CP.getDstReg())) {
+ if (!MRI->isReserved(CP.getDstReg())) {
DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
return false;
}
@@ -1070,7 +1070,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
/// Attempt joining with a reserved physreg.
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
assert(CP.isPhys() && "Must be a physreg copy");
- assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register");
+ assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
<< '\n');
@@ -1241,6 +1241,9 @@ class JoinVals {
// Value in the other live range that overlaps this def, if any.
VNInfo *OtherVNI;
+ // Is this value an IMPLICIT_DEF?
+ bool IsImplicitDef;
+
// True when the live range of this value will be pruned because of an
// overlapping CR_Replace value in the other live range.
bool Pruned;
@@ -1249,7 +1252,8 @@ class JoinVals {
bool PrunedComputed;
Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
- RedefVNI(0), OtherVNI(0), Pruned(false), PrunedComputed(false) {}
+ RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false),
+ PrunedComputed(false) {}
bool isAnalyzed() const { return WriteLanes != 0; }
};
@@ -1385,8 +1389,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
}
// An IMPLICIT_DEF writes undef values.
- if (DefMI->isImplicitDef())
+ if (DefMI->isImplicitDef()) {
+ V.IsImplicitDef = true;
V.ValidLanes &= ~V.WriteLanes;
+ }
}
// Find the value in Other that overlaps VNI->def, if any.
@@ -1724,22 +1730,34 @@ void JoinVals::pruneValues(JoinVals &Other,
switch (Vals[i].Resolution) {
case CR_Keep:
break;
- case CR_Replace:
+ case CR_Replace: {
// This value takes precedence over the value in Other.LI.
LIS->pruneValue(&Other.LI, Def, &EndPoints);
- // Remove <def,read-undef> flags. This def is now a partial redef.
+ // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
+ // instructions are only inserted to provide a live-out value for PHI
+ // predecessors, so the instruction should simply go away once its value
+ // has been replaced.
+ Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
+ bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep;
if (!Def.isBlock()) {
+ // Remove <def,read-undef> flags. This def is now a partial redef.
+ // Also remove <def,dead> flags since the joined live range will
+ // continue past this instruction.
for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
MO.isValid(); ++MO)
- if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg)
- MO->setIsUndef(false);
- // This value will reach instructions below, but we need to make sure
- // the live range also reaches the instruction at Def.
- EndPoints.push_back(Def);
+ if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
+ MO->setIsUndef(EraseImpDef);
+ MO->setIsDead(false);
+ }
+ // This value will reach instructions below, but we need to make sure
+ // the live range also reaches the instruction at Def.
+ if (!EraseImpDef)
+ EndPoints.push_back(Def);
}
DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
<< ": " << Other.LI << '\n');
break;
+ }
case CR_Erase:
case CR_Merge:
if (isPrunedValue(i, Other)) {
@@ -1762,21 +1780,41 @@ void JoinVals::pruneValues(JoinVals &Other,
void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs) {
for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
- if (Vals[i].Resolution != CR_Erase)
- continue;
+ // Get the def location before markUnused() below invalidates it.
SlotIndex Def = LI.getValNumInfo(i)->def;
- MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
- assert(MI && "No instruction to erase");
- if (MI->isCopy()) {
- unsigned Reg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- Reg != CP.getSrcReg() && Reg != CP.getDstReg())
- ShrinkRegs.push_back(Reg);
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
+ // longer. The IMPLICIT_DEF instructions are only inserted by
+ // PHIElimination to guarantee that all PHI predecessors have a value.
+ if (!Vals[i].IsImplicitDef || !Vals[i].Pruned)
+ break;
+ // Remove value number i from LI. Note that this VNInfo is still present
+ // in NewVNInfo, so it will appear as an unused value number in the final
+ // joined interval.
+ LI.getValNumInfo(i)->markUnused();
+ LI.removeValNo(LI.getValNumInfo(i));
+ DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
+ // FALL THROUGH.
+
+ case CR_Erase: {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+ assert(MI && "No instruction to erase");
+ if (MI->isCopy()) {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+ ShrinkRegs.push_back(Reg);
+ }
+ ErasedInstrs.insert(MI);
+ DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ break;
+ }
+ default:
+ break;
}
- ErasedInstrs.insert(MI);
- DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
- LIS->RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
}
}
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index b267aea816c..94779770e0e 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -337,7 +337,7 @@ static void collectOperands(const MachineInstr *MI,
PhysRegOperands &PhysRegOpers,
VirtRegOperands &VirtRegOpers,
const TargetRegisterInfo *TRI,
- const RegisterClassInfo *RCI) {
+ const MachineRegisterInfo *MRI) {
for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) {
const MachineOperand &MO = *OperI;
if (!MO.isReg() || !MO.getReg())
@@ -345,7 +345,7 @@ static void collectOperands(const MachineInstr *MI,
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
VirtRegOpers.collect(MO, TRI);
- else if (RCI->isAllocatable(MO.getReg()))
+ else if (MRI->isAllocatable(MO.getReg()))
PhysRegOpers.collect(MO, TRI);
}
// Remove redundant physreg dead defs.
@@ -451,7 +451,7 @@ bool RegPressureTracker::recede() {
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Boost pressure for all dead defs together.
increasePhysRegPressure(PhysRegOpers.DeadDefs);
@@ -524,7 +524,7 @@ bool RegPressureTracker::advance() {
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Kill liveness at last uses.
for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
@@ -666,7 +666,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
// Account for register pressure similar to RegPressureTracker::recede().
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Boost max pressure for all dead defs together.
// Since CurrSetPressure and MaxSetPressure
@@ -752,7 +752,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
// Account for register pressure similar to RegPressureTracker::recede().
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Kill liveness at last uses. Assume allocatable physregs are single-use
// rather than checking LiveIntervals.
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index d673794e1b9..5ec6564ce39 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -92,9 +92,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
KillRegs.resize(NumPhysRegs);
DefRegs.resize(NumPhysRegs);
- // Create reserved registers bitvector.
- ReservedRegs = TRI->getReservedRegs(MF);
-
// Create callee-saved registers bitvector.
CalleeSavedRegs.resize(NumPhysRegs);
const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
@@ -225,9 +222,9 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
used = RegsAvailable;
used.flip();
if (includeReserved)
- used |= ReservedRegs;
+ used |= MRI->getReservedRegs();
else
- used.reset(ReservedRegs);
+ used.reset(MRI->getReservedRegs());
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index aa45a6861ca..8dcbf83353d 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAGILP.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
@@ -30,6 +31,7 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -933,3 +935,94 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
std::string ScheduleDAGInstrs::getDAGName() const {
return "dag." + BB->getFullName();
}
+
+namespace {
+/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+class SchedDAGReverseDFS {
+ std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+public:
+ bool isComplete() const { return DFSStack.empty(); }
+
+ void follow(const SUnit *SU) {
+ DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
+ }
+ void advance() { ++DFSStack.back().second; }
+
+ void backtrack() { DFSStack.pop_back(); }
+
+ const SUnit *getCurr() const { return DFSStack.back().first; }
+
+ SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
+
+ SUnit::const_pred_iterator getPredEnd() const {
+ return getCurr()->Preds.end();
+ }
+};
+} // anonymous
+
+void ScheduleDAGILP::resize(unsigned NumSUnits) {
+ ILPValues.resize(NumSUnits);
+}
+
+ILPValue ScheduleDAGILP::getILP(const SUnit *SU) {
+ return ILPValues[SU->NodeNum];
+}
+
+// A leaf node has an ILP of 1/1.
+static ILPValue initILP(const SUnit *SU) {
+ unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1;
+ return ILPValue(Cnt, 1 + SU->getDepth());
+}
+
+/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// search from this root.
+void ScheduleDAGILP::computeILP(const SUnit *Root) {
+ if (!IsBottomUp)
+ llvm_unreachable("Top-down ILP metric is unimplemnted");
+
+ SchedDAGReverseDFS DFS;
+ // Mark a node visited by validating it.
+ ILPValues[Root->NodeNum] = initILP(Root);
+ DFS.follow(Root);
+ for (;;) {
+ // Traverse the leftmost path as far as possible.
+ while (DFS.getPred() != DFS.getPredEnd()) {
+ const SUnit *PredSU = DFS.getPred()->getSUnit();
+ DFS.advance();
+ // If the pred is already valid, skip it.
+ if (ILPValues[PredSU->NodeNum].isValid())
+ continue;
+ ILPValues[PredSU->NodeNum] = initILP(PredSU);
+ DFS.follow(PredSU);
+ }
+ // Visit the top of the stack in postorder and backtrack.
+ unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
+ DFS.backtrack();
+ if (DFS.isComplete())
+ break;
+ // Add the recently finished predecessor's bottom-up descendant count.
+ ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ILPValue::print(raw_ostream &OS) const {
+ if (!isValid()) {
+ OS << "BADILP";
+ return;
+ }
+ OS << InstrCount << " / " << Cycles << " = "
+ << format("%g", ((double)InstrCount / Cycles));
+}
+
+void ILPValue::dump() const {
+ dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+ Val.print(OS);
+ return OS;
+}
+
+} // namespace llvm
+#endif // !NDEBUG || LLVM_ENABLE_DUMP
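
computeILP() above avoids recursion by driving the depth-first walk with an explicit stack and folding each finished subtree into its parent on backtrack. A self-contained sketch of that traversal, assuming every node starts with Count = 1 and Seen = false (SU and the field names are stand-ins):

    #include <cstddef>
    #include <utility>
    #include <vector>

    struct SU {
      std::vector<SU*> Preds;
      unsigned Count; // starts at 1; becomes the subtree instruction count
      bool Seen;      // starts false; marks the node visited
    };

    // After the call, Root->Count is the number of nodes reachable from Root.
    static void computeCounts(SU *Root) {
      typedef std::pair<SU*, size_t> Frame; // (node, next pred to visit)
      std::vector<Frame> Stack;
      Root->Seen = true;
      Stack.push_back(Frame(Root, 0));
      for (;;) {
        // Descend along the first unvisited predecessor.
        while (Stack.back().second != Stack.back().first->Preds.size()) {
          SU *Pred = Stack.back().first->Preds[Stack.back().second++];
          if (Pred->Seen)
            continue;
          Pred->Seen = true;
          Stack.push_back(Frame(Pred, 0));
        }
        // Postorder: fold the finished subtree into its parent.
        unsigned Done = Stack.back().first->Count;
        Stack.pop_back();
        if (Stack.empty())
          break;
        Stack.back().first->Count += Done;
      }
    }
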
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 80f495309d1..2ec129f7308 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1587,26 +1587,71 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
break;
case TargetLowering::Expand: {
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ ISD::CondCode InvCC = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default: llvm_unreachable("Don't know how to expand this condition!");
- case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- // FIXME: Implement more expansions.
- }
-
- SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
- SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ case ISD::SETO:
+ assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
+ == TargetLowering::Legal
+ && "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+ case ISD::SETUO:
+ assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
+ == TargetLowering::Legal
+ && "If SETUO is expanded, SETUNE must be legal!");
+ CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If we are floating point, assign and break, otherwise fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+ // Fallthrough if we are unsigned integer.
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
+ // Only expansion via the swapped-operand compare is supported;
+ // no other strategy is implemented for these cases.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+ LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
+ RHS = SDValue();
+ CC = SDValue();
+ return;
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+ // If we aren't the ordered or unordered operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+ }
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
@@ -3108,6 +3153,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp3 = Node->getOperand(1);
if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
(isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ // If div is legal, it's better to do the normal expansion
+ !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
useDivRem(Node, isSigned, false))) {
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
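
The SETCC expansion above leans on the layout of the ISD::CondCode enum: bit 3 separates the unordered FP compares from the ordered ones, and OR-ing 0x10 onto the low three bits yields the NaN-agnostic integer-style compare. A worked example using an illustrative subset of that layout (not the full ISD table):

    #include <cstdio>
    #include <initializer_list>

    enum CondCode {
      SETOEQ = 0x1, SETOLT = 0x4,  // ordered FP compares
      SETO = 0x7, SETUO = 0x8,     // "ordered?" / "unordered?" predicates
      SETUEQ = 0x9, SETULT = 0xc,  // unordered FP compares (ordered + 8)
      SETEQ = 0x11, SETLT = 0x14   // integer compares
    };

    int main() {
      for (unsigned CC : {SETOEQ, SETOLT, SETUEQ, SETULT}) {
        unsigned CC2 = (CC & 0x8) ? SETUO : SETO; // the bit-3 test in the hunk
        unsigned CC1 = (CC & 0x7) | 0x10;         // strip orderedness, keep kind
        std::printf("cc=0x%02x -> cc1=0x%02x cc2=0x%02x\n", CC, CC1, CC2);
      }
      return 0;
    }
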
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e3938968b20..92dc5a9831b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1245,32 +1245,30 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
- if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
- == TargetLowering::Custom)
- Res = TLI.LowerOperation(SDValue(N, 0), DAG);
-
- if (Res.getNode() == 0) {
- switch (N->getOpcode()) {
- default:
- #ifndef NDEBUG
- dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
- N->dump(&DAG); dbgs() << "\n";
- #endif
- llvm_unreachable("Do not know how to expand this operator's operand!");
-
- case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
- case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
- case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
-
- case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
- case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
- case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
- case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
- case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
- case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
- case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
- OpNo); break;
- }
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
}
// If the result is null, the sub-method took care of registering results etc.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 37f0e60087d..20b7ce6b15b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -634,7 +634,7 @@ private:
SDValue WidenVecRes_InregOp(SDNode *N);
// Widen Vector Operand.
- bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+ bool WidenVectorOperand(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index bb54fd24e21..6bcb3b25e98 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -124,6 +124,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// there are only two nodes left, i.e. Lo and Hi.
SDValue LHS = Vals[Slot];
SDValue RHS = Vals[Slot + 1];
+
+ if (TLI.isBigEndian())
+ std::swap(LHS, RHS);
+
Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
EVT::getIntegerVT(
*DAG.getContext(),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index eca4d99098a..d51a6eb192e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2082,16 +2082,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
//===----------------------------------------------------------------------===//
// Widen Vector Operand
//===----------------------------------------------------------------------===//
-bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
+ // See if the target wants to custom widen this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
+ dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 79cfcdfe0ea..183416f3fd2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3449,9 +3449,12 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
IsZeroVal, MemcpyStrSrc,
DAG.getMachineFunction());
+ Type *vtType = VT.isExtended() ? VT.getTypeForEVT(*DAG.getContext()) : NULL;
+ unsigned AS = (vtType && vtType->isPointerTy()) ?
+ cast<PointerType>(vtType)->getAddressSpace() : 0;
if (VT == MVT::Other) {
- if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() ||
+ if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) ||
TLI.allowsUnalignedMemoryAccesses(VT)) {
VT = TLI.getPointerTy();
} else {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 8fc9c70f995..c314fa5b511 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -474,6 +474,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MRI.replaceRegWith(From, To);
}
+ // Freeze the set of reserved registers now that MachineFrameInfo has been
+ // set up. All the information required by getReservedRegs() should be
+ // available now.
+ MRI.freezeReservedRegs(*MF);
+
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
FuncInfo->clear();
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 8ed66f70443..4439192fe2f 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -563,6 +563,8 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
/// Return the default expected latency for a def based on its opcode.
unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
const MachineInstr *DefMI) const {
+ if (DefMI->isTransient())
+ return 0;
if (DefMI->mayLoad())
return SchedModel->LoadLatency;
if (isHighLatencyDef(DefMI->getOpcode()))
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 8f5d770f665..bf26a6d5920 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -77,9 +77,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Flags,
SectionKind::getDataRel(),
0, Label->getName());
- unsigned Size = TM.getDataLayout()->getPointerSize();
+ unsigned Size = TM.getDataLayout()->getPointerSize(0);
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
+ Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment(0));
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::Create(Size, getContext());
Streamer.EmitELFSize(Label, E);
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 4e753c6ecb4..7a6e2604d77 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -50,10 +50,12 @@ unsigned TargetSchedModel::getNumMicroOps(MachineInstr *MI) const {
int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI);
}
- if (hasInstrSchedModel())
- return resolveSchedClass(MI)->NumMicroOps;
-
- return 1;
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
+ if (SCDesc->isValid())
+ return SCDesc->NumMicroOps;
+ }
+ return MI->isTransient() ? 0 : 1;
}
/// If we can determine the operand latency from the def only, without machine
@@ -199,7 +201,7 @@ unsigned TargetSchedModel::computeOperandLatency(
report_fatal_error(ss.str());
}
#endif
- return 1;
+ return DefMI->isTransient() ? 0 : 1;
}
unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
@@ -209,16 +211,18 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
return TII->getInstrLatency(&InstrItins, MI);
if (hasInstrSchedModel()) {
- unsigned Latency = 0;
const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
- for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
- DefIdx != DefEnd; ++DefIdx) {
- // Lookup the definition's write latency in SubtargetInfo.
- const MCWriteLatencyEntry *WLEntry =
- STI->getWriteLatencyEntry(SCDesc, DefIdx);
- Latency = std::max(Latency, WLEntry->Cycles);
+ if (SCDesc->isValid()) {
+ unsigned Latency = 0;
+ for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Look up the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ Latency = std::max(Latency, WLEntry->Cycles);
+ }
+ return Latency;
}
- return Latency;
}
return TII->defaultDefLatency(&SchedModel, MI);
}
@@ -251,10 +255,12 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
// an unbuffered resource. If so, it is treated like an in-order CPU.
if (hasInstrSchedModel()) {
const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
- for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
- *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
- if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
- return 1;
+ if (SCDesc->isValid()) {
+ for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
+ *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
+ if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
+ return 1;
+ }
}
}
return 0;
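
Taken together, the TargetSchedule.cpp changes establish a fallback order for latency: use resolved sched-class data when it is valid, otherwise a default that now treats transient instructions (COPY, IMPLICIT_DEF, and friends) as free. A hedged sketch of that order, with stand-in types rather than the LLVM scheduling API:

    struct SchedClass { bool Valid; unsigned MaxWriteLatency; };
    struct Instr { bool Transient; bool MayLoad; const SchedClass *SC; };

    static unsigned latency(const Instr &MI, unsigned LoadLatency) {
      if (MI.SC && MI.SC->Valid)
        return MI.SC->MaxWriteLatency; // per-opcode machine-model data
      if (MI.Transient)
        return 0;                      // no real hardware op: free
      return MI.MayLoad ? LoadLatency : 1;
    }

    int main() {
      SchedClass SC = { true, 4 };
      Instr Mul  = { false, false, &SC }; // modeled: 4 cycles
      Instr Copy = { true,  false, 0 };   // transient: 0 cycles
      Instr Load = { false, true,  0 };   // unmodeled load: LoadLatency
      return (latency(Mul, 3) == 4 && latency(Copy, 3) == 0 &&
              latency(Load, 3) == 3) ? 0 : 1;
    }
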
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index a69a8169d30..bb93bdc0bc2 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -257,9 +257,6 @@ void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
-#ifndef NDEBUG
- BitVector Reserved = TRI->getReservedRegs(*MF);
-#endif
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
@@ -283,7 +280,7 @@ void VirtRegRewriter::rewrite() {
unsigned PhysReg = VRM->getPhys(VirtReg);
assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
"Instruction uses unmapped VirtReg");
- assert(!Reserved.test(PhysReg) && "Reserved register assignment");
+ assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
// Preserve semantics of sub-register operands.
if (MO.getSubReg()) {
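The VirtRegMap hunk makes the same point from the other side: instead of a debug-only cached copy of the reserved set, the rewriter now asks MachineRegisterInfo, which owns that state. A small sketch of the pattern, with RegInfo as a hypothetical stand-in:

#include <cassert>
#include <vector>

class RegInfo {
  std::vector<bool> Reserved;   // single authoritative reserved-register set
public:
  explicit RegInfo(unsigned NumRegs) : Reserved(NumRegs, false) {}
  void reserve(unsigned R) { Reserved[R] = true; }
  bool isReserved(unsigned R) const { return Reserved[R]; }
};

int main() {
  RegInfo MRI(16);
  MRI.reserve(13);              // e.g. the stack pointer
  unsigned PhysReg = 3;
  // Same shape as the rewritten assert: no locally cached BitVector needed.
  assert(!MRI.isReserved(PhysReg) && "Reserved register assignment");
  return 0;
}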
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index c5c46815a28..94a2542e7ad 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -17,6 +17,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ADT/SmallString.h"
@@ -267,7 +268,7 @@ public:
void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
const std::vector<std::string> &InputArgv) {
clear(); // Free the old contents.
- unsigned PtrSize = EE->getDataLayout()->getPointerSize();
+ unsigned PtrSize = EE->getDataLayout()->getPointerSize(0);
Array = new char[(InputArgv.size()+1)*PtrSize];
DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n");
@@ -342,7 +343,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
#ifndef NDEBUG
/// isTargetNullPtr - Return whether the target pointer stored at Loc is null.
static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) {
- unsigned PtrSize = EE->getDataLayout()->getPointerSize();
+ unsigned PtrSize = EE->getDataLayout()->getPointerSize(0);
for (unsigned i = 0; i < PtrSize; ++i)
if (*(i + (uint8_t*)Loc))
return false;
@@ -644,13 +645,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
}
case Instruction::PtrToInt: {
GenericValue GV = getConstantValue(Op0);
- uint32_t PtrWidth = TD->getPointerSizeInBits();
+    unsigned AS = cast<PointerType>(CE->getOperand(0)->getType())
+ ->getAddressSpace();
+ uint32_t PtrWidth = TD->getPointerSizeInBits(AS);
GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal));
return GV;
}
case Instruction::IntToPtr: {
GenericValue GV = getConstantValue(Op0);
- uint32_t PtrWidth = TD->getPointerSizeInBits();
+ unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
+ uint32_t PtrWidth = TD->getPointerSizeInBits(AS);
if (PtrWidth != GV.IntVal.getBitWidth())
GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth);
assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width");
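Every pointer-size query in this file now names an address space: 0 for the default, or the space recovered from the pointer type itself. A toy model of the lookup, with Layout as a hypothetical stand-in for DataLayout:

#include <cassert>
#include <cstdint>
#include <map>

class Layout {
  std::map<unsigned, uint32_t> PtrBits;   // address space -> pointer width
public:
  Layout() { PtrBits[0] = 64; }           // default address space
  void setPointerSizeInBits(unsigned AS, uint32_t Bits) { PtrBits[AS] = Bits; }
  uint32_t getPointerSizeInBits(unsigned AS) const {
    std::map<unsigned, uint32_t>::const_iterator I = PtrBits.find(AS);
    return I != PtrBits.end() ? I->second : PtrBits.find(0)->second;
  }
  unsigned getPointerSize(unsigned AS) const {
    return getPointerSizeInBits(AS) / 8;
  }
};

int main() {
  Layout TD;
  TD.setPointerSizeInBits(1, 32);         // e.g. a 32-bit local address space
  assert(TD.getPointerSize(0) == 8);      // default space is still 64-bit
  assert(TD.getPointerSizeInBits(1) == 32);
  return 0;
}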
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 5202b091654..326bf79c589 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1054,7 +1054,8 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy,
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction");
- uint32_t PtrSize = TD.getPointerSizeInBits();
+ unsigned AS = cast<PointerType>(DstTy)->getAddressSpace();
+ uint32_t PtrSize = TD.getPointerSizeInBits(AS);
if (PtrSize != Src.IntVal.getBitWidth())
Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize);
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index e16e2d112a9..f58adbe1e1a 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -376,7 +376,7 @@ GenericValue lle_X_sprintf(FunctionType *FT,
case 'x': case 'X':
if (HowLong >= 1) {
if (HowLong == 1 &&
- TheInterpreter->getDataLayout()->getPointerSizeInBits() == 64 &&
+ TheInterpreter->getDataLayout()->getPointerSizeInBits(0) == 64 &&
sizeof(long) < sizeof(int64_t)) {
// Make sure we use %lld with a 64 bit argument because we might be
// compiling LLI on a 32 bit compiler.
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 19c197903a6..bcd5b263654 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -14,7 +14,9 @@
#include "JIT.h"
#include "JITDwarfEmitter.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -66,7 +68,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
void
JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
const std::vector<MachineMove> &Moves) const {
- unsigned PointerSize = TD->getPointerSize();
+ unsigned PointerSize = TD->getPointerSize(0);
int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
PointerSize : -PointerSize;
MCSymbol *BaseLabel = 0;
@@ -378,7 +380,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
- unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
+ unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(0);
unsigned TypeOffset = sizeof(int8_t) + // Call site format
// Call-site table length
@@ -454,12 +456,12 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
const GlobalVariable *GV = TypeInfos[M - 1];
if (GV) {
- if (TD->getPointerSize() == sizeof(int32_t))
+ if (TD->getPointerSize(GV->getType()->getAddressSpace()) == sizeof(int32_t))
JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV));
else
JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV));
} else {
- if (TD->getPointerSize() == sizeof(int32_t))
+ if (TD->getPointerSize(0) == sizeof(int32_t))
JCE->emitInt32(0);
else
JCE->emitInt64(0);
@@ -481,7 +483,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
unsigned char*
JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
- unsigned PointerSize = TD->getPointerSize();
+ unsigned PointerSize = TD->getPointerSize(0);
int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
PointerSize : -PointerSize;
@@ -541,7 +543,7 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
unsigned char* StartFunction,
unsigned char* EndFunction,
unsigned char* ExceptionTable) const {
- unsigned PointerSize = TD->getPointerSize();
+ unsigned PointerSize = TD->getPointerSize(0);
// EH frame header.
unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue();
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 1198a7be372..ecafda7286f 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -384,11 +384,6 @@ namespace {
delete MemMgr;
}
- /// classof - Methods for support type inquiry through isa, cast, and
- /// dyn_cast:
- ///
- static inline bool classof(const MachineCodeEmitter*) { return true; }
-
JITResolver &getJITResolver() { return Resolver; }
virtual void startFunction(MachineFunction &F);
@@ -1265,15 +1260,13 @@ void *JIT::getPointerToFunctionOrStub(Function *F) {
return Addr;
// Get a stub if the target supports it.
- assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
- JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+ JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter());
return JE->getJITResolver().getLazyFunctionStub(F);
}
void JIT::updateFunctionStub(Function *F) {
// Get the empty stub we generated earlier.
- assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
- JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+ JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter());
void *Stub = JE->getJITResolver().getLazyFunctionStub(F);
void *Addr = getPointerToGlobalIfAvailable(F);
assert(Addr != Stub && "Function must have non-stub address to be updated.");
@@ -1294,6 +1287,5 @@ void JIT::freeMachineCodeForFunction(Function *F) {
updateGlobalMapping(F, 0);
// Free the actual memory for the function body and related stuff.
- assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
- cast<JITEmitter>(JCE)->deallocateMemForFunction(F);
+ static_cast<JITEmitter*>(JCE)->deallocateMemForFunction(F);
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index eb69693359d..c1f8baed1a4 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -378,17 +378,17 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE,
uint64_t Value) {
- // Ignore relocations for sections that were not loaded
- if (Sections[RE.SectionID].Address != 0) {
- uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
- DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
- << " + " << RE.Offset << " (" << format("%p", Target) << ")"
- << " RelType: " << RE.RelType
- << " Addend: " << RE.Addend
- << "\n");
-
- resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
- Value, RE.RelType, RE.Addend);
+ // Ignore relocations for sections that were not loaded
+ if (Sections[RE.SectionID].Address != 0) {
+ uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
+ DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
+ << " + " << RE.Offset << " (" << format("%p", Target) << ")"
+ << " RelType: " << RE.RelType
+ << " Addend: " << RE.Addend
+ << "\n");
+
+ resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
+ Value, RE.RelType, RE.Addend);
}
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index efefacf632d..08aba64e460 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -59,9 +59,6 @@ public:
const ELFObjectFile<target_endianness, is64Bits> *v) {
return v->isDyldType();
}
- static inline bool classof(const DyldELFObject *v) {
- return true;
- }
};
template<support::endianness target_endianness, bool is64Bits>
@@ -416,7 +413,13 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
if (si == Obj.end_sections())
llvm_unreachable("Symbol section not found, bad object file format!");
DEBUG(dbgs() << "\t\tThis is section symbol\n");
- Value.SectionID = findOrEmitSection(Obj, (*si), true, ObjSectionToID);
+ // Default to 'true' in case isText fails (though it never does).
+ bool isCode = true;
+ si->isText(isCode);
+ Value.SectionID = findOrEmitSection(Obj,
+ (*si),
+ isCode,
+ ObjSectionToID);
Value.Addend = Addend;
break;
}
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index b0bc2900ecb..17a6323d0e7 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -251,6 +251,7 @@ public:
virtual void EmitPad(int64_t Offset);
virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool);
+ virtual void EmitTCEntry(const MCSymbol &S);
virtual void EmitInstruction(const MCInst &Inst);
@@ -1299,6 +1300,14 @@ void MCAsmStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
EmitEOL();
}
+void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) {
+ OS << "\t.tc ";
+ OS << S.getName();
+ OS << "[TC],";
+ OS << S.getName();
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
assert(getCurrentSection() && "Cannot emit contents before setting section!");
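For reference, the text the new hook writes is a PowerPC64 TOC entry that names the symbol twice. A tiny standalone printer with the same output (the free function here is hypothetical):

#include <iostream>
#include <string>

// Produces the same line MCAsmStreamer::EmitTCEntry emits for a symbol.
void emitTCEntry(std::ostream &OS, const std::string &Name) {
  OS << "\t.tc " << Name << "[TC]," << Name << '\n';
}

int main() {
  emitTCEntry(std::cout, "foo");   // prints:  .tc foo[TC],foo
  return 0;
}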
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 8107005481d..14fbc1ec839 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -103,6 +103,8 @@ public:
virtual void EmitFileDirective(StringRef Filename);
+ virtual void EmitTCEntry(const MCSymbol &S);
+
virtual void FinishImpl();
private:
@@ -469,6 +471,12 @@ void MCELFStreamer::FinishImpl() {
this->MCObjectStreamer::FinishImpl();
}
+void MCELFStreamer::EmitTCEntry(const MCSymbol &S)
+{
+ // Creates a R_PPC64_TOC relocation
+  // Creates an R_PPC64_TOC relocation.
+}
+
MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *CE,
bool RelaxAll, bool NoExecStack) {
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index c2fff3c5207..0406ff8d446 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -133,6 +133,15 @@ private:
/// IsDarwin - is Darwin compatibility enabled?
bool IsDarwin;
+  /// ParsingInlineAsm - Are we parsing MS-style inline assembly?
+ bool ParsingInlineAsm;
+
+ /// ParsedOperands - The parsed operands from the last parsed statement.
+ SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
+
+ /// Opcode - The opcode from the last parsed instruction.
+ unsigned Opcode;
+
public:
AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI);
@@ -171,6 +180,21 @@ public:
virtual const AsmToken &Lex();
+ bool ParseStatement();
+ void setParsingInlineAsm(bool V) { ParsingInlineAsm = V; }
+ unsigned getNumParsedOperands() { return ParsedOperands.size(); }
+ MCParsedAsmOperand &getParsedOperand(unsigned OpNum) {
+    assert(ParsedOperands.size() > OpNum);
+ return *ParsedOperands[OpNum];
+ }
+ void freeParsedOperands() {
+ for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
+ delete ParsedOperands[i];
+ ParsedOperands.clear();
+ }
+ bool isInstruction() { return Opcode != (unsigned)~0x0; }
+ unsigned getOpcode() { return Opcode; }
+
bool ParseExpression(const MCExpr *&Res);
virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
@@ -181,7 +205,6 @@ public:
private:
void CheckForValidSection();
- bool ParseStatement();
void EatToEndOfLine();
bool ParseCppHashLineFilenameComment(const SMLoc &L);
@@ -412,7 +435,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
GenericParser(new GenericAsmParser), PlatformParser(0),
CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0),
- AssemblerDialect(~0U), IsDarwin(false) {
+ AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false),
+ Opcode(~0x0) {
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
SavedDiagContext = SrcMgr.getDiagContext();
@@ -604,7 +628,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
}
void AsmParser::CheckForValidSection() {
- if (!getStreamer().getCurrentSection()) {
+ if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) {
TokError("expected section directive before assembly directive");
Out.SwitchSection(Ctx.getMachOSection(
"__TEXT", "__text",
@@ -1310,12 +1334,11 @@ bool AsmParser::ParseStatement() {
CheckForValidSection();
// Canonicalize the opcode to lower case.
- SmallString<128> Opcode;
+ SmallString<128> OpcodeStr;
for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
- Opcode.push_back(tolower(IDVal[i]));
+ OpcodeStr.push_back(tolower(IDVal[i]));
- SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
- bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc,
+ bool HadError = getTargetParser().ParseInstruction(OpcodeStr.str(), IDLoc,
ParsedOperands);
// Dump the parsed representation, if requested.
@@ -1346,13 +1369,18 @@ bool AsmParser::ParseStatement() {
}
// If parsing succeeded, match the instruction.
- if (!HadError)
- HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, ParsedOperands,
- Out);
-
- // Free any parsed operands.
- for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
- delete ParsedOperands[i];
+ if (!HadError) {
+ unsigned ErrorInfo;
+ HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, Opcode,
+ ParsedOperands, Out,
+ ErrorInfo,
+ ParsingInlineAsm);
+ }
+
+  // Free any parsed operands. When parsing MS-style inline assembly, it is
+  // the responsibility of the caller (i.e., clang) to free them.
+ if (!ParsingInlineAsm)
+ freeParsedOperands();
// Don't skip the rest of the line, the instruction parser is responsible for
// that.
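The parser changes above amount to an ownership handshake for MS-style inline assembly: operands from the last statement stay alive so the caller (clang) can inspect them, and the caller frees them. A toy model of that contract, with all names hypothetical:

#include <cassert>
#include <vector>

struct Operand { int Dummy; };

class Parser {
  bool InlineAsm;
  std::vector<Operand*> Ops;
public:
  Parser() : InlineAsm(false) {}
  void setParsingInlineAsm(bool V) { InlineAsm = V; }
  void parseStatement() {
    Ops.push_back(new Operand());
    if (!InlineAsm)            // normal assembly: operands die with the stmt
      freeParsedOperands();
  }
  unsigned getNumParsedOperands() const { return Ops.size(); }
  void freeParsedOperands() {
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      delete Ops[i];
    Ops.clear();
  }
};

int main() {
  Parser P;
  P.setParsingInlineAsm(true);
  P.parseStatement();
  assert(P.getNumParsedOperands() == 1);  // caller inspects the operands...
  P.freeParsedOperands();                 // ...and is responsible for freeing
  return 0;
}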
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 0bac24dc3a7..afece0ba551 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -561,6 +561,10 @@ void MCStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool) {
abort();
}
+void MCStreamer::EmitTCEntry(const MCSymbol &S) {
+ llvm_unreachable("Unsupported method");
+}
+
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index abfaecc2790..2cc7a584624 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -98,6 +98,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
case BGP: return "bgp";
case BGQ: return "bgq";
case Freescale: return "fsl";
+ case IBM: return "ibm";
}
llvm_unreachable("Invalid VendorType!");
@@ -128,6 +129,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case NativeClient: return "nacl";
case CNK: return "cnk";
case Bitrig: return "bitrig";
+ case AIX: return "aix";
}
llvm_unreachable("Invalid OSType");
@@ -278,6 +280,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
.Case("bgp", Triple::BGP)
.Case("bgq", Triple::BGQ)
.Case("fsl", Triple::Freescale)
+ .Case("ibm", Triple::IBM)
.Default(Triple::UnknownVendor);
}
@@ -304,6 +307,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("nacl", Triple::NativeClient)
.StartsWith("cnk", Triple::CNK)
.StartsWith("bitrig", Triple::Bitrig)
+ .StartsWith("aix", Triple::AIX)
.Default(Triple::UnknownOS);
}
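With both tables extended, a conventional AIX triple should round-trip through the parser. A hedged usage sketch against this tree's headers:

#include "llvm/ADT/Triple.h"
#include <cassert>

int main() {
  llvm::Triple T("powerpc64-ibm-aix");
  assert(T.getVendor() == llvm::Triple::IBM);   // via parseVendor("ibm")
  assert(T.getOS() == llvm::Triple::AIX);       // parseOS matches "aix..."
  return 0;
}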
diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt
index 28240f4a673..4f64eb4ff24 100644
--- a/lib/TableGen/CMakeLists.txt
+++ b/lib/TableGen/CMakeLists.txt
@@ -1,5 +1,3 @@
-## FIXME: This only requires RTTI because tblgen uses it. Fix that.
-set(LLVM_REQUIRES_RTTI 1)
set(LLVM_REQUIRES_EH 1)
add_llvm_library(LLVMTableGen
diff --git a/lib/TableGen/Makefile b/lib/TableGen/Makefile
index 44724389e1d..732d8a197ee 100644
--- a/lib/TableGen/Makefile
+++ b/lib/TableGen/Makefile
@@ -11,8 +11,6 @@ LEVEL = ../..
LIBRARYNAME = LLVMTableGen
BUILD_ARCHIVE = 1
-## FIXME: This only requires RTTI because tblgen uses it. Fix that.
-REQUIRES_RTTI = 1
REQUIRES_EH = 1
include $(LEVEL)/Makefile.common
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 83f2fff9312..c7b2de2b0fe 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -200,7 +200,7 @@ Init *IntRecTy::convertValue(BitInit *BI) {
Init *IntRecTy::convertValue(BitsInit *BI) {
int64_t Result = 0;
for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i)
- if (BitInit *Bit = dynamic_cast<BitInit*>(BI->getBit(i))) {
+ if (BitInit *Bit = dyn_cast<BitInit>(BI->getBit(i))) {
Result |= Bit->getValue() << i;
} else {
return 0;
@@ -615,7 +615,7 @@ ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
Record *ListInit::getElementAsRecord(unsigned i) const {
assert(i < Values.size() && "List element index out of range!");
- DefInit *DI = dynamic_cast<DefInit*>(Values[i]);
+ DefInit *DI = dyn_cast<DefInit>(Values[i]);
if (DI == 0) throw "Expected record in list!";
return DI->getDef();
}
@@ -650,7 +650,7 @@ Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV,
// If the element is set to some value, or if we are resolving a reference
// to a specific variable and that variable is explicitly unset, then
// replace the VarListElementInit with it.
- if (IRV || !dynamic_cast<UnsetInit*>(E))
+ if (IRV || !isa<UnsetInit>(E))
return E;
return 0;
}
@@ -667,13 +667,13 @@ std::string ListInit::getAsString() const {
Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV,
unsigned Elt) const {
Init *Resolved = resolveReferences(R, IRV);
- OpInit *OResolved = dynamic_cast<OpInit *>(Resolved);
+ OpInit *OResolved = dyn_cast<OpInit>(Resolved);
if (OResolved) {
Resolved = OResolved->Fold(&R, 0);
}
if (Resolved != this) {
- TypedInit *Typed = dynamic_cast<TypedInit *>(Resolved);
+ TypedInit *Typed = dyn_cast<TypedInit>(Resolved);
assert(Typed && "Expected typed init for list reference");
if (Typed) {
Init *New = Typed->resolveListElementReference(R, IRV, Elt);
@@ -709,23 +709,16 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
case CAST: {
if (getType()->getAsString() == "string") {
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- if (LHSs) {
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
return LHSs;
- }
- DefInit *LHSd = dynamic_cast<DefInit*>(LHS);
- if (LHSd) {
+ if (DefInit *LHSd = dyn_cast<DefInit>(LHS))
return StringInit::get(LHSd->getDef()->getName());
- }
- IntInit *LHSi = dynamic_cast<IntInit*>(LHS);
- if (LHSi) {
+ if (IntInit *LHSi = dyn_cast<IntInit>(LHS))
return StringInit::get(LHSi->getAsString());
- }
} else {
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- if (LHSs) {
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) {
std::string Name = LHSs->getValue();
// From TGParser::ParseIDValue
@@ -773,8 +766,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
break;
}
case HEAD: {
- ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
- if (LHSl) {
+ if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
if (LHSl->getSize() == 0) {
assert(0 && "Empty list in car");
return 0;
@@ -784,8 +776,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
break;
}
case TAIL: {
- ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
- if (LHSl) {
+ if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
if (LHSl->getSize() == 0) {
assert(0 && "Empty list in cdr");
return 0;
@@ -802,16 +793,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
break;
}
case EMPTY: {
- ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
- if (LHSl) {
+ if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
if (LHSl->getSize() == 0) {
return IntInit::get(1);
} else {
return IntInit::get(0);
}
}
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- if (LHSs) {
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) {
if (LHSs->getValue().empty()) {
return IntInit::get(1);
} else {
@@ -865,11 +854,11 @@ BinOpInit *BinOpInit::get(BinaryOp opc, Init *lhs,
Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
case CONCAT: {
- DagInit *LHSs = dynamic_cast<DagInit*>(LHS);
- DagInit *RHSs = dynamic_cast<DagInit*>(RHS);
+ DagInit *LHSs = dyn_cast<DagInit>(LHS);
+ DagInit *RHSs = dyn_cast<DagInit>(RHS);
if (LHSs && RHSs) {
- DefInit *LOp = dynamic_cast<DefInit*>(LHSs->getOperator());
- DefInit *ROp = dynamic_cast<DefInit*>(RHSs->getOperator());
+ DefInit *LOp = dyn_cast<DefInit>(LHSs->getOperator());
+ DefInit *ROp = dyn_cast<DefInit>(RHSs->getOperator());
if (LOp == 0 || ROp == 0 || LOp->getDef() != ROp->getDef())
        throw "Concatenated Dag operators do not match!";
std::vector<Init*> Args;
@@ -887,8 +876,8 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
break;
}
case STRCONCAT: {
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- StringInit *RHSs = dynamic_cast<StringInit*>(RHS);
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ StringInit *RHSs = dyn_cast<StringInit>(RHS);
if (LHSs && RHSs)
return StringInit::get(LHSs->getValue() + RHSs->getValue());
break;
@@ -897,15 +886,15 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
// try to fold eq comparison for 'bit' and 'int', otherwise fallback
// to string objects.
IntInit *L =
- dynamic_cast<IntInit*>(LHS->convertInitializerTo(IntRecTy::get()));
+ dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get()));
IntInit *R =
- dynamic_cast<IntInit*>(RHS->convertInitializerTo(IntRecTy::get()));
+ dyn_cast_or_null<IntInit>(RHS->convertInitializerTo(IntRecTy::get()));
if (L && R)
return IntInit::get(L->getValue() == R->getValue());
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- StringInit *RHSs = dynamic_cast<StringInit*>(RHS);
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ StringInit *RHSs = dyn_cast<StringInit>(RHS);
// Make sure we've resolved
if (LHSs && RHSs)
@@ -916,8 +905,8 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
case SHL:
case SRA:
case SRL: {
- IntInit *LHSi = dynamic_cast<IntInit*>(LHS);
- IntInit *RHSi = dynamic_cast<IntInit*>(RHS);
+ IntInit *LHSi = dyn_cast<IntInit>(LHS);
+ IntInit *RHSi = dyn_cast<IntInit>(RHS);
if (LHSi && RHSi) {
int64_t LHSv = LHSi->getValue(), RHSv = RHSi->getValue();
int64_t Result;
@@ -990,7 +979,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
MultiClass *CurMultiClass) {
std::vector<Init *> NewOperands;
- TypedInit *TArg = dynamic_cast<TypedInit*>(Arg);
+ TypedInit *TArg = dyn_cast<TypedInit>(Arg);
// If this is a dag, recurse
if (TArg && TArg->getType()->getAsString() == "dag") {
@@ -1004,7 +993,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
}
for (int i = 0; i < RHSo->getNumOperands(); ++i) {
- OpInit *RHSoo = dynamic_cast<OpInit*>(RHSo->getOperand(i));
+ OpInit *RHSoo = dyn_cast<OpInit>(RHSo->getOperand(i));
if (RHSoo) {
Init *Result = EvaluateOperation(RHSoo, LHS, Arg,
@@ -1032,16 +1021,16 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Record *CurRec, MultiClass *CurMultiClass) {
- DagInit *MHSd = dynamic_cast<DagInit*>(MHS);
- ListInit *MHSl = dynamic_cast<ListInit*>(MHS);
+ DagInit *MHSd = dyn_cast<DagInit>(MHS);
+ ListInit *MHSl = dyn_cast<ListInit>(MHS);
- OpInit *RHSo = dynamic_cast<OpInit*>(RHS);
+ OpInit *RHSo = dyn_cast<OpInit>(RHS);
if (!RHSo) {
throw TGError(CurRec->getLoc(), "!foreach requires an operator\n");
}
- TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS);
+ TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
if (!LHSt) {
throw TGError(CurRec->getLoc(), "!foreach requires typed variable\n");
@@ -1110,17 +1099,17 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
case SUBST: {
- DefInit *LHSd = dynamic_cast<DefInit*>(LHS);
- VarInit *LHSv = dynamic_cast<VarInit*>(LHS);
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
+ DefInit *LHSd = dyn_cast<DefInit>(LHS);
+ VarInit *LHSv = dyn_cast<VarInit>(LHS);
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
- DefInit *MHSd = dynamic_cast<DefInit*>(MHS);
- VarInit *MHSv = dynamic_cast<VarInit*>(MHS);
- StringInit *MHSs = dynamic_cast<StringInit*>(MHS);
+ DefInit *MHSd = dyn_cast<DefInit>(MHS);
+ VarInit *MHSv = dyn_cast<VarInit>(MHS);
+ StringInit *MHSs = dyn_cast<StringInit>(MHS);
- DefInit *RHSd = dynamic_cast<DefInit*>(RHS);
- VarInit *RHSv = dynamic_cast<VarInit*>(RHS);
- StringInit *RHSs = dynamic_cast<StringInit*>(RHS);
+ DefInit *RHSd = dyn_cast<DefInit>(RHS);
+ VarInit *RHSv = dyn_cast<VarInit>(RHS);
+ StringInit *RHSs = dyn_cast<StringInit>(RHS);
if ((LHSd && MHSd && RHSd)
|| (LHSv && MHSv && RHSv)
@@ -1168,9 +1157,9 @@ Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
}
case IF: {
- IntInit *LHSi = dynamic_cast<IntInit*>(LHS);
+ IntInit *LHSi = dyn_cast<IntInit>(LHS);
if (Init *I = LHS->convertInitializerTo(IntRecTy::get()))
- LHSi = dynamic_cast<IntInit*>(I);
+ LHSi = dyn_cast<IntInit>(I);
if (LHSi) {
if (LHSi->getValue()) {
return MHS;
@@ -1190,9 +1179,9 @@ Init *TernOpInit::resolveReferences(Record &R,
Init *lhs = LHS->resolveReferences(R, RV);
if (Opc == IF && lhs != LHS) {
- IntInit *Value = dynamic_cast<IntInit*>(lhs);
+ IntInit *Value = dyn_cast<IntInit>(lhs);
if (Init *I = lhs->convertInitializerTo(IntRecTy::get()))
- Value = dynamic_cast<IntInit*>(I);
+ Value = dyn_cast<IntInit>(I);
if (Value != 0) {
// Short-circuit
if (Value->getValue()) {
@@ -1285,8 +1274,7 @@ VarInit *VarInit::get(Init *VN, RecTy *T) {
}
const std::string &VarInit::getName() const {
- StringInit *NameString =
- dynamic_cast<StringInit *>(getNameInit());
+ StringInit *NameString = dyn_cast<StringInit>(getNameInit());
assert(NameString && "VarInit name is not a string!");
return NameString->getValue();
}
@@ -1305,9 +1293,9 @@ Init *VarInit::resolveListElementReference(Record &R,
RecordVal *RV = R.getValue(getNameInit());
assert(RV && "Reference to a non-existent variable?");
- ListInit *LI = dynamic_cast<ListInit*>(RV->getValue());
+ ListInit *LI = dyn_cast<ListInit>(RV->getValue());
if (!LI) {
- TypedInit *VI = dynamic_cast<TypedInit*>(RV->getValue());
+ TypedInit *VI = dyn_cast<TypedInit>(RV->getValue());
assert(VI && "Invalid list element!");
return VarListElementInit::get(VI, Elt);
}
@@ -1318,7 +1306,7 @@ Init *VarInit::resolveListElementReference(Record &R,
// If the element is set to some value, or if we are resolving a reference
// to a specific variable and that variable is explicitly unset, then
// replace the VarListElementInit with it.
- if (IRV || !dynamic_cast<UnsetInit*>(E))
+ if (IRV || !isa<UnsetInit>(E))
return E;
return 0;
}
@@ -1335,7 +1323,7 @@ Init *VarInit::getFieldInit(Record &R, const RecordVal *RV,
const std::string &FieldName) const {
if (isa<RecordRecTy>(getType()))
if (const RecordVal *Val = R.getValue(VarName)) {
- if (RV != Val && (RV || dynamic_cast<UnsetInit*>(Val->getValue())))
+ if (RV != Val && (RV || isa<UnsetInit>(Val->getValue())))
return 0;
Init *TheInit = Val->getValue();
assert(TheInit != this && "Infinite loop detected!");
@@ -1354,7 +1342,7 @@ Init *VarInit::getFieldInit(Record &R, const RecordVal *RV,
///
Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const {
if (RecordVal *Val = R.getValue(VarName))
- if (RV == Val || (RV == 0 && !dynamic_cast<UnsetInit*>(Val->getValue())))
+ if (RV == Val || (RV == 0 && !isa<UnsetInit>(Val->getValue())))
return Val->getValue();
return const_cast<VarInit *>(this);
}
@@ -1422,8 +1410,7 @@ Init *VarListElementInit:: resolveListElementReference(Record &R,
Init *Result = TI->resolveListElementReference(R, RV, Element);
if (Result) {
- TypedInit *TInit = dynamic_cast<TypedInit *>(Result);
- if (TInit) {
+ if (TypedInit *TInit = dyn_cast<TypedInit>(Result)) {
Init *Result2 = TInit->resolveListElementReference(R, RV, Elt);
if (Result2) return Result2;
return new VarListElementInit(TInit, Elt);
@@ -1475,14 +1462,14 @@ Init *FieldInit::getBit(unsigned Bit) const {
Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV,
unsigned Elt) const {
if (Init *ListVal = Rec->getFieldInit(R, RV, FieldName))
- if (ListInit *LI = dynamic_cast<ListInit*>(ListVal)) {
+ if (ListInit *LI = dyn_cast<ListInit>(ListVal)) {
if (Elt >= LI->getSize()) return 0;
Init *E = LI->getElement(Elt);
// If the element is set to some value, or if we are resolving a
// reference to a specific variable and that variable is explicitly
// unset, then replace the VarListElementInit with it.
- if (RV || !dynamic_cast<UnsetInit*>(E))
+ if (RV || !isa<UnsetInit>(E))
return E;
}
return 0;
@@ -1611,7 +1598,7 @@ RecordVal::RecordVal(const std::string &N, RecTy *T, unsigned P)
}
const std::string &RecordVal::getName() const {
- StringInit *NameString = dynamic_cast<StringInit *>(Name);
+ StringInit *NameString = dyn_cast<StringInit>(Name);
assert(NameString && "RecordVal name is not a string!");
return NameString->getValue();
}
@@ -1641,7 +1628,7 @@ void Record::init() {
void Record::checkName() {
// Ensure the record name has string type.
- const TypedInit *TypedName = dynamic_cast<const TypedInit *>(Name);
+ const TypedInit *TypedName = dyn_cast<const TypedInit>(Name);
assert(TypedName && "Record name is not typed!");
RecTy *Type = TypedName->getType();
if (!isa<StringRecTy>(Type))
@@ -1655,8 +1642,7 @@ DefInit *Record::getDefInit() {
}
const std::string &Record::getName() const {
- const StringInit *NameString =
- dynamic_cast<const StringInit *>(Name);
+ const StringInit *NameString = dyn_cast<StringInit>(Name);
assert(NameString && "Record name is not a string!");
return NameString->getValue();
}
@@ -1773,7 +1759,7 @@ std::string Record::getValueAsString(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (StringInit *SI = dynamic_cast<StringInit*>(R->getValue()))
+ if (StringInit *SI = dyn_cast<StringInit>(R->getValue()))
return SI->getValue();
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a string initializer!";
@@ -1789,7 +1775,7 @@ BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (BitsInit *BI = dynamic_cast<BitsInit*>(R->getValue()))
+ if (BitsInit *BI = dyn_cast<BitsInit>(R->getValue()))
return BI;
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a BitsInit initializer!";
@@ -1805,7 +1791,7 @@ ListInit *Record::getValueAsListInit(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (ListInit *LI = dynamic_cast<ListInit*>(R->getValue()))
+ if (ListInit *LI = dyn_cast<ListInit>(R->getValue()))
return LI;
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a list initializer!";
@@ -1820,7 +1806,7 @@ Record::getValueAsListOfDefs(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<Record*> Defs;
for (unsigned i = 0; i < List->getSize(); i++) {
- if (DefInit *DI = dynamic_cast<DefInit*>(List->getElement(i))) {
+ if (DefInit *DI = dyn_cast<DefInit>(List->getElement(i))) {
Defs.push_back(DI->getDef());
} else {
throw "Record `" + getName() + "', field `" + FieldName.str() +
@@ -1840,7 +1826,7 @@ int64_t Record::getValueAsInt(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (IntInit *II = dynamic_cast<IntInit*>(R->getValue()))
+ if (IntInit *II = dyn_cast<IntInit>(R->getValue()))
return II->getValue();
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have an int initializer!";
@@ -1855,7 +1841,7 @@ Record::getValueAsListOfInts(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<int64_t> Ints;
for (unsigned i = 0; i < List->getSize(); i++) {
- if (IntInit *II = dynamic_cast<IntInit*>(List->getElement(i))) {
+ if (IntInit *II = dyn_cast<IntInit>(List->getElement(i))) {
Ints.push_back(II->getValue());
} else {
throw "Record `" + getName() + "', field `" + FieldName.str() +
@@ -1874,7 +1860,7 @@ Record::getValueAsListOfStrings(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<std::string> Strings;
for (unsigned i = 0; i < List->getSize(); i++) {
- if (StringInit *II = dynamic_cast<StringInit*>(List->getElement(i))) {
+ if (StringInit *II = dyn_cast<StringInit>(List->getElement(i))) {
Strings.push_back(II->getValue());
} else {
throw "Record `" + getName() + "', field `" + FieldName.str() +
@@ -1894,7 +1880,7 @@ Record *Record::getValueAsDef(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (DefInit *DI = dynamic_cast<DefInit*>(R->getValue()))
+ if (DefInit *DI = dyn_cast<DefInit>(R->getValue()))
return DI->getDef();
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a def initializer!";
@@ -1910,7 +1896,7 @@ bool Record::getValueAsBit(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (BitInit *BI = dynamic_cast<BitInit*>(R->getValue()))
+ if (BitInit *BI = dyn_cast<BitInit>(R->getValue()))
return BI->getValue();
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a bit initializer!";
@@ -1927,7 +1913,7 @@ bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const {
return false;
}
Unset = false;
- if (BitInit *BI = dynamic_cast<BitInit*>(R->getValue()))
+ if (BitInit *BI = dyn_cast<BitInit>(R->getValue()))
return BI->getValue();
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a bit initializer!";
@@ -1943,7 +1929,7 @@ DagInit *Record::getValueAsDag(StringRef FieldName) const {
throw "Record `" + getName() + "' does not have a field named `" +
FieldName.str() + "'!\n";
- if (DagInit *DI = dynamic_cast<DagInit*>(R->getValue()))
+ if (DagInit *DI = dyn_cast<DagInit>(R->getValue()))
return DI;
throw "Record `" + getName() + "', field `" + FieldName.str() +
"' does not have a dag initializer!";
@@ -2004,7 +1990,7 @@ RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
/// to CurRec's name.
Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
Init *Name, const std::string &Scoper) {
- RecTy *Type = dynamic_cast<TypedInit *>(Name)->getType();
+ RecTy *Type = dyn_cast<TypedInit>(Name)->getType();
BinOpInit *NewName =
BinOpInit::get(BinOpInit::STRCONCAT,
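All of these conversions lean on LLVM-style RTTI: each Init subclass carries a kind tag and a classof predicate, so dyn_cast needs no C++ RTTI, which is what lets the TableGen build drop LLVM_REQUIRES_RTTI above. A simplified standalone sketch of the mechanism, not the real llvm/Support/Casting.h:

enum InitKind { IK_String, IK_Int };

struct Init {
  const InitKind Kind;
  explicit Init(InitKind K) : Kind(K) {}
  InitKind getKind() const { return Kind; }
};

struct StringInit : Init {
  StringInit() : Init(IK_String) {}
  static bool classof(const Init *I) { return I->getKind() == IK_String; }
};

// Checked downcast via the kind tag; no dynamic_cast, no vtables required.
template <typename To, typename From>
To *dyn_cast(From *V) {
  return To::classof(V) ? static_cast<To *>(V) : 0;
}

int main() {
  StringInit S;
  Init *I = &S;
  return dyn_cast<StringInit>(I) ? 0 : 1;
}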
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 0ed75f014ee..b1f9f724efd 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -93,7 +93,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
// Do not allow assignments like 'X = X'. This will just cause infinite loops
// in the resolution machinery.
if (BitList.empty())
- if (VarInit *VI = dynamic_cast<VarInit*>(V))
+ if (VarInit *VI = dyn_cast<VarInit>(V))
if (VI->getNameInit() == ValName)
return false;
@@ -102,7 +102,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
// initializer.
//
if (!BitList.empty()) {
- BitsInit *CurVal = dynamic_cast<BitsInit*>(RV->getValue());
+ BitsInit *CurVal = dyn_cast<BitsInit>(RV->getValue());
if (CurVal == 0)
return Error(Loc, "Value '" + ValName->getAsUnquotedString()
+ "' is not a bits type");
@@ -114,7 +114,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
}
// We should have a BitsInit type now.
- BitsInit *BInit = dynamic_cast<BitsInit*>(BI);
+ BitsInit *BInit = dyn_cast<BitsInit>(BI);
assert(BInit != 0);
SmallVector<Init *, 16> NewBits(CurVal->getNumBits());
@@ -310,7 +310,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
if (IterVals.size() != Loops.size()) {
assert(IterVals.size() < Loops.size());
ForeachLoop &CurLoop = Loops[IterVals.size()];
- ListInit *List = dynamic_cast<ListInit *>(CurLoop.ListValue);
+ ListInit *List = dyn_cast<ListInit>(CurLoop.ListValue);
if (List == 0) {
Error(Loc, "Loop list is not a list");
return true;
@@ -335,7 +335,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
// Set the iterator values now.
for (unsigned i = 0, e = IterVals.size(); i != e; ++i) {
VarInit *IterVar = IterVals[i].IterVar;
- TypedInit *IVal = dynamic_cast<TypedInit *>(IterVals[i].IterValue);
+ TypedInit *IVal = dyn_cast<TypedInit>(IterVals[i].IterValue);
if (IVal == 0) {
Error(Loc, "foreach iterator value is untyped");
return true;
@@ -406,8 +406,7 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) {
RecTy *Type = 0;
if (CurRec) {
- const TypedInit *CurRecName =
- dynamic_cast<const TypedInit *>(CurRec->getNameInit());
+ const TypedInit *CurRecName = dyn_cast<TypedInit>(CurRec->getNameInit());
if (!CurRecName) {
TokError("Record name is not typed!");
return 0;
@@ -780,7 +779,7 @@ Init *TGParser::ParseIDValue(Record *CurRec,
for (LoopVector::iterator i = Loops.begin(), iend = Loops.end();
i != iend;
++i) {
- VarInit *IterVar = dynamic_cast<VarInit *>(i->IterVar);
+ VarInit *IterVar = dyn_cast<VarInit>(i->IterVar);
if (IterVar && IterVar->getName() == Name)
return IterVar;
}
@@ -855,9 +854,9 @@ Init *TGParser::ParseOperation(Record *CurRec) {
if (Code == UnOpInit::HEAD
|| Code == UnOpInit::TAIL
|| Code == UnOpInit::EMPTY) {
- ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
- StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
- TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS);
+ ListInit *LHSl = dyn_cast<ListInit>(LHS);
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
if (LHSl == 0 && LHSs == 0 && LHSt == 0) {
TokError("expected list or string type argument in unary operator");
return 0;
@@ -884,7 +883,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
}
if (LHSl) {
Init *Item = LHSl->getElement(0);
- TypedInit *Itemt = dynamic_cast<TypedInit*>(Item);
+ TypedInit *Itemt = dyn_cast<TypedInit>(Item);
if (Itemt == 0) {
TokError("untyped list element in unary operator");
return 0;
@@ -1046,24 +1045,24 @@ Init *TGParser::ParseOperation(Record *CurRec) {
RecTy *MHSTy = 0;
RecTy *RHSTy = 0;
- if (TypedInit *MHSt = dynamic_cast<TypedInit*>(MHS))
+ if (TypedInit *MHSt = dyn_cast<TypedInit>(MHS))
MHSTy = MHSt->getType();
- if (BitsInit *MHSbits = dynamic_cast<BitsInit*>(MHS))
+ if (BitsInit *MHSbits = dyn_cast<BitsInit>(MHS))
MHSTy = BitsRecTy::get(MHSbits->getNumBits());
- if (dynamic_cast<BitInit*>(MHS))
+ if (isa<BitInit>(MHS))
MHSTy = BitRecTy::get();
- if (TypedInit *RHSt = dynamic_cast<TypedInit*>(RHS))
+ if (TypedInit *RHSt = dyn_cast<TypedInit>(RHS))
RHSTy = RHSt->getType();
- if (BitsInit *RHSbits = dynamic_cast<BitsInit*>(RHS))
+ if (BitsInit *RHSbits = dyn_cast<BitsInit>(RHS))
RHSTy = BitsRecTy::get(RHSbits->getNumBits());
- if (dynamic_cast<BitInit*>(RHS))
+ if (isa<BitInit>(RHS))
RHSTy = BitRecTy::get();
// For UnsetInit, it's typed from the other hand.
- if (dynamic_cast<UnsetInit*>(MHS))
+ if (isa<UnsetInit>(MHS))
MHSTy = RHSTy;
- if (dynamic_cast<UnsetInit*>(RHS))
+ if (isa<UnsetInit>(RHS))
RHSTy = MHSTy;
if (!MHSTy || !RHSTy) {
@@ -1082,7 +1081,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
break;
}
case tgtok::XForEach: {
- TypedInit *MHSt = dynamic_cast<TypedInit *>(MHS);
+ TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
if (MHSt == 0) {
TokError("could not get type for !foreach");
return 0;
@@ -1091,7 +1090,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
break;
}
case tgtok::XSubst: {
- TypedInit *RHSt = dynamic_cast<TypedInit *>(RHS);
+ TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
if (RHSt == 0) {
TokError("could not get type for !subst");
return 0;
@@ -1315,7 +1314,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
for (std::vector<Init *>::iterator i = Vals.begin(), ie = Vals.end();
i != ie;
++i) {
- TypedInit *TArg = dynamic_cast<TypedInit*>(*i);
+ TypedInit *TArg = dyn_cast<TypedInit>(*i);
if (TArg == 0) {
TokError("Untyped list element");
return 0;
@@ -1498,7 +1497,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
// Create a !strconcat() operation, first casting each operand to
// a string if necessary.
- TypedInit *LHS = dynamic_cast<TypedInit *>(Result);
+ TypedInit *LHS = dyn_cast<TypedInit>(Result);
if (!LHS) {
Error(PasteLoc, "LHS of paste is not typed!");
return 0;
@@ -1525,7 +1524,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
default:
Init *RHSResult = ParseValue(CurRec, ItemType, ParseNameMode);
- RHS = dynamic_cast<TypedInit *>(RHSResult);
+ RHS = dyn_cast<TypedInit>(RHSResult);
if (!RHS) {
Error(PasteLoc, "RHS of paste is not typed!");
return 0;
@@ -1716,7 +1715,7 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
default: TokError("Unknown token when expecting a range list"); return 0;
case tgtok::l_square: { // '[' ValueList ']'
Init *List = ParseSimpleValue(0, 0, ParseForeachMode);
- ForeachListValue = dynamic_cast<ListInit*>(List);
+ ForeachListValue = dyn_cast<ListInit>(List);
if (ForeachListValue == 0) {
TokError("Expected a Value list");
return 0;
@@ -2257,7 +2256,7 @@ InstantiateMulticlassDef(MultiClass &MC,
Init *DefName = DefProto->getNameInit();
- StringInit *DefNameString = dynamic_cast<StringInit *>(DefName);
+ StringInit *DefNameString = dyn_cast<StringInit>(DefName);
if (DefNameString != 0) {
// We have a fully expanded string so there are no operators to
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 6b98d446b00..ae531c4ea88 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -102,8 +102,6 @@ public:
virtual void print(raw_ostream &O) const;
void print(raw_ostream *O) const { if (O) print(*O); }
void dump() const;
-
- static bool classof(const ARMConstantPoolValue *) { return true; }
};
inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
@@ -158,7 +156,6 @@ public:
static bool classof(const ARMConstantPoolValue *APV) {
return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
}
- static bool classof(const ARMConstantPoolConstant *) { return true; }
};
/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
@@ -192,7 +189,6 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isExtSymbol();
}
- static bool classof(const ARMConstantPoolSymbol *) { return true; }
};
/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
@@ -225,7 +221,6 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isMachineBasicBlock();
}
- static bool classof(const ARMConstantPoolMBB *) { return true; }
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp
index d88bf0c8fa1..7bca0edf915 100644
--- a/lib/Target/ARM/ARMELFWriterInfo.cpp
+++ b/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM)
- : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits() == 64,
+ : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits(0) == 64,
TM.getDataLayout()->isLittleEndian()) {
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 051aab05cbd..b2eb5784879 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -122,6 +122,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT, Custom);
@@ -1655,22 +1656,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void
-ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ARMTargetLowering::HandleByVal(
+ CCState *State, unsigned &size, unsigned Align) const {
unsigned reg = State->AllocateReg(GPRArgRegs, 4);
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
if ((!State->isFirstByValRegValid()) &&
(ARM::R0 <= reg) && (reg <= ARM::R3)) {
- State->setFirstByValReg(reg);
- // At a call site, a byval parameter that is split between
- // registers and memory needs its size truncated here. In a
- // function prologue, such byval parameters are reassembled in
- // memory, and are not truncated.
- if (State->getCallOrPrologue() == Call) {
- unsigned excess = 4 * (ARM::R4 - reg);
- assert(size >= excess && "expected larger existing stack allocation");
- size -= excess;
+ if (Subtarget->isAAPCS_ABI() && Align > 4) {
+ unsigned AlignInRegs = Align / 4;
+ unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
+ for (unsigned i = 0; i < Waste; ++i)
+ reg = State->AllocateReg(GPRArgRegs, 4);
+ }
+ if (reg != 0) {
+ State->setFirstByValReg(reg);
+ // At a call site, a byval parameter that is split between
+ // registers and memory needs its size truncated here. In a
+ // function prologue, such byval parameters are reassembled in
+ // memory, and are not truncated.
+ if (State->getCallOrPrologue() == Call) {
+ unsigned excess = 4 * (ARM::R4 - reg);
+ assert(size >= excess && "expected larger existing stack allocation");
+ size -= excess;
+ }
}
}
// Confiscate any remaining parameter registers to preclude their
@@ -1803,6 +1813,14 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
+ // If Caller's vararg or byval argument has been split between registers and
+ // stack, do not perform tail call, since part of the argument is in caller's
+ // local frame.
+ const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
+ getInfo<ARMFunctionInfo>();
+ if (AFI_Caller->getVarArgsRegSaveSize())
+ return false;
+
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
@@ -4221,9 +4239,26 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// If we are VDUPing a value that comes directly from a vector, that will
// cause an unnecessary move to and from a GPR, where instead we could
// just use VDUPLANE.
- if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
- N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ // We need to create a new undef vector to use for the VDUPLANE if the
+ // size of the vector from which we get the value is different than the
+ // size of the vector that we need to create. We will insert the element
+ // such that the register coalescer will remove unnecessary copies.
+ if (VT != Value->getOperand(0).getValueType()) {
+ ConstantSDNode *constIndex;
+ constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
+ assert(constIndex && "The index is not a constant!");
+ unsigned index = constIndex->getAPIntValue().getLimitedValue() %
+ VT.getVectorNumElements();
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
+ Value, DAG.getConstant(index, MVT::i32)),
+ DAG.getConstant(index, MVT::i32));
+ } else {
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
Value->getOperand(0), Value->getOperand(1));
+ }
+ }
else
N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
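The new AAPCS branch in HandleByVal rounds the first byval register up to the argument's alignment by deliberately wasting registers. A toy of the arithmetic, with register numbers standing in for R0-R3:

#include <cassert>

int main() {
  const unsigned R4 = 4;                       // first register past R0-R3
  unsigned reg = 1;                            // next free GPR, say R1
  unsigned Align = 8;                          // byval alignment in bytes
  unsigned AlignInRegs = Align / 4;            // = 2 registers
  unsigned Waste = (R4 - reg) % AlignInRegs;   // = (4 - 1) % 2 = 1
  assert(Waste == 1);                          // skip R1; byval starts at R2,
  assert((reg + Waste) % AlignInRegs == 0);    // which is 8-byte aligned
  return 0;
}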
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index b5020c97108..9acab0b0834 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -480,7 +480,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
/// HandleByVal - Target-specific cleanup for ByVal support.
- virtual void HandleByVal(CCState *, unsigned &) const;
+ virtual void HandleByVal(CCState *, unsigned &, unsigned) const;
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index de655f1a0ee..ede4def2b73 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4500,12 +4500,25 @@ def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
+ (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
+ (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
(VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VCNTiQ,
@@ -4525,11 +4538,23 @@ def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
+ (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
+ (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
(VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON]>;
+def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 6fdf873a8f0..c51ae24c50e 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -71,7 +71,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
- FrameLowering(Subtarget) {
+ FrameLowering(Subtarget),
+ STTI(&TLInfo) {
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
@@ -104,7 +105,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
TSInfo(*this),
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
- : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)),
+ STTI(&TLInfo){
}
namespace {
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index f91e5bbd477..7a65a7f062d 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -25,6 +25,7 @@
#include "Thumb1FrameLowering.h"
#include "Thumb2InstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/OwningPtr.h"
@@ -67,6 +68,8 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
ARMFrameLowering FrameLowering;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -88,7 +91,12 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
virtual const ARMFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
-
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
+ }
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const DataLayout *getDataLayout() const { return &DL; }
virtual const ARMELFWriterInfo *getELFWriterInfo() const {
@@ -110,6 +118,8 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
OwningPtr<ARMFrameLowering> FrameLowering;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -138,6 +148,12 @@ public:
virtual const ARMFrameLowering *getFrameLowering() const {
return FrameLowering.get();
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
+ }
virtual const DataLayout *getDataLayout() const { return &DL; }
virtual const ARMELFWriterInfo *getELFWriterInfo() const {
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
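ARM is the first of several targets in this patch to grow the same pair of members and accessors. Reduced to a standalone model with hypothetical type names, the shape is simply ownership plus const hooks:

struct ScalarTTI {};   // stands in for ScalarTargetTransformImpl
struct VectorTTI {};   // stands in for VectorTargetTransformImpl

class TargetMachineModel {
  ScalarTTI STTI;      // the target machine owns the transform-info objects
  VectorTTI VTTI;
public:
  virtual ~TargetMachineModel() {}
  virtual const ScalarTTI *getScalarTTI() const { return &STTI; }
  virtual const VectorTTI *getVectorTTI() const { return &VTTI; }
};

int main() {
  TargetMachineModel TM;
  return (TM.getScalarTTI() != 0 && TM.getVectorTTI() != 0) ? 0 : 1;
}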
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 93e5eca6252..0eec8622e97 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -259,9 +259,10 @@ public:
unsigned checkTargetMatchPredicate(MCInst &Inst);
- bool MatchAndEmitInstruction(SMLoc IDLoc,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
};
} // end anonymous namespace
@@ -7474,17 +7475,14 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
static const char *getSubtargetFeatureName(unsigned Val);
bool ARMAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
- unsigned Kind;
- unsigned ErrorInfo;
unsigned MatchResult;
- MatchInstMapAndConstraints MapAndConstraints;
- MatchResult = MatchInstructionImpl(Operands, Kind, Inst,
- MapAndConstraints, ErrorInfo,
- /*matchingInlineAsm*/ false);
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
switch (MatchResult) {
default: break;
case Match_Success:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 6bddc42b373..b404e6c6e01 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -67,9 +67,6 @@ public:
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
-
- static bool classof(const ARMMCExpr *) { return true; }
-
};
} // end namespace llvm
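The deleted overload reflects a cleanup of LLVM's casting machinery: isa<> and dyn_cast<> now consult only the base-pointer classof, so the tautological derived-pointer form is dead weight. A hedged illustration:

    // After this change, a cast still needs only the MCExpr-based predicate:
    bool isARMTargetExpr(const llvm::MCExpr *E) {
      // isa<> evaluates ARMMCExpr::classof(E); the removed
      // classof(const ARMMCExpr *) overload was never consulted.
      return llvm::isa<llvm::ARMMCExpr>(E);
    }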
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 096ef001ed3..48df199437b 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_library(LLVMTarget
TargetMachineC.cpp
TargetRegisterInfo.cpp
TargetSubtargetInfo.cpp
+ TargetTransformImpl.cpp
)
foreach(t ${LLVM_TARGETS_TO_BUILD})
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index a37ad7f85ae..e92ad01e1d5 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -43,7 +43,8 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
FrameLowering(Subtarget),
TLInfo(*this),
TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ InstrItins(Subtarget.getInstrItineraryData()),
+    STTI(&TLInfo) {
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 58699a30d26..7f53ea6fbeb 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -20,6 +20,7 @@
#include "SPUSelectionDAGInfo.h"
#include "SPUFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
namespace llvm {
@@ -34,6 +35,8 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUTargetLowering TLInfo;
SPUSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
SPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -77,6 +80,12 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
+  virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+    return &STTI;
+  }
+  virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+    return &VTTI;
+  }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 096e2bc13b0..61fb4e98ecd 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -474,9 +474,9 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
Out << "AttributeWithIndex PAWI;"; nl(Out);
for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
unsigned index = PAL.getSlot(i).Index;
- Attributes::Builder attrs(PAL.getSlot(i).Attrs);
+ AttrBuilder attrs(PAL.getSlot(i).Attrs);
Out << "PAWI.Index = " << index << "U;\n";
- Out << " Attributes::Builder B;\n";
+ Out << " AttrBuilder B;\n";
#define HANDLE_ATTR(X) \
if (attrs.hasAttribute(Attributes::X)) \
@@ -509,13 +509,11 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
HANDLE_ATTR(NonLazyBind);
#undef HANDLE_ATTR
if (attrs.hasAttribute(Attributes::StackAlignment))
- Out << "B.addStackAlignmentAttr(Attribute::constructStackAlignmentFromInt("
- << attrs.getStackAlignment()
- << "))";
+ Out << "B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")";
nl(Out);
attrs.removeAttribute(Attributes::StackAlignment);
assert(!attrs.hasAttributes() && "Unhandled attribute!");
- Out << "PAWI.Attrs = Attributes::get(B);";
+ Out << "PAWI.Attrs = Attributes::get(mod->getContext(), B);";
nl(Out);
Out << "Attrs.push_back(PAWI);";
nl(Out);
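The Attributes rework surfaces here as two API changes: stack alignment is now passed as a plain integer, and materializing an Attributes value requires an LLVMContext. A minimal sketch, assuming AttrBuilder::addAttribute mirrors the hasAttribute form used above (M is a hypothetical Module*):

    // Hedged sketch of the post-rename attribute flow used by the printer.
    llvm::Attributes makeAttrs(llvm::Module *M) {
      llvm::AttrBuilder B;
      B.addAttribute(llvm::Attributes::NoUnwind);  // assumed mirror of hasAttribute
      B.addStackAlignmentAttr(16);                 // raw alignment, no wrapper call
      return llvm::Attributes::get(M->getContext(), B);
    }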
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index d198a3f45b5..353542a8097 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -74,7 +74,8 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget),
- InstrItins(&Subtarget.getInstrItineraryData()) {
+ InstrItins(&Subtarget.getInstrItineraryData()),
+ STTI(&TLInfo) {
setMCUseCFI(false);
}
@@ -87,7 +88,7 @@ bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
PM.add(createDeadCodeEliminationPass());
PM.add(createConstantPropagationPass());
PM.add(createLoopUnrollPass());
- PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ PM.add(createLoopStrengthReducePass());
return true;
}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index ade5b3e9c1f..7a4215c119a 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -21,6 +21,7 @@
#include "HexagonFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -34,6 +35,8 @@ class HexagonTargetMachine : public LLVMTargetMachine {
HexagonSelectionDAGInfo TSInfo;
HexagonFrameLowering FrameLowering;
const InstrItineraryData* InstrItins;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
@@ -68,6 +71,14 @@ public:
return &TSInfo;
}
+  virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+    return &STTI;
+  }
+
+  virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+    return &VTTI;
+  }
+
virtual const DataLayout *getDataLayout() const { return &DL; }
static unsigned getModuleMatchQuality(const Module &M);
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index d1e18b24c39..9e28a3d7d09 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -44,9 +44,10 @@ class MBlazeAsmParser : public MCTargetAsmParser {
bool ParseDirectiveWord(unsigned Size, SMLoc L);
- bool MatchAndEmitInstruction(SMLoc IDLoc,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
/// @name Auto-generated Match Functions
/// {
@@ -312,15 +313,13 @@ static unsigned MatchRegisterName(StringRef Name);
/// }
//
bool MBlazeAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
- unsigned Kind;
- unsigned ErrorInfo;
- MatchInstMapAndConstraints MapAndConstraints;
- switch (MatchInstructionImpl(Operands, Kind, Inst, MapAndConstraints,
- ErrorInfo, /*matchingInlineAsm*/ false)) {
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm)) {
default: break;
case Match_Success:
Out.EmitInstruction(Inst);
diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
index 4ca30ba81f7..6b575099e59 100644
--- a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM)
- : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits() == 64,
+ : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits(0) == 64,
TM.getDataLayout()->isLittleEndian()) {
}
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 91aaf940e62..1c2e3b26613 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -83,7 +83,7 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
-/// This defines the "getAttributes(ID id)" method.
+/// This defines the "getAttributes(LLVMContext &C, ID id)" method.
#define GET_INTRINSIC_ATTRIBUTES
#include "MBlazeGenIntrinsics.inc"
#undef GET_INTRINSIC_ATTRIBUTES
@@ -104,7 +104,8 @@ Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Type **Tys,
unsigned numTy) const {
assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
- AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID);
+ AttrListPtr AList = getAttributes(M->getContext(),
+ (mblazeIntrinsic::ID) IntrID);
return cast<Function>(M->getOrInsertFunction(getName(IntrID),
getType(M->getContext(), IntrID),
AList));
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 1f2cf6d9d2f..cb5f46062d9 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -42,7 +42,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
FrameLowering(Subtarget),
TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ InstrItins(Subtarget.getInstrItineraryData()), STTI(&TLInfo) {
}
namespace {
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index d949e54f0d8..34648b9b9ae 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
class formatted_raw_ostream;
@@ -39,6 +40,8 @@ namespace llvm {
MBlazeIntrinsicInfo IntrinsicInfo;
MBlazeELFWriterInfo ELFWriterInfo;
InstrItineraryData InstrItins;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
@@ -77,6 +80,10 @@ namespace llvm {
virtual const MBlazeELFWriterInfo *getELFWriterInfo() const {
return &ELFWriterInfo;
}
+  virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+    return &STTI;
+  }
+  virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+    return &VTTI;
+  }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index fc677aec38e..113378a5f31 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -881,7 +881,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
- uint64_t SlotSize = TD->getPointerSize();
+ uint64_t SlotSize = TD->getPointerSize(0);
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
true);
FuncInfo->setRAIndex(ReturnAddrIndex);
@@ -901,7 +901,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
- DAG.getConstant(TD->getPointerSize(), MVT::i16);
+ DAG.getConstant(TD->getPointerSize(0), MVT::i16);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
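getPointerSize and getPointerSizeInBits take an address-space argument throughout this patch; targets with a single flat space, like MSP430, simply pass 0. A hedged sketch of the updated queries:

    #include "llvm/DataLayout.h"

    // Hedged sketch: DataLayout pointer queries are now per address space.
    uint64_t returnSlotBytes(const llvm::DataLayout *TD) {
      return TD->getPointerSize(0);          // default address space
    }
    unsigned pointerBits(const llvm::DataLayout *TD, unsigned AS) {
      return TD->getPointerSizeInBits(AS);   // e.g. 16 on MSP430 for AS 0
    }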
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index da5899b86d5..29ea6812162 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
// FIXME: Check DataLayout string.
DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget) { }
+ FrameLowering(Subtarget), STTI(&TLInfo) { }
namespace {
/// MSP430 Code Generator Pass Configuration Options.
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index ba3cef1f2ad..186172ede42 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -24,6 +24,7 @@
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -36,6 +37,8 @@ class MSP430TargetMachine : public LLVMTargetMachine {
MSP430TargetLowering TLInfo;
MSP430SelectionDAGInfo TSInfo;
MSP430FrameLowering FrameLowering;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
MSP430TargetMachine(const Target &T, StringRef TT,
@@ -61,7 +64,12 @@ public:
virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
-
+  virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+    return &STTI;
+  }
+  virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+    return &VTTI;
+  }
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
}; // MSP430TargetMachine.
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index c2980ffeea8..00649d2f187 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -67,9 +67,10 @@ class MipsAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
- bool MatchAndEmitInstruction(SMLoc IDLoc,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
@@ -452,16 +453,13 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
}
bool MipsAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
- unsigned Kind;
- unsigned ErrorInfo;
- MatchInstMapAndConstraints MapAndConstraints;
- unsigned MatchResult = MatchInstructionImpl(Operands, Kind, Inst,
- MapAndConstraints, ErrorInfo,
- /*matchingInlineAsm*/ false);
+ unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
switch (MatchResult) {
default: break;
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 127c5b89e8d..8991433005d 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -58,12 +58,22 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
- unsigned Opc = 0, ZeroReg = 0;
+ unsigned Opc = 0;
+
+ if (Mips::CPU16RegsRegClass.contains(DestReg) &&
+ Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::MoveR3216;
+ else if (Mips::CPURegsRegClass.contains(DestReg) &&
+ Mips::CPU16RegsRegClass.contains(SrcReg))
+ Opc = Mips::Move32R16;
+ else if ((SrcReg == Mips::HI) &&
+ (Mips::CPU16RegsRegClass.contains(DestReg)))
+ Opc = Mips::Mfhi16, SrcReg = 0;
+
+ else if ((SrcReg == Mips::LO) &&
+ (Mips::CPU16RegsRegClass.contains(DestReg)))
+ Opc = Mips::Mflo16, SrcReg = 0;
- if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
- if (Mips::CPURegsRegClass.contains(SrcReg))
- Opc = Mips::Move32R16;
- }
assert(Opc && "Cannot copy registers");
@@ -72,9 +82,6 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (DestReg)
MIB.addReg(DestReg, RegState::Define);
- if (ZeroReg)
- MIB.addReg(ZeroReg);
-
if (SrcReg)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
}
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index e1c90466fbf..eba201a0ea9 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -118,6 +118,14 @@ class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
!strconcat(asmstr, "\t$rx, $ry"), [], itin> {
}
+
+//
+// Maybe refactor, but we need $zero as a dummy first parameter.
+//
+class FRR16_div_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs ), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$$zero, $rx, $ry"), [], itin> ;
+
class FRR16_M_ins<bits<5> f, string asmstr,
InstrItinClass itin> :
FRR16<f, (outs CPU16Regs:$rx), (ins),
@@ -196,6 +204,24 @@ def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
// To do a bitwise logical AND.
def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
+//
+// Format: DIV rx, ry MIPS16e
+// Purpose: Divide Word
+// To divide 32-bit signed integers.
+//
+def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
+ let Defs = [HI, LO];
+}
+
+//
+// Format: DIVU rx, ry MIPS16e
+// Purpose: Divide Unsigned Word
+// To divide 32-bit unsigned integers.
+//
+def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
+ let Defs = [HI, LO];
+}
+
//
// Format: JR ra MIPS16e
@@ -551,5 +577,20 @@ def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>;
// Small immediates
def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
+//
+// MipsDivRem
+//
+def: Mips16Pat
+ <(MipsDivRem CPU16Regs:$rx, CPU16Regs:$ry),
+ (DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
+
+//
+// MipsDivRemU
+//
+def: Mips16Pat
+ <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry),
+ (DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
+
+
def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
(AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>;
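For intuition, the DivRxRy16/DivuRxRy16 patterns above let a quotient and remainder of the same operands share a single divide, with Mfhi16/Mflo16 reading the two halves out of HI and LO. Roughly the code a source-level pair like this would select (illustrative only, not taken from the patch):

    /* Illustrative source: one MIPS16e div feeds both results; the remainder
       comes back via mfhi (HI) and the quotient via mflo (LO). */
    int quot_rem(int a, int b, int *rem) {
      *rem = a % b;   /* Mfhi16 */
      return a / b;   /* Mflo16 */
    }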
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index bd472d6f67e..99a9f25abd2 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -315,3 +315,33 @@ def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
// Instruction aliases
//===----------------------------------------------------------------------===//
def : InstAlias<"move $dst,$src", (DADD CPU64Regs:$dst,CPU64Regs:$src,ZERO_64)>;
+
+/// Move between CPU and coprocessor registers
+let DecoderNamespace = "Mips64" in {
+def MFC0_3OP64 : MFC3OP<0x10, 0, (outs CPU64Regs:$rt),
+ (ins CPU64Regs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">;
+def MTC0_3OP64 : MFC3OP<0x10, 4, (outs CPU64Regs:$rd, uimm16:$sel),
+ (ins CPU64Regs:$rt),"mtc0\t$rt, $rd, $sel">;
+def MFC2_3OP64 : MFC3OP<0x12, 0, (outs CPU64Regs:$rt),
+ (ins CPU64Regs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">;
+def MTC2_3OP64 : MFC3OP<0x12, 4, (outs CPU64Regs:$rd, uimm16:$sel),
+ (ins CPU64Regs:$rt),"mtc2\t$rt, $rd, $sel">;
+def DMFC0_3OP64 : MFC3OP<0x10, 1, (outs CPU64Regs:$rt),
+ (ins CPU64Regs:$rd, uimm16:$sel),"dmfc0\t$rt, $rd, $sel">;
+def DMTC0_3OP64 : MFC3OP<0x10, 5, (outs CPU64Regs:$rd, uimm16:$sel),
+ (ins CPU64Regs:$rt),"dmtc0\t$rt, $rd, $sel">;
+def DMFC2_3OP64 : MFC3OP<0x12, 1, (outs CPU64Regs:$rt),
+ (ins CPU64Regs:$rd, uimm16:$sel),"dmfc2\t$rt, $rd, $sel">;
+def DMTC2_3OP64 : MFC3OP<0x12, 5, (outs CPU64Regs:$rd, uimm16:$sel),
+ (ins CPU64Regs:$rt),"dmtc2\t$rt, $rd, $sel">;
+}
+// Two operand (implicit 0 selector) versions:
+def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+def : InstAlias<"dmfc0 $rt, $rd", (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+def : InstAlias<"dmtc0 $rt, $rd", (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+def : InstAlias<"dmfc2 $rt, $rd", (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+def : InstAlias<"dmtc2 $rt, $rd", (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 9c196dd82f3..4c3981d9f68 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -53,7 +53,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
TLInfo(*this), TSInfo(*this), JITInfo(),
- ELFWriterInfo(false, isLittle) {
+ ELFWriterInfo(false, isLittle), STTI(&TLInfo) {
}
void MipsebTargetMachine::anchor() { }
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 3a01828dd1d..60822d0c055 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -24,6 +24,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
class formatted_raw_ostream;
@@ -38,6 +39,8 @@ class MipsTargetMachine : public LLVMTargetMachine {
MipsSelectionDAGInfo TSInfo;
MipsJITInfo JITInfo;
MipsELFWriterInfo ELFWriterInfo;
+ ScalarTargetTransformImpl STTI;
+  VectorTargetTransformImpl VTTI;
public:
MipsTargetMachine(const Target &T, StringRef TT,
@@ -74,6 +77,12 @@ public:
virtual const MipsELFWriterInfo *getELFWriterInfo() const {
return &ELFWriterInfo;
}
+  virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+    return &STTI;
+  }
+  virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+    return &VTTI;
+  }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index d3dfb35e261..c46094569e9 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -126,8 +126,10 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
return Base;
// Truncate/sext the offset to the pointer size.
- if (TD.getPointerSizeInBits() != 64) {
- int SExtAmount = 64-TD.getPointerSizeInBits();
+ unsigned AS = PtrVal->getType()->isPointerTy() ?
+ cast<PointerType>(PtrVal->getType())->getAddressSpace() : 0;
+ if (TD.getPointerSizeInBits(AS) != 64) {
+ int SExtAmount = 64-TD.getPointerSizeInBits(AS);
Offset = (Offset << SExtAmount) >> SExtAmount;
}
@@ -1378,7 +1380,7 @@ getOpenCLAlignment(const DataLayout *TD,
const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
if (FTy)
- return TD->getPointerPrefAlignment();
+ return TD->getPointerPrefAlignment(0);
return TD->getPrefTypeAlignment(Ty);
}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index dbfc660687e..7519b4a0831 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -72,7 +72,8 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
DL(Subtarget.getDataLayout()),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit),
+ STTI(&TLInfo)
/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index d58a0768581..11bc9d4fa69 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -44,6 +45,9 @@ class NVPTXTargetMachine : public LLVMTargetMachine {
// Hold Strings that can be free'd all together with NVPTXTargetMachine
ManagedStringPool ManagedStrPool;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
+
//bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
// bool DisableVerify, MCContext *&OutCtx);
@@ -72,6 +76,12 @@ public:
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
return &TSInfo;
}
+  virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+    return &STTI;
+  }
+  virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+    return &VTTI;
+  }
//virtual bool addInstSelector(PassManagerBase &PM,
// CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 914a9b0dcea..d8abd9fba07 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -439,7 +439,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
const DataLayout *TD = TM.getDataLayout();
- bool isPPC64 = TD->getPointerSizeInBits() == 64;
+ bool isPPC64 = TD->getPointerSizeInBits(0) == 64;
if (isPPC64 && !TOC.empty()) {
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
@@ -451,8 +451,8 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
E = TOC.end(); I != E; ++I) {
OutStreamer.EmitLabel(I->second);
- OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) +
- "[TC]," + I->first->getName());
+ MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
+ OutStreamer.EmitTCEntry(*S);
}
}
@@ -545,7 +545,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits(0) == 64;
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
@@ -640,7 +640,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits(0) == 64;
// Darwin/PPC always uses mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index b1c02e57f88..caf7bf2be79 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -50,6 +50,11 @@ static const uint16_t VRRegNo[] = {
/// to manipulate the VRSAVE register, even though it uses vector registers.
/// This can happen when the only registers used are known to be live in or out
/// of the function. Remove all of the VRSAVE related code from the function.
+/// FIXME: The removal of the code results in a compile failure at -O0 when the
+/// function contains a function call, as the GPR containing original VRSAVE
+/// contents is spilled and reloaded around the call. Without the prolog code,
+/// the spill instruction refers to an undefined register. This code needs
+/// to account for all uses of that GPR.
static void RemoveVRSaveCode(MachineInstr *MI) {
MachineBasicBlock *Entry = MI->getParent();
MachineFunction *MF = Entry->getParent();
@@ -283,12 +288,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
- for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
- if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
- HandleVRSaveUpdate(MBBI, TII);
- break;
+ if (!Subtarget.isSVR4ABI())
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
}
- }
// Move MBBI back to the beginning of the function.
MBBI = MBB.begin();
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b52452ce89b..6195441cfc0 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -53,7 +53,9 @@ namespace {
GlobalBaseReg = 0;
SelectionDAGISel::runOnMachineFunction(MF);
- InsertVRSaveCode(MF);
+ if (!PPCSubTarget.isSVR4ABI())
+ InsertVRSaveCode(MF);
+
return true;
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 64bbcdfa94e..c18250a78f7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2035,9 +2035,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
// All aggregates smaller than 8 bytes must be passed right-justified.
- if (ObjSize==1 || ObjSize==2) {
- CurArgOffset = CurArgOffset + (4 - ObjSize);
- }
+ if (ObjSize < PtrByteSize)
+ CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
// The value of the object is its address.
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -2087,7 +2086,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
++GPR_idx;
ArgOffset += PtrByteSize;
} else {
- ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ ArgOffset += ArgSize - j;
break;
}
}
@@ -2142,6 +2141,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
++FPR_idx;
} else {
needsLoad = true;
+ ArgSize = PtrByteSize;
}
ArgOffset += 8;
@@ -3638,12 +3638,13 @@ PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
ArgOffset += PtrByteSize;
} else {
- SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
- // This must go outside the CALLSEQ_START..END.
+ // The MEMCPY must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
CallSeqStart.getNode()->getOperand(1));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
@@ -3652,6 +3653,25 @@ PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
ArgOffset += PtrByteSize;
}
continue;
+ } else if (isSVR4ABI && GPR_idx == NumGPRs && Size < 8) {
+ // Case: Size is 3, 5, 6, or 7 for SVR4 and we're out of registers.
+ // This is the same case as 1, 2, and 4 for SVR4 with no registers.
+ // FIXME: Separate into 64-bit SVR4 and Darwin versions of this
+ // function, and combine the duplicated code chunks.
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // The MEMCPY must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ ArgOffset += PtrByteSize;
+ continue;
}
// Copy entire object into memory. There are cases where gcc-generated
// code assumes it is there, even if it could be put entirely into
@@ -3786,6 +3806,13 @@ PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
++GPR_idx;
}
} else {
+ // Single-precision floating-point values are mapped to the
+ // second (rightmost) word of the stack doubleword.
+ if (Arg.getValueType() == MVT::f32 && isPPC64 && isSVR4ABI) {
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ }
+
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
isPPC64, isTailCall, false, MemOpChains,
TailCallArguments, dl);
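Both fixes in this file implement the same 64-bit ELF rule: an object smaller than a register slot that is passed in memory on a big-endian target is right-justified within its 8-byte doubleword, so its address is offset by the slack. A hedged helper capturing the arithmetic (the name is ours, not the patch's):

    // Hedged sketch: right-justify a small object inside its argument slot.
    // For an f32 (4 bytes) in an 8-byte doubleword this yields the +4 above.
    static uint64_t rightJustifiedOffset(uint64_t SlotOffset,
                                         uint64_t SlotSize,
                                         uint64_t ObjSize) {
      return SlotOffset + (SlotSize - ObjSize);
    }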
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d2df6645bb0..d9d68446f53 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -570,12 +570,15 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// STVX VAL, 0, R0
//
// FIXME: We use R0 here, because it isn't available for RA.
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
+ unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
FrameIdx, 0, 0));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
.addReg(SrcReg, getKillRegState(isKill))
- .addReg(PPC::R0)
- .addReg(PPC::R0));
+ .addReg(GPR0)
+ .addReg(GPR0));
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -707,10 +710,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
// Dest = LVX 0, R0
//
// FIXME: We use R0 here, because it isn't available for RA.
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
+ unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0)
- .addReg(PPC::R0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0)
+ .addReg(GPR0));
} else {
llvm_unreachable("Unknown regclass!");
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 459c3589d3f..d1232114732 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -498,7 +498,7 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
} else if (CRSpillFrameIdx) {
FrameIdx = CRSpillFrameIdx;
} else {
- MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo();
+ MachineFrameInfo *MFI = (const_cast<MachineFunction &>(MF)).getFrameInfo();
FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
CRSpillFrameIdx = FrameIdx;
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 5f39b8d2c29..b8613834753 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -43,7 +43,8 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ InstrItins(Subtarget.getInstrItineraryData()),
+    STTI(&TLInfo) {
// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 02d69fd15d1..c168433a71b 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -21,6 +21,7 @@
#include "PPCISelLowering.h"
#include "PPCSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
namespace llvm {
@@ -36,6 +37,8 @@ class PPCTargetMachine : public LLVMTargetMachine {
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -63,6 +66,12 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
+  virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+    return &STTI;
+  }
+  virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+    return &VTTI;
+  }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 8b7559c2f9e..1d8cc771ddf 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayout()),
InstrInfo(Subtarget),
TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget) {
+    FrameLowering(Subtarget), STTI(&TLInfo) {
}
namespace {
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index c9f2d68eb19..0fbe2d7cda3 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -22,6 +22,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -32,6 +33,8 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -52,6 +55,12 @@ public:
virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
+  virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
+    return &STTI;
+  }
+  virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
+    return &VTTI;
+  }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index b0b5c875b82..393178a4692 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -26,6 +26,7 @@ using namespace llvm;
void llvm::initializeTarget(PassRegistry &Registry) {
initializeDataLayoutPass(Registry);
initializeTargetLibraryInfoPass(Registry);
+ initializeTargetTransformInfoPass(Registry);
}
void LLVMInitializeTarget(LLVMPassRegistryRef R) {
@@ -55,13 +56,21 @@ LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) {
}
unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
- return unwrap(TD)->getPointerSize();
+ return unwrap(TD)->getPointerSize(0);
+}
+
+unsigned LLVMPointerSizeForAS(LLVMTargetDataRef TD, unsigned AS) {
+ return unwrap(TD)->getPointerSize(AS);
}
LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
return wrap(unwrap(TD)->getIntPtrType(getGlobalContext()));
}
+LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) {
+ return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), AS));
+}
+
unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
}
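The two new C API entry points simply expose the per-address-space queries. A short usage sketch (the LLVMPointerSizeForAS and LLVMIntPtrTypeForAS calls are the ones added above; the surrounding code is illustrative):

    #include "llvm-c/Target.h"

    /* Hedged usage sketch of the address-space-aware C API added above. */
    unsigned pointerSizeInAS1(LLVMTargetDataRef TD) {
      unsigned Default = LLVMPointerSize(TD);          /* address space 0  */
      unsigned AS1     = LLVMPointerSizeForAS(TD, 1);  /* explicit space 1 */
      LLVMTypeRef IP1  = LLVMIntPtrTypeForAS(TD, 1);   /* matching intptr  */
      (void)Default; (void)IP1;
      return AS1;
    }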
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
new file mode 100644
index 00000000000..1cb5edab9d0
--- /dev/null
+++ b/lib/Target/TargetTransformImpl.cpp
@@ -0,0 +1,43 @@
+//===-- llvm/Target/TargetTransformImpl.cpp - Target Loop Trans Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const {
+ return TLI->isLegalAddImmediate(imm);
+}
+
+bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const {
+ return TLI->isLegalICmpImmediate(imm);
+}
+
+bool ScalarTargetTransformImpl::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ return TLI->isLegalAddressingMode(AM, Ty);
+}
+
+bool ScalarTargetTransformImpl::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return TLI->isTruncateFree(Ty1, Ty2);
+}
+
+bool ScalarTargetTransformImpl::isTypeLegal(Type *Ty) const {
+ EVT T = TLI->getValueType(Ty);
+ return TLI->isTypeLegal(T);
+}
+
+unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const {
+ return TLI->getJumpBufAlignment();
+}
+
+unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
+ return TLI->getJumpBufSize();
+}
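The point of the new file is to give IR-level passes a legality oracle backed by TargetLowering without pulling in codegen internals directly. A hedged sketch of the consumer side, assuming the accessor added to the target machines above (and that it may return null for targets that don't wire it up):

    // Hedged sketch: an IR pass querying the scalar transform info.
    bool canFoldAddImmediate(const llvm::TargetMachine &TM, int64_t Imm) {
      const llvm::ScalarTargetTransformInfo *STTI =
          TM.getScalarTargetTransformInfo();
      return STTI && STTI->isLegalAddImmediate(Imm);  // null: optional hook
    }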
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index df34359a661..454664e3ed1 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -40,8 +40,8 @@ private:
bool Error(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
- bool matchingInlineAsm = false) {
- if (matchingInlineAsm) return true;
+ bool MatchingInlineAsm = false) {
+ if (MatchingInlineAsm) return true;
return Parser.Error(L, Msg, Ranges);
}
@@ -63,14 +63,10 @@ private:
bool processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
- bool MatchAndEmitInstruction(SMLoc IDLoc,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
- bool MatchInstruction(SMLoc IDLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &Kind, unsigned &Opcode,
- MatchInstMapAndConstraintsImpl &MapAndConstraints,
- unsigned &OrigErrorInfo, bool matchingInlineAsm = false);
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
/// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
@@ -756,6 +752,7 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
if (getParser().ParseExpression(Disp, End)) return 0;
+ End = Parser.getTok().getLoc();
return X86Operand::CreateMem(Disp, Start, End, Size);
}
@@ -1520,29 +1517,18 @@ processInstruction(MCInst &Inst,
}
bool X86AsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
- unsigned Kind;
- unsigned Opcode;
- unsigned ErrorInfo;
- MatchInstMapAndConstraints MapAndConstraints;
- bool Error = MatchInstruction(IDLoc, Operands, Out, Kind, Opcode,
- MapAndConstraints, ErrorInfo);
- return Error;
-}
-
-bool X86AsmParser::
-MatchInstruction(SMLoc IDLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &Kind, unsigned &Opcode,
- SmallVectorImpl<std::pair< unsigned, std::string > > &MapAndConstraints,
- unsigned &OrigErrorInfo, bool matchingInlineAsm) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpected empty operand list!");
X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
assert(Op->isToken() && "Leading operand should always be a mnemonic!");
ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
+ // Clear the opcode.
+ Opcode = ~0x0;
+
// First, handle aliases that expand to multiple instructions.
// FIXME: This should be replaced with a real .td file alias mechanism.
// Also, MatchInstructionImpl should actually *do* the EmitInstruction
@@ -1554,7 +1540,7 @@ MatchInstruction(SMLoc IDLoc,
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
- if (!matchingInlineAsm)
+ if (!MatchingInlineAsm)
Out.EmitInstruction(Inst);
const char *Repl =
@@ -1577,26 +1563,26 @@ MatchInstruction(SMLoc IDLoc,
MCInst Inst;
// First, try a direct match.
- switch (MatchInstructionImpl(Operands, Kind, Inst, MapAndConstraints,
- OrigErrorInfo, matchingInlineAsm,
+ switch (MatchInstructionImpl(Operands, Inst,
+ ErrorInfo, MatchingInlineAsm,
isParsingIntelSyntax())) {
default: break;
case Match_Success:
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other.
- if (!matchingInlineAsm)
+ if (!MatchingInlineAsm)
while (processInstruction(Inst, Operands))
;
Inst.setLoc(IDLoc);
- if (!matchingInlineAsm)
+ if (!MatchingInlineAsm)
Out.EmitInstruction(Inst);
Opcode = Inst.getOpcode();
return false;
case Match_MissingFeature:
Error(IDLoc, "instruction requires a CPU feature not currently enabled",
- EmptyRanges, matchingInlineAsm);
+ EmptyRanges, MatchingInlineAsm);
return true;
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
@@ -1629,24 +1615,18 @@ MatchInstruction(SMLoc IDLoc,
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
unsigned Match1, Match2, Match3, Match4;
- unsigned tKind;
- MatchInstMapAndConstraints tMapAndConstraints[4];
- Match1 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[0],
- ErrorInfoIgnore, isParsingIntelSyntax());
- if (Match1 == Match_Success) Kind = tKind;
+ Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
Tmp[Base.size()] = Suffixes[1];
- Match2 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[1],
- ErrorInfoIgnore, isParsingIntelSyntax());
- if (Match2 == Match_Success) Kind = tKind;
+ Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
Tmp[Base.size()] = Suffixes[2];
- Match3 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[2],
- ErrorInfoIgnore, isParsingIntelSyntax());
- if (Match3 == Match_Success) Kind = tKind;
+ Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
Tmp[Base.size()] = Suffixes[3];
- Match4 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[3],
- ErrorInfoIgnore, isParsingIntelSyntax());
- if (Match4 == Match_Success) Kind = tKind;
+ Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
// Restore the old token.
Op->setTokenValue(Base);
@@ -1659,10 +1639,9 @@ MatchInstruction(SMLoc IDLoc,
(Match3 == Match_Success) + (Match4 == Match_Success);
if (NumSuccessfulMatches == 1) {
Inst.setLoc(IDLoc);
- if (!matchingInlineAsm)
+ if (!MatchingInlineAsm)
Out.EmitInstruction(Inst);
Opcode = Inst.getOpcode();
- // FIXME: Handle the map and constraints.
return false;
}
@@ -1689,7 +1668,7 @@ MatchInstruction(SMLoc IDLoc,
OS << "'" << Base << MatchChars[i] << "'";
}
OS << ")";
- Error(IDLoc, OS.str(), EmptyRanges, matchingInlineAsm);
+ Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
return true;
}
@@ -1700,28 +1679,28 @@ MatchInstruction(SMLoc IDLoc,
if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
(Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
if (!WasOriginallyInvalidOperand) {
- ArrayRef<SMRange> Ranges = matchingInlineAsm ? EmptyRanges :
+ ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
Op->getLocRange();
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
- Ranges, matchingInlineAsm);
+ Ranges, MatchingInlineAsm);
}
// Recover location info for the operand if we know which was the problem.
- if (OrigErrorInfo != ~0U) {
- if (OrigErrorInfo >= Operands.size())
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction",
- EmptyRanges, matchingInlineAsm);
+ EmptyRanges, MatchingInlineAsm);
- X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo];
+ X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
if (Operand->getStartLoc().isValid()) {
SMRange OperandRange = Operand->getLocRange();
return Error(Operand->getStartLoc(), "invalid operand for instruction",
- OperandRange, matchingInlineAsm);
+ OperandRange, MatchingInlineAsm);
}
}
return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
- matchingInlineAsm);
+ MatchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
@@ -1729,7 +1708,7 @@ MatchInstruction(SMLoc IDLoc,
if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
(Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
Error(IDLoc, "instruction requires a CPU feature not currently enabled",
- EmptyRanges, matchingInlineAsm);
+ EmptyRanges, MatchingInlineAsm);
return true;
}
@@ -1738,13 +1717,13 @@ MatchInstruction(SMLoc IDLoc,
if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
(Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
Error(IDLoc, "invalid operand for instruction", EmptyRanges,
- matchingInlineAsm);
+ MatchingInlineAsm);
return true;
}
// If all of these were an outright failure, report it in a useless way.
Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
- EmptyRanges, matchingInlineAsm);
+ EmptyRanges, MatchingInlineAsm);
return true;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 3809f3d3853..0ca1209449f 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -307,7 +307,9 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
};
// This CPU doesn't support long NOPs. If needed, add more.
- if (CPU == "geode") {
+ // FIXME: Can we get this from the subtarget somehow?
+ if (CPU == "generic" || CPU == "i386" || CPU == "i486" || CPU == "i586" ||
+ CPU == "pentium" || CPU == "pentium-mmx" || CPU == "geode") {
for (uint64_t i = 0; i < Count; ++i)
OW->Write8(0x90);
return true;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index c704ca17013..6b8385db6c6 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
@@ -693,7 +692,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(), 0);
+ TD->getPointerSize(0), 0);
}
Stubs.clear();
}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 8acef9dc7ba..e7f817e3a98 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -317,7 +317,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
- int stackGrowth = -TD->getPointerSize();
+ int stackGrowth = -TD->getPointerSize(0);
// FIXME: This is a dirty hack. The code itself is a mess right now. It
// should be rewritten from scratch and generalized at some point.
@@ -717,7 +717,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
std::vector<MachineMove> &Moves = MMI.getFrameMoves();
const DataLayout *TD = MF.getTarget().getDataLayout();
uint64_t NumBytes = 0;
- int stackGrowth = -TD->getPointerSize();
+ int stackGrowth = -TD->getPointerSize(0);
if (HasFP) {
// Calculate required stack adjustment.
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5a19f8ab981..0efeef20f2b 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1302,7 +1302,9 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
// that are not a MemSDNode, and thus don't have proper addrspace info.
Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
- Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+ Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
+ Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
+ Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
unsigned AddrSpace =
cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
// AddrSpace 256 -> GS, 257 -> FS.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7e43e5432d8..2f09e9e6ff1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -457,6 +457,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+  // NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj
+  // exception handling; they are a lightweight setjmp/longjmp replacement
+  // for continuations, user-level threading, and the like. No other SjLj
+  // exception interfaces are implemented, so please don't build your own
+  // exception handling on top of them.
+  // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// Darwin ABI issue.
setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
@@ -939,6 +947,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
+
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
}
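Marking FP_EXTEND/FP_ROUND as Custom for v2f32 routes those nodes through the target's LowerOperation hook. A hedged sketch of the dispatch shape (the real switch lives elsewhere in this file, and lowerFP_EXTEND is the function added further down in this patch):

    // Hedged sketch of the Custom-lowering dispatch for the new cases.
    SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
      switch (Op.getOpcode()) {
      case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); // added below
      // ... all other custom cases elided ...
      default: return SDValue();
      }
    }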
@@ -2649,7 +2660,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
- uint64_t SlotSize = TD->getPointerSize();
+ uint64_t SlotSize = TD->getPointerSize(0);
if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
// Number smaller than 12 so just add the difference.
Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
@@ -3017,7 +3028,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
- uint64_t SlotSize = TD->getPointerSize();
+ uint64_t SlotSize = TD->getPointerSize(0);
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
false);
FuncInfo->setRAIndex(ReturnAddrIndex);
@@ -5161,86 +5172,6 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
-// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
-// constraint of matching input/output vector elements.
-SDValue
-X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
- SDNode *N = Op.getNode();
- EVT VT = Op.getValueType();
- unsigned NumElts = Op.getNumOperands();
-
- // Check supported types and sub-targets.
- //
- // Only v2f32 -> v2f64 needs special handling.
- if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
- return SDValue();
-
- SDValue VecIn;
- EVT VecInVT;
- SmallVector<int, 8> Mask;
- EVT SrcVT = MVT::Other;
-
- // Check the patterns could be translated into X86vfpext.
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue In = N->getOperand(i);
- unsigned Opcode = In.getOpcode();
-
- // Skip if the element is undefined.
- if (Opcode == ISD::UNDEF) {
- Mask.push_back(-1);
- continue;
- }
-
- // Quit if one of the elements is not defined from 'fpext'.
- if (Opcode != ISD::FP_EXTEND)
- return SDValue();
-
- // Check how the source of 'fpext' is defined.
- SDValue L2In = In.getOperand(0);
- EVT L2InVT = L2In.getValueType();
-
- // Check the original type
- if (SrcVT == MVT::Other)
- SrcVT = L2InVT;
- else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
- return SDValue();
-
- // Check whether the value being 'fpext'ed is extracted from the same
- // source.
- Opcode = L2In.getOpcode();
-
- // Quit if it's not extracted with a constant index.
- if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(L2In.getOperand(1)))
- return SDValue();
-
- SDValue ExtractedFromVec = L2In.getOperand(0);
-
- if (VecIn.getNode() == 0) {
- VecIn = ExtractedFromVec;
- VecInVT = ExtractedFromVec.getValueType();
- } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
- return SDValue();
-
- Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
- }
-
- // Quit if all operands of BUILD_VECTOR are undefined.
- if (!VecIn.getNode())
- return SDValue();
-
- // Fill the remaining mask as undef.
- for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
- Mask.push_back(-1);
-
- return DAG.getNode(X86ISD::VFPEXT, DL, VT,
- DAG.getVectorShuffle(VecInVT, DL,
- VecIn, DAG.getUNDEF(VecInVT),
- &Mask[0]));
-}
-
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -5273,10 +5204,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (Broadcast.getNode())
return Broadcast;
- SDValue FpExt = LowerVectorFpExtend(Op, DAG);
- if (FpExt.getNode())
- return FpExt;
-
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
@@ -7724,7 +7651,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
false, false, false, 0);
- SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()),
+ SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize(0)),
getPointerTy());
IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
@@ -8215,6 +8142,20 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
return FIST;
}
+SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue In = Op.getOperand(0);
+ EVT SVT = In.getValueType();
+
+ assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
+
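+ // Widen the v2f32 input to v4f32 with an undef upper half; VFPEXT then
+ // extends the two low f32 lanes into the v2f64 result (matching cvtps2pd,
+ // which reads only the low 64 bits of its source).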
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
+ In, DAG.getUNDEF(SVT)));
+}
+
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
@@ -9215,6 +9156,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // X86 doesn't have an i8 cmov. If both operands are the result of a truncate,
+ // widen the cmov and push the truncate through. This avoids introducing a new
+ // branch during isel and doesn't add any extensions.
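+ // For example, i8 (cmov (trunc i32 %t), (trunc i32 %f)) becomes
+ // trunc (i32 (cmov %t, %f)) when neither input is a CopyFromReg.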
+ if (Op.getValueType() == MVT::i8 &&
+ Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
+ SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
+ if (T1.getValueType() == T2.getValueType() &&
+ // Blacklist CopyFromReg to avoid partial register stalls.
+ T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode() != ISD::CopyFromReg) {
+ SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
+ SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond);
+ return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
+ }
+ }
+
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
@@ -10345,7 +10301,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
- DAG.getConstant(TD->getPointerSize(),
+ DAG.getConstant(TD->getPointerSize(0),
Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
@@ -10377,7 +10333,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
- return DAG.getIntPtrConstant(2*TD->getPointerSize());
+ return DAG.getIntPtrConstant(2*TD->getPointerSize(0));
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
@@ -10392,7 +10348,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
- DAG.getIntPtrConstant(TD->getPointerSize()));
+ DAG.getIntPtrConstant(TD->getPointerSize(0)));
StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
@@ -10403,6 +10359,21 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
}
+SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
return Op.getOperand(0);
}
@@ -11407,6 +11378,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -11426,6 +11398,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -11535,6 +11509,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::FP_ROUND: {
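+ // Lower the illegal v2f32 FP_ROUND result as a v4f32 VFPROUND (cvtpd2ps);
+ // only the two low lanes of the result carry the rounded values.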
+ SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+ Results.push_back(V);
+ return;
+ }
case ISD::READCYCLECOUNTER: {
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = N->getOperand(0);
@@ -11713,6 +11692,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
+ case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP";
+ case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
@@ -11729,6 +11710,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
+ case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@@ -12389,12 +12371,9 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
// Hi
MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- if (i == X86::AddrDisp) {
+ if (i == X86::AddrDisp)
MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
- // Don't forget to transfer the target flag.
- MachineOperand &MO = MIB->getOperand(MIB->getNumOperands()-1);
- MO.setTargetFlags(MI->getOperand(MemOpndSlot + i).getTargetFlags());
- } else
+ else
MIB.addOperand(MI->getOperand(MemOpndSlot + i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
@@ -13261,6 +13240,173 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
}
MachineBasicBlock *
+X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg;
+ unsigned MemOpndSlot = 0;
+
+ unsigned CurOp = 0;
+
+ DstReg = MI->getOperand(CurOp++).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MemOpndSlot = CurOp;
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // buf[Label_Offset] = restoreMBB
+ // SjLjSetup restoreMBB
+ //
+ // mainMBB:
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+ // restoreMBB:
+ // v_restore = 1
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+ MF->push_back(restoreMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ unsigned PtrImmStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
+ const int64_t Label_Offset = 1 * PVT.getStoreSize();
+
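+ // Buffer layout: slot 0 holds the frame pointer, slot 1 (Label_Offset) the
+ // resume address, and slot 2 the stack pointer; emitEHSjLjLongJmp below
+ // reloads all three.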
+ // Store IP
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrImmStoreOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(MemOpndSlot + i), Label_Offset);
+ else
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ }
+ MIB.addMBB(restoreMBB);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
+ .addMBB(restoreMBB);
+ MIB.addRegMask(RegInfo->getNoPreservedMask());
+ thisMBB->addSuccessor(mainMBB);
+ thisMBB->addSuccessor(restoreMBB);
+
+ // mainMBB:
+ // v_main = 0
+ BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(X86::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(restoreMBB);
+
+ // restoreMBB:
+ BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
+ BuildMI(restoreMBB, DL, TII->get(X86::JMP_4)).addMBB(sinkMBB);
+ restoreMBB->addSuccessor(sinkMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here and not referenced, it is treated as a GPR.
+ unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
+ unsigned SP = RegInfo->getStackRegister();
+
+ MachineInstrBuilder MIB;
+
+ const int64_t Label_Offset = 1 * PVT.getStoreSize();
+ const int64_t SP_Offset = 2 * PVT.getStoreSize();
+
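+ // These offsets mirror the buffer written by emitEHSjLjSetJmp: FP at slot 0,
+ // the resume IP at slot 1, and SP at slot 2.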
+ unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
+ unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
+
+ // Reload FP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Reload IP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(i), Label_Offset);
+ else
+ MIB.addOperand(MI->getOperand(i));
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Reload SP
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(i), SP_Offset);
+ else
+ MIB.addOperand(MI->getOperand(i));
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Jump
+ BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
@@ -13475,6 +13621,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VAARG_64:
return EmitVAARG64WithCustomInserter(MI, BB);
+
+ case X86::EH_SjLj_SetJmp32:
+ case X86::EH_SjLj_SetJmp64:
+ return emitEHSjLjSetJmp(MI, BB);
+
+ case X86::EH_SjLj_LongJmp32:
+ case X86::EH_SjLj_LongJmp64:
+ return emitEHSjLjLongJmp(MI, BB);
}
}
@@ -14478,6 +14632,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
+ std::swap(TrueOp, FalseOp);
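+ // Keep the SDValue operands in sync with the swapped constant nodes so the
+ // combines below see a consistent (TrueOp, FalseOp) pair.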
}
// Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
@@ -14560,6 +14715,46 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
}
}
}
+
+ // Handle these cases:
+ // (select (x != c), e, c) -> (select (x != c), e, x)
+ // (select (x == c), c, e) -> (select (x == c), x, e)
+ // where c is an integer constant, and the "select" is the combination
+ // of CMOV and CMP.
+ //
+ // The rationale for this change is that a conditional move from a constant
+ // needs two instructions, whereas a conditional move from a register needs
+ // only one.
+ //
+ // CAVEAT: Replacing a constant with a symbolic value may obscure some
+ // instruction-combining opportunities, so this optimization is postponed
+ // as late as possible.
+ //
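+ // For example, (select (x == 7), 7, y) becomes (select (x == 7), x, y), so
+ // the cmov reads x's register directly instead of materializing the
+ // constant 7 first.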
+ if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) {
+ // The DCI checks above delay this combine until after legalization.
+
+ ConstantSDNode *CmpAgainst = 0;
+ if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
+ (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) {
+
+ if (CC == X86::COND_NE &&
+ CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueOp, FalseOp);
+ }
+
+ if (CC == X86::COND_E &&
+ CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
+ SDValue Ops[] = { FalseOp, Cond.getOperand(0),
+ DAG.getConstant(CC, MVT::i8), Cond };
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops,
+ array_lengthof(Ops));
+ }
+ }
+ }
+
return SDValue();
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 1cae7ed2681..40e966ad676 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -217,6 +217,12 @@ namespace llvm {
// EH_RETURN - Exception Handling helpers.
EH_RETURN,
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
/// TC_RETURN - Tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
@@ -233,6 +239,9 @@ namespace llvm {
// VFPEXT - Vector FP extend.
VFPEXT,
+ // VFPROUND - Vector FP round.
+ VFPROUND,
+
// VSHL, VSRL - 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -788,6 +797,7 @@ namespace llvm {
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
@@ -806,6 +816,8 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
@@ -818,8 +830,6 @@ namespace llvm {
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
-
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -904,6 +914,12 @@ namespace llvm {
MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index f27b6f7f53a..9e6f27988f7 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -165,6 +165,33 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
}
+let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in {
+ def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(X86eh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+ def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(X86eh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+ }
+}
+
+let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
+ def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
+
//===----------------------------------------------------------------------===//
// Pseudo instructions used by segmented stacks.
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 90354354367..46281efa571 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -93,6 +93,9 @@ def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>]>>;
+def X86vfpround: SDNode<"X86ISD::VFPROUND",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>]>>;
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 5c6084fe008..2f637685b3f 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -216,6 +216,14 @@ def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
[SDNPHasChain]>;
+def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2aa4f3f4dbb..cc1291a8a0f 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2125,6 +2125,10 @@ let Predicates = [HasAVX] in {
(VCVTDQ2PSYrm addr:$src)>;
// Match fround and fextend for 128/256-bit conversions
+ def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+ (VCVTPD2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+ (VCVTPD2PSXrm addr:$src)>;
def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
(VCVTPD2PSYrr VR256:$src)>;
def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
@@ -2139,7 +2143,12 @@ let Predicates = [HasAVX] in {
}
let Predicates = [UseSSE2] in {
- // Match fextend for 128 conversions
+ // Match fround and fextend for 128 conversions
+ def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+ (CVTPD2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+ (CVTPD2PSrm addr:$src)>;
+
def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
(CVTPS2PDrr VR128:$src)>;
}
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 6e4db73c3a9..c44549c30ac 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
@@ -29,6 +28,31 @@
#include "llvm/ADT/SmallString.h"
using namespace llvm;
+namespace {
+
+/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
+class X86MCInstLower {
+ MCContext &Ctx;
+ Mangler *Mang;
+ const MachineFunction &MF;
+ const TargetMachine &TM;
+ const MCAsmInfo &MAI;
+ X86AsmPrinter &AsmPrinter;
+public:
+ X86MCInstLower(Mangler *mang, const MachineFunction &MF,
+ X86AsmPrinter &asmprinter);
+
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+private:
+ MachineModuleInfoMachO &getMachOMMI() const;
+};
+
+} // end anonymous namespace
+
X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
X86AsmPrinter &asmprinter)
: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
deleted file mode 100644
index b4d4cfd301a..00000000000
--- a/lib/Target/X86/X86MCInstLower.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===-- X86MCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86_MCINSTLOWER_H
-#define X86_MCINSTLOWER_H
-
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
- class MCAsmInfo;
- class MCContext;
- class MCInst;
- class MCOperand;
- class MCSymbol;
- class MachineInstr;
- class MachineFunction;
- class MachineModuleInfoMachO;
- class MachineOperand;
- class Mangler;
- class TargetMachine;
- class X86AsmPrinter;
-
-/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
-class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
- MCContext &Ctx;
- Mangler *Mang;
- const MachineFunction &MF;
- const TargetMachine &TM;
- const MCAsmInfo &MAI;
- X86AsmPrinter &AsmPrinter;
-public:
- X86MCInstLower(Mangler *mang, const MachineFunction &MF,
- X86AsmPrinter &asmprinter);
-
- void Lower(const MachineInstr *MI, MCInst &OutMI) const;
-
- MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
- MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
-
-private:
- MachineModuleInfoMachO &getMachOMMI() const;
-};
-
-}
-
-#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c840ea21a89..4bcf6b1f19e 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -261,6 +261,11 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
return CSR_64_RegMask;
}
+const uint32_t*
+X86RegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 0287fa22062..7932ede8dd6 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -100,6 +100,7 @@ public:
/// callee-save registers on this target.
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+ const uint32_t *getNoPreservedMask() const;
/// getReservedRegs - Returns a bitset indexed by physical register number
/// indicating if a register is a special register that has particular uses and
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f8cced885d1..655ede79ba3 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -48,7 +48,8 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
- JITInfo(*this) {
+ JITInfo(*this),
+ STTI(&TLInfo) {
}
void X86_64TargetMachine::anchor() { }
@@ -64,7 +65,8 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
- JITInfo(*this) {
+ JITInfo(*this),
+ STTI(&TLInfo) {
}
/// X86TargetMachine ctor - Create an X86 target.
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 5301299c1f3..4bad695b4c4 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -85,6 +86,8 @@ class X86_32TargetMachine : public X86TargetMachine {
X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -103,6 +106,12 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
+ }
};
/// X86_64TargetMachine - X86 64-bit target machine.
@@ -114,6 +123,8 @@ class X86_64TargetMachine : public X86TargetMachine {
X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -132,6 +143,12 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
+ }
};
} // End llvm namespace
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index c71d978ad81..0b7e3e10d4b 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
InstrInfo(),
FrameLowering(Subtarget),
TLInfo(*this),
- TSInfo(*this) {
+ TSInfo(*this), STTI(&TLInfo) {
}
namespace {
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index f7fec29f544..c60c6a37f95 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -20,6 +20,7 @@
#include "XCoreISelLowering.h"
#include "XCoreSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/DataLayout.h"
namespace llvm {
@@ -31,6 +32,8 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreFrameLowering FrameLowering;
XCoreTargetLowering TLInfo;
XCoreSelectionDAGInfo TSInfo;
+ ScalarTargetTransformImpl STTI;
+ VectorTargetTransformImpl VTTI;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -53,6 +56,12 @@ public:
virtual const TargetRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
+ virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
+ return &STTI;
+ }
+ virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
+ return &VTTI;
+ }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 6f6ff9ca2d5..8a0274b5ff7 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -518,8 +518,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
const AttrListPtr &PAL = F->getAttributes();
// Add any return attributes.
- if (Attributes attrs = PAL.getRetAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+ Attributes attrs = PAL.getRetAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ attrs));
// First, determine the new argument list
unsigned ArgIndex = 1;
@@ -535,7 +537,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
} else if (!ArgsToPromote.count(I)) {
// Unchanged argument
Params.push_back(I->getType());
- if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
+ Attributes attrs = PAL.getParamAttributes(ArgIndex);
+ if (attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
} else if (I->use_empty()) {
// Dead argument (which are always marked as promotable)
@@ -588,8 +591,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
// Add any function attributes.
- if (Attributes attrs = PAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+ attrs = PAL.getFnAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ attrs));
Type *RetTy = FTy->getReturnType();
@@ -634,8 +639,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
const AttrListPtr &CallPAL = CS.getAttributes();
// Add any return attributes.
- if (Attributes attrs = CallPAL.getRetAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+ Attributes attrs = CallPAL.getRetAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ attrs));
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
@@ -646,7 +653,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
Args.push_back(*AI); // Unmodified argument
- if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
} else if (ByValArgsToTransform.count(I)) {
@@ -707,13 +715,16 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Push any varargs arguments on the list.
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
Args.push_back(*AI);
- if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
}
// Add any function attributes.
- if (Attributes attrs = CallPAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+ attrs = CallPAL.getFnAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ attrs));
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index b107669b177..fc22548db70 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -276,8 +276,10 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
SmallVector<AttributeWithIndex, 8> AttributesVec;
for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
AttributesVec.push_back(PAL.getSlot(i));
- if (Attributes FnAttrs = PAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ Attributes FnAttrs = PAL.getFnAttributes();
+ if (FnAttrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
PAL = AttrListPtr::get(AttributesVec);
}
@@ -762,13 +764,17 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
- RAttrs &= ~Attributes::typeIncompatible(NRetTy);
+ RAttrs =
+ Attributes::get(NRetTy->getContext(), AttrBuilder(RAttrs).
+ removeAttributes(Attributes::typeIncompatible(NRetTy)));
else
- assert((RAttrs & Attributes::typeIncompatible(NRetTy)) == 0
- && "Return attributes no longer compatible?");
+ assert(!AttrBuilder(RAttrs).
+ hasAttributes(Attributes::typeIncompatible(NRetTy)) &&
+ "Return attributes no longer compatible?");
- if (RAttrs)
- AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+ if (RAttrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ RAttrs));
// Remember which arguments are still alive.
SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
@@ -785,7 +791,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Get the original parameter attributes (skipping the first one, that is
// for the return value).
- if (Attributes Attrs = PAL.getParamAttributes(i + 1))
+ Attributes Attrs = PAL.getParamAttributes(i + 1);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
} else {
++NumArgumentsEliminated;
@@ -795,7 +802,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
if (FnAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
// Reconstruct the AttributesList based on the vector we constructed.
AttrListPtr NewPAL = AttrListPtr::get(AttributesVec);
@@ -831,9 +839,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
Attributes RAttrs = CallPAL.getRetAttributes();
Attributes FnAttrs = CallPAL.getFnAttributes();
// Adjust in case the function was changed to return void.
- RAttrs &= ~Attributes::typeIncompatible(NF->getReturnType());
- if (RAttrs)
- AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+ RAttrs =
+ Attributes::get(NF->getContext(), AttrBuilder(RAttrs).
+ removeAttributes(Attributes::typeIncompatible(NF->getReturnType())));
+ if (RAttrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ RAttrs));
// Declare these outside of the loops, so we can reuse them for the second
// loop, which loops the varargs.
@@ -845,19 +856,22 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[i]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ Attributes Attrs = CallPAL.getParamAttributes(i + 1);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
}
// Push any varargs arguments on the list. Don't forget their attributes.
for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
Args.push_back(*I);
- if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ Attributes Attrs = CallPAL.getParamAttributes(i + 1);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
}
if (FnAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
// Reconstruct the AttributesList based on the vector we constructed.
AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec);
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 43e12d44441..d8f374c330e 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -212,15 +212,17 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
MadeChange = true;
// Clear out any existing attributes.
- Attributes::Builder B;
+ AttrBuilder B;
B.addAttribute(Attributes::ReadOnly)
.addAttribute(Attributes::ReadNone);
- F->removeAttribute(~0, Attributes::get(B));
+ F->removeAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(F->getContext(), B));
// Add in the new attribute.
B.clear();
B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone);
- F->addAttribute(~0, Attributes::get(B));
+ F->addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(F->getContext(), B));
if (ReadsMemory)
++NumReadOnly;
@@ -355,7 +357,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
ArgumentGraph AG;
- Attributes::Builder B;
+ AttrBuilder B;
B.addAttribute(Attributes::NoCapture);
// Check each function in turn, determining which pointer arguments are not
@@ -379,7 +381,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
A != E; ++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
- A->addAttr(Attributes::get(B));
+ A->addAttr(Attributes::get(F->getContext(), B));
++NumNoCapture;
Changed = true;
}
@@ -394,7 +396,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(Attributes::get(B));
+ A->addAttr(Attributes::get(F->getContext(), B));
++NumNoCapture;
Changed = true;
} else {
@@ -427,7 +429,9 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
// eg. "void f(int* x) { if (...) f(x); }"
if (ArgumentSCC[0]->Uses.size() == 1 &&
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
- ArgumentSCC[0]->Definition->addAttr(Attributes::get(B));
+ ArgumentSCC[0]->Definition->
+ addAttr(Attributes::get(ArgumentSCC[0]->Definition->getContext(), B));
++NumNoCapture;
Changed = true;
}
@@ -469,7 +473,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- A->addAttr(Attributes::get(B));
+ A->addAttr(Attributes::get(A->getContext(), B));
++NumNoCapture;
Changed = true;
}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index a1b976577a7..678189b3d6c 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2061,28 +2061,26 @@ static void ChangeCalleesToFastCall(Function *F) {
}
}
-static AttrListPtr StripNest(const AttrListPtr &Attrs) {
- Attributes::Builder B;
- B.addAttribute(Attributes::Nest);
-
+static AttrListPtr StripNest(LLVMContext &C, const AttrListPtr &Attrs) {
for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
if (!Attrs.getSlot(i).Attrs.hasAttribute(Attributes::Nest))
continue;
// There can be only one.
- return Attrs.removeAttr(Attrs.getSlot(i).Index, Attributes::get(B));
+ return Attrs.removeAttr(C, Attrs.getSlot(i).Index,
+ Attributes::get(C, Attributes::Nest));
}
return Attrs;
}
static void RemoveNestAttribute(Function *F) {
- F->setAttributes(StripNest(F->getAttributes()));
+ F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
if (isa<BlockAddress>(*UI))
continue;
CallSite User(cast<Instruction>(*UI));
- User.setAttributes(StripNest(User.getAttributes()));
+ User.setAttributes(StripNest(F->getContext(), User.getAttributes()));
}
}
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 6233922db92..86c76f0c0a0 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -1,4 +1,4 @@
-//===-- Scalar.cpp --------------------------------------------------------===//
+//===-- IPO.cpp -----------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 3e598abfcf6..fb4ecbfe7b0 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -137,7 +137,7 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
// If the SCC doesn't unwind or doesn't throw, note this fact.
if (!SCCMightUnwind || !SCCMightReturn)
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Attributes::Builder NewAttributes;
+ AttrBuilder NewAttributes;
if (!SCCMightUnwind)
NewAttributes.addAttribute(Attributes::NoUnwind);
@@ -146,7 +146,9 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
Function *F = (*I)->getFunction();
const AttrListPtr &PAL = F->getAttributes();
- const AttrListPtr &NPAL = PAL.addAttr(~0, Attributes::get(NewAttributes));
+ const AttrListPtr &NPAL = PAL.addAttr(F->getContext(), ~0,
+ Attributes::get(F->getContext(),
+ NewAttributes));
if (PAL != NPAL) {
MadeChange = true;
F->setAttributes(NPAL);
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 325bb20fbe8..41017c52879 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -18,6 +18,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/TargetFolder.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
namespace llvm {
class CallSite;
@@ -74,6 +75,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
DataLayout *TD;
TargetLibraryInfo *TLI;
bool MadeIRChange;
+ LibCallSimplifier *Simplifier;
public:
/// Worklist - All of the instructions that need to be simplified.
InstCombineWorklist Worklist;
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f92c4baeba7..5ad6f9111c8 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -111,10 +111,13 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// get the TBAA tag describing our copy.
if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
if (M->getNumOperands() == 3 &&
+ M->getOperand(0) &&
isa<ConstantInt>(M->getOperand(0)) &&
cast<ConstantInt>(M->getOperand(0))->isNullValue() &&
+ M->getOperand(1) &&
isa<ConstantInt>(M->getOperand(1)) &&
cast<ConstantInt>(M->getOperand(1))->getValue() == Size &&
+ M->getOperand(2) &&
isa<MDNode>(M->getOperand(2)))
CopyMD = cast<MDNode>(M->getOperand(2));
}
@@ -775,39 +778,6 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
return true;
}
-namespace {
-class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
- InstCombiner *IC;
-protected:
- void replaceCall(Value *With) {
- NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
- }
- bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
- if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
- return true;
- if (ConstantInt *SizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
- if (SizeCI->isAllOnesValue())
- return true;
- if (isString) {
- uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
- // If the length is 0 we don't know how long it is and so we can't
- // remove the check.
- if (Len == 0) return false;
- return SizeCI->getZExtValue() >= Len;
- }
- if (ConstantInt *Arg = dyn_cast<ConstantInt>(
- CI->getArgOperand(SizeArgOp)))
- return SizeCI->getZExtValue() >= Arg->getZExtValue();
- }
- return false;
- }
-public:
- InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
- Instruction *NewInstruction;
-};
-} // end anonymous namespace
-
// Try to fold some different type of calls here.
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
@@ -815,9 +785,10 @@ public:
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
if (CI->getCalledFunction() == 0) return 0;
- InstCombineFortifiedLibCalls Simplifier(this);
- Simplifier.fold(CI, TD, TLI);
- return Simplifier.NewInstruction;
+ if (Value *With = Simplifier->optimizeCall(CI))
+ return ReplaceInstUsesWith(*CI, With);
+
+ return 0;
}
static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
@@ -1036,7 +1007,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this return value.
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
- Attributes::Builder RAttrs = CallerPAL.getRetAttributes();
+ AttrBuilder RAttrs = CallerPAL.getRetAttributes();
if (RAttrs.hasAttributes(Attributes::typeIncompatible(NewRetTy)))
return false; // Attribute not compatible with transformed value.
}
@@ -1067,7 +1038,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this parameter value.
Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
- if (Attrs & Attributes::typeIncompatible(ParamTy))
+ if (AttrBuilder(Attrs).
+ hasAttributes(Attributes::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
// If the parameter is passed as a byval argument, then we have to have a
@@ -1137,7 +1109,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
attrVec.reserve(NumCommonArgs);
// Get any return attributes.
- Attributes::Builder RAttrs = CallerPAL.getRetAttributes();
+ AttrBuilder RAttrs = CallerPAL.getRetAttributes();
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
@@ -1145,7 +1117,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Add the new return attributes.
if (RAttrs.hasAttributes())
- attrVec.push_back(AttributeWithIndex::get(0, Attributes::get(RAttrs)));
+ attrVec.push_back(
+ AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attributes::get(FT->getContext(), RAttrs)));
AI = CS.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
@@ -1159,7 +1133,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
// Add any parameter attributes.
- if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+ if (PAttrs.hasAttributes())
attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
}
@@ -1187,14 +1162,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
// Add any parameter attributes.
- if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+ if (PAttrs.hasAttributes())
attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
}
}
}
- if (Attributes FnAttrs = CallerPAL.getFnAttributes())
- attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ Attributes FnAttrs = CallerPAL.getFnAttributes();
+ if (FnAttrs.hasAttributes())
+ attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
@@ -1302,8 +1280,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// mean appending it. Likewise for attributes.
// Add any result attributes.
- if (Attributes Attr = Attrs.getRetAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
+ Attributes Attr = Attrs.getRetAttributes();
+ if (Attr.hasAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attr));
{
unsigned Idx = 1;
@@ -1323,7 +1303,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Add the original argument and attributes.
NewArgs.push_back(*I);
- if (Attributes Attr = Attrs.getParamAttributes(Idx))
+ Attr = Attrs.getParamAttributes(Idx);
+ if (Attr.hasAttributes())
NewAttrs.push_back
(AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
@@ -1332,8 +1313,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
}
// Add any function attributes.
- if (Attributes Attr = Attrs.getFnAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
+ Attr = Attrs.getFnAttributes();
+ if (Attr.hasAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ Attr));
// The trampoline may have been bitcast to a bogus type (FTy).
// Handle this by synthesizing a new function type, equal to FTy
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index b59210a9df1..f3f3f8f585d 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1293,15 +1293,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
+ unsigned AS = CI.getAddressSpace();
if (TD) {
if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
- TD->getPointerSizeInBits()) {
+ TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreateTrunc(CI.getOperand(0),
TD->getIntPtrType(CI.getContext()));
return new IntToPtrInst(P, CI.getType());
}
if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
- TD->getPointerSizeInBits()) {
+ TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreateZExt(CI.getOperand(0),
TD->getIntPtrType(CI.getContext()));
return new IntToPtrInst(P, CI.getType());
@@ -1368,13 +1369,14 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
+ unsigned AS = CI.getPointerAddressSpace();
if (TD) {
- if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+ if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
TD->getIntPtrType(CI.getContext()));
return new TruncInst(P, CI.getType());
}
- if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
+ if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits(AS)) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
TD->getIntPtrType(CI.getContext()));
return new ZExtInst(P, CI.getType());
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 4d5ffddc4c7..e3e5ddae80b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -365,11 +365,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// order the state machines in complexity of the generated code.
Value *Idx = GEP->getOperand(2);
+ unsigned AS = GEP->getPointerAddressSpace();
// If the index is larger than the pointer size of the target, truncate the
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds() &&
- Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
+ Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits(AS))
Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
// If the comparison is only true for one or two elements, emit direct
@@ -528,10 +529,11 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
}
}
+ unsigned AS = cast<GetElementPtrInst>(GEP)->getPointerAddressSpace();
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
if (Offset == 0) {
// Cast to intptrty in case a truncation occurs. If an extension is needed,
// we don't need to bother extending: the extension won't affect where the
@@ -1552,7 +1554,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
- TD->getPointerSizeInBits() ==
+ TD->getPointerSizeInBits(
+ cast<PtrToIntInst>(LHSCI)->getPointerAddressSpace()) ==
cast<IntegerType>(DestTy)->getBitWidth()) {
Value *RHSOp = 0;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index edfc060888b..5356fdcba7c 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2130,6 +2130,9 @@ bool InstCombiner::runOnFunction(Function &F) {
InstCombineIRInserter(Worklist));
Builder = &TheBuilder;
+ LibCallSimplifier TheSimplifier(TD, TLI);
+ Simplifier = &TheSimplifier;
+
bool EverMadeChange = false;
// Lower dbg.declare intrinsics otherwise their value may be clobbered
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 10ab9cb6039..b566994edfc 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -148,38 +148,29 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
cl::Hidden, cl::init(-1));
namespace {
-
-/// An object of this type is created while instrumenting every function.
-struct AsanFunctionContext {
- AsanFunctionContext(Function &Function) : F(Function) { }
-
- Function &F;
-};
-
/// AddressSanitizer: instrument the code in module to find memory bugs.
-struct AddressSanitizer : public ModulePass {
+struct AddressSanitizer : public FunctionPass {
AddressSanitizer();
virtual const char *getPassName() const;
- void instrumentMop(AsanFunctionContext &AFC, Instruction *I);
- void instrumentAddress(AsanFunctionContext &AFC,
- Instruction *OrigIns, IRBuilder<> &IRB,
+ void instrumentMop(Instruction *I);
+ void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,
Value *Addr, uint32_t TypeSize, bool IsWrite);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex);
- bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI);
- void instrumentMemIntrinsicParam(AsanFunctionContext &AFC,
- Instruction *OrigIns, Value *Addr,
+ bool instrumentMemIntrinsic(MemIntrinsic *MI);
+ void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
Value *Size,
Instruction *InsertBefore, bool IsWrite);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
- bool handleFunction(Module &M, Function &F);
+ bool runOnFunction(Function &F);
void createInitializerPoisonCalls(Module &M,
Value *FirstAddr, Value *LastAddr);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
- bool poisonStackInFunction(Module &M, Function &F);
- virtual bool runOnModule(Module &M);
+ bool poisonStackInFunction(Function &F);
+ virtual bool doInitialization(Module &M);
+ virtual bool doFinalization(Module &M);
bool insertGlobalRedzones(Module &M);
static char ID; // Pass identification, replacement for typeid
@@ -216,6 +207,8 @@ struct AddressSanitizer : public ModulePass {
Type *IntptrPtrTy;
Function *AsanCtorFunction;
Function *AsanInitFunction;
+ Function *AsanStackMallocFunc, *AsanStackFreeFunc;
+ Function *AsanHandleNoReturnFunc;
Instruction *CtorInsertBefore;
OwningPtr<BlackList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
@@ -230,8 +223,8 @@ char AddressSanitizer::ID = 0;
INITIALIZE_PASS(AddressSanitizer, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
false, false)
-AddressSanitizer::AddressSanitizer() : ModulePass(ID) { }
-ModulePass *llvm::createAddressSanitizerPass() {
+AddressSanitizer::AddressSanitizer() : FunctionPass(ID) { }
+FunctionPass *llvm::createAddressSanitizerPass() {
return new AddressSanitizer();
}
@@ -295,12 +288,12 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
}
void AddressSanitizer::instrumentMemIntrinsicParam(
- AsanFunctionContext &AFC, Instruction *OrigIns,
+ Instruction *OrigIns,
Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
// Check the first byte.
{
IRBuilder<> IRB(InsertBefore);
- instrumentAddress(AFC, OrigIns, IRB, Addr, 8, IsWrite);
+ instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);
}
// Check the last byte.
{
@@ -310,13 +303,12 @@ void AddressSanitizer::instrumentMemIntrinsicParam(
SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
Value *AddrPlusSizeMinusOne = IRB.CreateAdd(AddrLong, SizeMinusOne);
- instrumentAddress(AFC, OrigIns, IRB, AddrPlusSizeMinusOne, 8, IsWrite);
+ instrumentAddress(OrigIns, IRB, AddrPlusSizeMinusOne, 8, IsWrite);
}
}
// Instrument memset/memmove/memcpy
-bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
- MemIntrinsic *MI) {
+bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
Value *Dst = MI->getDest();
MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
Value *Src = MemTran ? MemTran->getSource() : 0;
@@ -335,9 +327,9 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
InsertBefore = splitBlockAndInsertIfThen(Cmp, false);
}
- instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true);
+ instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
if (Src)
- instrumentMemIntrinsicParam(AFC, MI, Src, Length, InsertBefore, false);
+ instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
return true;
}
@@ -391,7 +383,7 @@ bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) {
return DynamicallyInitializedGlobals.count(G);
}
-void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
+void AddressSanitizer::instrumentMop(Instruction *I) {
bool IsWrite = false;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
assert(Addr);
@@ -424,7 +416,7 @@ void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
}
IRBuilder<> IRB(I);
- instrumentAddress(AFC, I, IRB, Addr, TypeSize, IsWrite);
+ instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);
}
// Validate the result of Module::getOrInsertFunction called for an interface
@@ -469,8 +461,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
-void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
- Instruction *OrigIns,
+void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
IRBuilder<> &IRB, Value *Addr,
uint32_t TypeSize, bool IsWrite) {
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
@@ -494,7 +485,8 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
- BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB);
+ BasicBlock *CrashBlock =
+ BasicBlock::Create(*C, "", NextBB->getParent(), NextBB);
CrashTerm = new UnreachableInst(*C, CrashBlock);
BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
ReplaceInstWithInst(CheckTerm, NewTerm);
@@ -734,15 +726,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
}
// virtual
-bool AddressSanitizer::runOnModule(Module &M) {
+bool AddressSanitizer::doInitialization(Module &M) {
// Initialize the private fields. No one has accessed them before.
TD = getAnalysisIfAvailable<DataLayout>();
+
if (!TD)
return false;
BL.reset(new BlackList(ClBlackListFile));
C = &(M.getContext());
- LongSize = TD->getPointerSizeInBits();
+ LongSize = TD->getPointerSizeInBits(0);
IntptrTy = Type::getIntNTy(*C, LongSize);
IntptrPtrTy = PointerType::get(IntptrTy, 0);
@@ -771,6 +764,15 @@ bool AddressSanitizer::runOnModule(Module &M) {
M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
}
}
+
+ AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL));
+ AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackFreeName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, IntptrTy, NULL));
+ AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
@@ -797,10 +799,6 @@ bool AddressSanitizer::runOnModule(Module &M) {
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
RedzoneSize = std::max(32, (int)(1 << MappingScale));
- bool Res = false;
-
- if (ClGlobals)
- Res |= insertGlobalRedzones(M);
if (ClMappingOffsetLog >= 0) {
// Tell the run-time the current values of mapping offset and scale.
@@ -820,17 +818,20 @@ bool AddressSanitizer::runOnModule(Module &M) {
IRB.CreateLoad(asan_mapping_scale, true);
}
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- Res |= handleFunction(M, *F);
- }
-
appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
- return Res;
+ return true;
+}
+
+bool AddressSanitizer::doFinalization(Module &M) {
+ // We transform the globals at the very end so that the optimization analysis
+ // works on the original globals.
+ if (ClGlobals)
+ return insertGlobalRedzones(M);
+ return false;
}
+
bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// For each NSObject descendant having a +load method, this method is invoked
// by the ObjC runtime before any of the static constructors is called.
@@ -847,7 +848,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
return false;
}
-bool AddressSanitizer::handleFunction(Module &M, Function &F) {
+bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
@@ -899,8 +900,6 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
}
}
- AsanFunctionContext AFC(F);
-
// Instrument.
int NumInstrumented = 0;
for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
@@ -908,24 +907,23 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
if (ClDebugMin < 0 || ClDebugMax < 0 ||
(NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
if (isInterestingMemoryAccess(Inst, &IsWrite))
- instrumentMop(AFC, Inst);
+ instrumentMop(Inst);
else
- instrumentMemIntrinsic(AFC, cast<MemIntrinsic>(Inst));
+ instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
}
NumInstrumented++;
}
DEBUG(dbgs() << F);
- bool ChangedStack = poisonStackInFunction(M, F);
+ bool ChangedStack = poisonStackInFunction(F);
// We must unpoison the stack before every NoReturn call (throw, _exit, etc).
// See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37
for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) {
Instruction *CI = NoReturnCalls[i];
IRBuilder<> IRB(CI);
- IRB.CreateCall(M.getOrInsertFunction(kAsanHandleNoReturnName,
- IRB.getVoidTy(), NULL));
+ IRB.CreateCall(AsanHandleNoReturnFunc);
}
return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
@@ -1039,7 +1037,7 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
// compiler hoists the load of the shadow value somewhere too high.
// This causes asan to report a non-existing bug on 453.povray.
// It sounds like an LLVM bug.
-bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
+bool AddressSanitizer::poisonStackInFunction(Function &F) {
if (!ClStack) return false;
SmallVector<AllocaInst*, 16> AllocaVec;
SmallVector<Instruction*, 8> RetVec;
@@ -1089,8 +1087,6 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
Value *LocalStackBase = OrigStackBase;
if (DoStackMalloc) {
- Value *AsanStackMallocFunc = M.getOrInsertFunction(
- kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL);
LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
}
@@ -1126,7 +1122,7 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
ConstantInt::get(IntptrTy, LongSize/8));
BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
Value *Description = IRB.CreatePointerCast(
- createPrivateGlobalForString(M, StackDescription.str()),
+ createPrivateGlobalForString(*F.getParent(), StackDescription.str()),
IntptrTy);
IRB.CreateStore(Description, BasePlus1);
@@ -1134,13 +1130,6 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
Value *ShadowBase = memToShadow(LocalStackBase, IRB);
PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true);
- Value *AsanStackFreeFunc = NULL;
- if (DoStackMalloc) {
- AsanStackFreeFunc = M.getOrInsertFunction(
- kAsanStackFreeName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, IntptrTy, NULL);
- }
-
// Unpoison the stack before all ret instructions.
for (size_t i = 0, n = RetVec.size(); i < n; i++) {
Instruction *Ret = RetVec[i];
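Taken together, this file's hunks convert AddressSanitizer from a ModulePass into a FunctionPass: module-wide setup (runtime declarations, the ctor) moves into doInitialization, per-function work becomes runOnFunction, and global redzones are deferred to doFinalization so earlier analyses see the original globals. A skeleton of the resulting shape, condensed from the hunks above (not the full class):

    struct AddressSanitizer : public FunctionPass {
      // Once per module: resolve __asan_* runtime entry points, build the ctor.
      virtual bool doInitialization(Module &M);
      // Once per function: instrument loads, stores, and mem intrinsics.
      virtual bool runOnFunction(Function &F);
      // Last: insertGlobalRedzones(M), after optimization saw original globals.
      virtual bool doFinalization(Module &M);
    };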
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 4d31444b764..4d8176bc6c7 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -774,8 +774,10 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
- if (Attributes::Builder(CalleeRetAttr ^ CallerRetAttr)
- .removeAttribute(Attributes::NoAlias).hasAttributes())
+ if (AttrBuilder(CalleeRetAttr).
+ removeAttribute(Attributes::NoAlias) !=
+ AttrBuilder(CallerRetAttr).
+ removeAttribute(Attributes::NoAlias))
continue;
// Make sure the call instruction is followed by an unconditional branch to
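The rewritten check compares the callee's and caller's return attributes modulo NoAlias by building one AttrBuilder per side, rather than XOR-ing the raw attribute bits. An equivalent predicate, factored out of the hunk into a hypothetical helper for clarity:

    static bool retAttrsDifferIgnoringNoAlias(Attributes Callee,
                                              Attributes Caller) {
      return AttrBuilder(Callee).removeAttribute(Attributes::NoAlias) !=
             AttrBuilder(Caller).removeAttribute(Attributes::NoAlias);
    }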
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 602e5a4785c..736cc05e043 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -701,6 +701,22 @@ bool DSE::HandleFree(CallInst *F) {
return MadeChange;
}
+namespace {
+ struct CouldRef {
+ typedef Value *argument_type;
+ const CallSite CS;
+ AliasAnalysis *AA;
+
+ bool operator()(Value *I) {
+ // See if the call site touches the value.
+ AliasAnalysis::ModRefResult A =
+ AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
+
+ return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
+ }
+ };
+}
+
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block. Ex:
/// %A = alloca i32
@@ -802,26 +818,14 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// If the call might load from any of our allocas, then any store above
// the call is live.
- SmallVector<Value*, 8> LiveAllocas;
- for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
- E = DeadStackObjects.end(); I != E; ++I) {
- // See if the call site touches it.
- AliasAnalysis::ModRefResult A =
- AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
-
- if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
- LiveAllocas.push_back(*I);
- }
+ CouldRef Pred = { CS, AA };
+ DeadStackObjects.remove_if(Pred);
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
- if (DeadStackObjects.size() == LiveAllocas.size())
+ if (DeadStackObjects.empty())
break;
- for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
- E = LiveAllocas.end(); I != E; ++I)
- DeadStackObjects.remove(*I);
-
continue;
}
@@ -858,6 +862,20 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
return MadeChange;
}
+namespace {
+ struct CouldAlias {
+ typedef Value *argument_type;
+ const AliasAnalysis::Location &LoadedLoc;
+ AliasAnalysis *AA;
+
+ bool operator()(Value *I) {
+ // See if the loaded location could alias the stack location.
+ AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
+ return !AA->isNoAlias(StackLoc, LoadedLoc);
+ }
+ };
+}
+
/// RemoveAccessedObjects - Check to see if the specified location may alias any
/// of the stack objects in the DeadStackObjects set. If so, they become live
/// because the location is being loaded.
@@ -876,16 +894,7 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
return;
}
- SmallVector<Value*, 16> NowLive;
- for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
- E = DeadStackObjects.end(); I != E; ++I) {
- // See if the loaded location could alias the stack location.
- AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
- if (!AA->isNoAlias(StackLoc, LoadedLoc))
- NowLive.push_back(*I);
- }
-
- for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
- I != E; ++I)
- DeadStackObjects.remove(*I);
+ // Remove objects that could alias LoadedLoc.
+ CouldAlias Pred = { LoadedLoc, AA };
+ DeadStackObjects.remove_if(Pred);
}
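Both hunks in this file collapse a collect-then-erase idiom into SetVector::remove_if over a small stateful functor; the argument_type typedef is what the remove_if adapter expects here. The removed loops were roughly this expansion of the CouldRef case:

    // What `DeadStackObjects.remove_if(Pred)` replaces (sketch of the old code):
    SmallVector<Value*, 8> Live;
    for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
           E = DeadStackObjects.end(); I != E; ++I)
      if (Pred(*I))                   // the call may read or write *I
        Live.push_back(*I);
    for (unsigned i = 0, e = Live.size(); i != e; ++i)
      DeadStackObjects.remove(Live[i]);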
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 958348d9faa..99a62dbe62f 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -37,7 +37,7 @@
//
// TODO: Handle multiple loops at a time.
//
-// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
+// TODO: Should AddrMode::BaseGV be changed to a ConstantExpr
// instead of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
@@ -67,6 +67,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/TargetTransformInfo.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/DenseSet.h"
@@ -74,7 +75,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
@@ -1118,7 +1118,7 @@ public:
enum KindType {
Basic, ///< A normal use, with no folding.
Special, ///< A special case of basic, allowing -1 scales.
- Address, ///< An address use; folding according to TargetLowering
+ Address, ///< An address use; folding according to ScalarTargetTransformInfo.
ICmpZero ///< An equality icmp with both operands folded into one.
// TODO: Add a generic icmp too?
};
@@ -1272,12 +1272,12 @@ void LSRUse::dump() const {
/// address-mode folding and special icmp tricks.
static bool isLegalUse(const AddrMode &AM,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+ const ScalarTargetTransformInfo *STTI) {
switch (Kind) {
case LSRUse::Address:
// If we have low-level target information, ask the target if it can
// completely fold this address.
- if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+ if (STTI) return STTI->isLegalAddressingMode(AM, AccessTy);
// Otherwise, just guess that reg+reg addressing is legal.
return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
@@ -1300,7 +1300,7 @@ static bool isLegalUse(const AddrMode &AM,
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
- if (!TLI)
+ if (!STTI)
return false;
// We have one of:
// ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
@@ -1309,7 +1309,7 @@ static bool isLegalUse(const AddrMode &AM,
int64_t Offs = AM.BaseOffs;
if (AM.Scale == 0)
Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
- return TLI->isLegalICmpImmediate(Offs);
+ return STTI->isLegalICmpImmediate(Offs);
}
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
@@ -1330,20 +1330,20 @@ static bool isLegalUse(const AddrMode &AM,
static bool isLegalUse(AddrMode AM,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+                       const ScalarTargetTransformInfo *STTI) {
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
(MinOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
- if (isLegalUse(AM, Kind, AccessTy, TLI)) {
+  if (isLegalUse(AM, Kind, AccessTy, STTI)) {
AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
(MaxOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
- return isLegalUse(AM, Kind, AccessTy, TLI);
+    return isLegalUse(AM, Kind, AccessTy, STTI);
}
return false;
}
@@ -1352,7 +1352,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
GlobalValue *BaseGV,
bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+                             const ScalarTargetTransformInfo *STTI) {
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
@@ -1371,14 +1371,14 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
AM.HasBaseReg = true;
}
- return isLegalUse(AM, Kind, AccessTy, TLI);
+  return isLegalUse(AM, Kind, AccessTy, STTI);
}
static bool isAlwaysFoldable(const SCEV *S,
int64_t MinOffset, int64_t MaxOffset,
bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI,
+                             const ScalarTargetTransformInfo *STTI,
ScalarEvolution &SE) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
@@ -1402,7 +1402,7 @@ static bool isAlwaysFoldable(const SCEV *S,
AM.HasBaseReg = HasBaseReg;
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+  return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, STTI);
}
namespace {
@@ -1502,7 +1502,7 @@ class LSRInstance {
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
- const TargetLowering *const TLI;
+ const ScalarTargetTransformInfo *const STTI;
Loop *const L;
bool Changed;
@@ -1638,7 +1638,7 @@ class LSRInstance {
Pass *P);
public:
- LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+  LSRInstance(const ScalarTargetTransformInfo *stti, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
@@ -1688,11 +1688,10 @@ void LSRInstance::OptimizeShadowIV() {
}
if (!DestTy) continue;
- if (TLI) {
+ if (STTI) {
// If target does not support DestTy natively then do not apply
// this transformation.
- EVT DVT = TLI->getValueType(DestTy);
- if (!TLI->isTypeLegal(DVT)) continue;
+ if (!STTI->isTypeLegal(DestTy)) continue;
}
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
@@ -2015,18 +2014,18 @@ LSRInstance::OptimizeLoopTermCond() {
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
- // Without TLI, assume that any stride might be valid, and so any
+ // Without STTI, assume that any stride might be valid, and so any
// use might be shared.
- if (!TLI)
+ if (!STTI)
goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
AddrMode AM;
AM.Scale = C->getSExtValue();
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ if (STTI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
AM.Scale = -AM.Scale;
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ if (STTI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
}
}
@@ -2097,12 +2096,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ Kind, AccessTy, STTI))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ Kind, AccessTy, STTI))
return false;
NewMaxOffset = NewOffset;
}
@@ -2131,7 +2130,7 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+ if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, STTI)) {
Expr = Copy;
Offset = 0;
}
@@ -2396,7 +2395,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
- ScalarEvolution &SE, const TargetLowering *TLI) {
+ ScalarEvolution &SE, const ScalarTargetTransformInfo *STTI) {
if (StressIVChain)
return true;
@@ -2654,7 +2653,7 @@ void LSRInstance::CollectChains() {
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
- ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
+ ChainUsersVec[UsersIdx].FarUsers, SE, STTI))
continue;
// Preserve the chain at UsersIdx.
if (ChainIdx != UsersIdx)
@@ -2681,7 +2680,8 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
- Value *Operand, const TargetLowering *TLI) {
+ Value *Operand,
+ const ScalarTargetTransformInfo *STTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
@@ -2691,7 +2691,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HasBaseReg=*/false,
- LSRUse::Address, getAccessType(UserInst), TLI))
+ LSRUse::Address, getAccessType(UserInst), STTI))
return false;
return true;
@@ -2762,7 +2762,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// If an IV increment can't be folded, use it as the next IV value.
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
- TLI)) {
+ STTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = 0;
@@ -3108,7 +3108,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ LU.Kind, LU.AccessTy, STTI, SE))
continue;
// Collect all operands except *J.
@@ -3122,7 +3122,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
if (InnerAddOps.size() == 1 &&
isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ LU.Kind, LU.AccessTy, STTI, SE))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
@@ -3132,9 +3132,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (TLI && InnerSumSC &&
+ if (STTI && InnerSumSC &&
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue();
@@ -3144,8 +3144,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ if (STTI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue()))
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue();
@@ -3205,7 +3205,7 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseGV = GV;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ LU.Kind, LU.AccessTy, STTI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3230,7 +3230,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
- LU.Kind, LU.AccessTy, TLI)) {
+ LU.Kind, LU.AccessTy, STTI)) {
// Add the offset to the base register.
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
@@ -3250,7 +3250,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ LU.Kind, LU.AccessTy, STTI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3297,7 +3297,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
F.AM.BaseOffs = NewBaseOffs;
// Check that this scale is legal.
- if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+ if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, STTI))
continue;
// Compensate for the use having MinOffset built into it.
@@ -3353,12 +3353,12 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
+ LU.Kind, LU.AccessTy, STTI)) {
// As a special-case, handle special out-of-loop Basic users specially.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LSRUse::Special, LU.AccessTy, TLI) &&
+ LSRUse::Special, LU.AccessTy, STTI) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
@@ -3391,8 +3391,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
- // This requires TargetLowering to tell us which truncates are free.
- if (!TLI) return;
+ // This requires ScalarTargetTransformInfo to tell us which truncates are free.
+ if (!STTI) return;
// Don't bother truncating symbolic values.
if (Base.AM.BaseGV) return;
@@ -3405,7 +3405,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
Type *SrcTy = *I;
- if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+ if (SrcTy != DstTy && STTI->isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
@@ -3561,7 +3561,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
Formula NewF = F;
NewF.AM.BaseOffs = Offs;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ LU.Kind, LU.AccessTy, STTI))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
@@ -3586,9 +3586,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
Formula NewF = F;
NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
- if (!TLI ||
- !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ LU.Kind, LU.AccessTy, STTI)) {
+ if (!STTI ||
+ !STTI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
@@ -3900,7 +3900,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
Formula &F = LUThatHas->Formulae[i];
if (!isLegalUse(F.AM,
LUThatHas->MinOffset, LUThatHas->MaxOffset,
- LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
+ LUThatHas->Kind, LUThatHas->AccessTy, STTI)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LUThatHas->DeleteFormula(F);
@@ -4589,12 +4589,12 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
-LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
+LSRInstance::LSRInstance(const ScalarTargetTransformInfo *stti, Loop *l, Pass *P)
: IU(P->getAnalysis<IVUsers>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()),
LI(P->getAnalysis<LoopInfo>()),
- TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
+ STTI(stti), L(l), Changed(false), IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
@@ -4684,7 +4684,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end(); J != JE; ++J)
assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI) &&
+ LU.Kind, LU.AccessTy, STTI) &&
"Illegal formula generated!");
};
#endif
@@ -4757,13 +4757,13 @@ void LSRInstance::dump() const {
namespace {
class LoopStrengthReduce : public LoopPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
- const TargetLowering *const TLI;
+ /// ScalarTargetTransformInfo provides target information that is needed
+ /// for strength reducing loops.
+ const ScalarTargetTransformInfo *STTI;
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+ LoopStrengthReduce();
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -4783,13 +4783,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
-
-Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
- return new LoopStrengthReduce(TLI);
+Pass *llvm::createLoopStrengthReducePass() {
+ return new LoopStrengthReduce();
}
-LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(ID), TLI(tli) {
+LoopStrengthReduce::LoopStrengthReduce()
+ : LoopPass(ID), STTI(0) {
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}
@@ -4815,8 +4814,13 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
+ TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+
+ if (TTI)
+ STTI = TTI->getScalarTargetTransformInfo();
+
// Run the main LSR transformation.
- Changed |= LSRInstance(TLI, L, this).getChanged();
+ Changed |= LSRInstance(STTI, L, this).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
@@ -4827,7 +4831,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
Rewriter.setDebugType(DEBUG_TYPE);
#endif
unsigned numFolded = Rewriter.
- replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
+ replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, STTI);
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
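LSR now reaches the target through ScalarTargetTransformInfo instead of a constructor-injected TargetLowering: runOnLoop queries the optional TargetTransformInfo analysis, and when it is absent STTI stays null, so every isLegalUse and isLegalAddImmediate query falls back to conservative guesses. The wiring, condensed from the hunk above:

    // If no target info analysis is registered, STTI remains null and the
    // legality checks guess (e.g. only reg+reg addressing is assumed legal).
    if (TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>())
      STTI = TTI->getScalarTargetTransformInfo();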
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 517657cf526..97fff9edd68 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -174,10 +174,11 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
// this width can be stored. If so, check to see whether we will end up
// actually reducing the number of stores used.
unsigned Bytes = unsigned(End-Start);
- unsigned NumPointerStores = Bytes/TD.getPointerSize();
+ unsigned AS = cast<StoreInst>(TheStores[0])->getPointerAddressSpace();
+ unsigned NumPointerStores = Bytes/TD.getPointerSize(AS);
// Assume the remaining bytes if any are done a byte at a time.
- unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
+ unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(AS);
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
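For a concrete instance of the updated heuristic: with 8-byte pointers in the store's address space, a 17-byte range written by seventeen one-byte stores widens to two pointer-sized stores plus one byte store, so the memset transform pays off. A sketch of the arithmetic (the final comparison against TheStores.size() is assumed from the surrounding function):

    unsigned Bytes = 17;                            // End - Start
    unsigned PtrSize = 8;                           // TD.getPointerSize(AS)
    unsigned NumPointerStores = Bytes / PtrSize;    // 2
    unsigned NumByteStores = Bytes - NumPointerStores * PtrSize;  // 1
    // 17 original stores > 2 + 1 widened ones, so memset wins here.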
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 629f9d2ff57..dfdf50549da 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -1788,9 +1788,9 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainRVCallee =
M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
Attributes);
@@ -1804,9 +1804,9 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
AutoreleaseRVCallee =
M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
Attributes);
@@ -1818,9 +1818,9 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
if (!ReleaseCallee) {
LLVMContext &C = M->getContext();
Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
ReleaseCallee =
M->getOrInsertFunction(
"objc_release",
@@ -1834,9 +1834,9 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) {
if (!RetainCallee) {
LLVMContext &C = M->getContext();
Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainCallee =
M->getOrInsertFunction(
"objc_retain",
@@ -1865,9 +1865,9 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
if (!AutoreleaseCallee) {
LLVMContext &C = M->getContext();
Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
AutoreleaseCallee =
M->getOrInsertFunction(
"objc_autorelease",
@@ -3840,13 +3840,10 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
Type *I8XX = PointerType::getUnqual(I8X);
Type *Params[] = { I8XX, I8X };
- Attributes::Builder BNoUnwind;
- BNoUnwind.addAttribute(Attributes::NoUnwind);
- Attributes::Builder BNoCapture;
- BNoCapture.addAttribute(Attributes::NoCapture);
AttrListPtr Attributes = AttrListPtr()
- .addAttr(~0u, Attributes::get(BNoUnwind))
- .addAttr(1, Attributes::get(BNoCapture));
+ .addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind))
+ .addAttr(M->getContext(), 1, Attributes::get(C, Attributes::NoCapture));
StoreStrongCallee =
M->getOrInsertFunction(
@@ -3863,9 +3860,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainAutoreleaseCallee =
M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
}
@@ -3878,9 +3875,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- Attributes::Builder B;
- B.addAttribute(Attributes::NoUnwind);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainAutoreleaseRVCallee =
M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
Attributes);
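Every callee getter in this file migrates from the removed Attributes::Builder to the context-threaded AttrListPtr::addAttr overload, with the magic index ~0u replaced by the named AttrListPtr::FunctionIndex. The repeated pattern, factored into a hypothetical helper for clarity (not part of the patch):

    static AttrListPtr noUnwindFnAttrs(LLVMContext &C) {
      return AttrListPtr().addAttr(C, AttrListPtr::FunctionIndex,
                                   Attributes::get(C, Attributes::NoUnwind));
    }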
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index ca762514929..3e84a91c1db 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -447,6 +447,7 @@ protected:
bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) {
GEPOffset = Offset;
+  unsigned AS = GEPI.getPointerAddressSpace();
for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);
GTI != GTE; ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
@@ -476,7 +477,7 @@ protected:
continue;
}
- APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits());
+ APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits(AS));
Index *= APInt(Index.getBitWidth(),
TD.getTypeAllocSize(GTI.getIndexedType()));
Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset,
@@ -1784,7 +1785,9 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
break;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
ElementTy = SeqTy->getElementType();
- Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0)));
+ Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(
+ ElementTy->isPointerTy() ?
+ cast<PointerType>(ElementTy)->getAddressSpace(): 0), 0)));
} else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
if (STy->element_begin() == STy->element_end())
break; // Nothing left to descend into.
@@ -2004,6 +2007,51 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
return Ptr;
}
+/// \brief Test whether we can convert a value from the old to the new type.
+///
+/// This predicate should be used to guard calls to convertValue in order to
+/// ensure that we only try to convert viable values. The strategy is that we
+/// will peel off single element struct and array wrappings to get to an
+/// underlying value, and convert that value.
+static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
+ if (OldTy == NewTy)
+ return true;
+ if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
+ return false;
+ if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
+ return false;
+
+ if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
+ if (NewTy->isPointerTy() && OldTy->isPointerTy())
+ return true;
+ if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
+ return true;
+ return false;
+ }
+
+ return true;
+}
+
+/// \brief Generic routine to convert an SSA value to a value of a different
+/// type.
+///
+/// This will try various different casting techniques, such as bitcasts,
+/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
+/// two types for viability with this routine.
+static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+ Type *Ty) {
+ assert(canConvertValue(DL, V->getType(), Ty) &&
+ "Value not convertable to type");
+ if (V->getType() == Ty)
+ return V;
+ if (V->getType()->isIntegerTy() && Ty->isPointerTy())
+ return IRB.CreateIntToPtr(V, Ty);
+ if (V->getType()->isPointerTy() && Ty->isIntegerTy())
+ return IRB.CreatePtrToInt(V, Ty);
+
+ return IRB.CreateBitCast(V, Ty);
+}
+
/// \brief Test whether the given alloca partition can be promoted to a vector.
///
/// This is a quick test to check whether we can rewrite a particular alloca
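The new canConvertValue/convertValue pair splits "may we?" from "do it": conversion is allowed only between single-value types of identical bit size, and pointers may only become other pointers or integers; convertValue then picks bitcast, inttoptr, or ptrtoint as appropriate. Typical guarded use, as the rewriter below adopts it (sketch with illustrative names):

    Value *NewV = 0;
    if (canConvertValue(TD, OldV->getType(), NewTy))
      NewV = convertValue(TD, IRB, OldV, NewTy);  // bitcast/inttoptr/ptrtoint
    // NewV, when non-null, has type NewTy but the same bits as OldV.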
@@ -2075,47 +2123,74 @@ static bool isVectorPromotionViable(const DataLayout &TD,
return true;
}
-/// \brief Test whether the given alloca partition can be promoted to an int.
+/// \brief Test whether the given alloca partition's integer operations can be
+/// widened to promotable ones.
///
-/// This is a quick test to check whether we can rewrite a particular alloca
-/// partition (and its newly formed alloca) into an integer alloca suitable for
-/// promotion to an SSA value. We only can ensure this for a limited set of
-/// operations, and we don't want to do the rewrites unless we are confident
-/// that the result will be promotable, so we have an early test here.
-static bool isIntegerPromotionViable(const DataLayout &TD,
- Type *AllocaTy,
- uint64_t AllocBeginOffset,
- AllocaPartitioning &P,
- AllocaPartitioning::const_use_iterator I,
- AllocaPartitioning::const_use_iterator E) {
- IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
- if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())
+/// This is a quick test to check whether we can rewrite the integer loads and
+/// stores to a particular alloca into wider loads and stores and be able to
+/// promote the resulting alloca.
+static bool isIntegerWideningViable(const DataLayout &TD,
+ Type *AllocaTy,
+ uint64_t AllocBeginOffset,
+ AllocaPartitioning &P,
+ AllocaPartitioning::const_use_iterator I,
+ AllocaPartitioning::const_use_iterator E) {
+ uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy);
+
+ // Don't try to handle allocas with bit-padding.
+ if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
return false;
+ uint64_t Size = TD.getTypeStoreSize(AllocaTy);
+
// Check the uses to ensure they are (likely) promotable integer uses.
// Also ensure that the alloca has a covering load or store. We don't want
- // promote because of some other unsplittable entry (which we may make
- // splittable later) and lose the ability to promote each element access.
+  // to widen the integer operations only to fail to promote due to some other
+ // unsplittable entry (which we may make splittable later).
bool WholeAllocaOp = false;
for (; I != E; ++I) {
if (!I->U)
continue; // Skip dead use.
+ uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
+ uint64_t RelEnd = I->EndOffset - AllocBeginOffset;
+
// We can't reasonably handle cases where the load or store extends past
// the end of the alloca's type and into its padding.
- if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))
+ if (RelEnd > Size)
return false;
if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
- if (LI->isVolatile() || !LI->getType()->isIntegerTy())
+ if (LI->isVolatile())
return false;
- if (LI->getType() == Ty)
+ if (RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+        if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy))
+ return false;
+ continue;
+ }
+ // Non-integer loads need to be convertible from the alloca type so that
+ // they are promotable.
+ if (RelBegin != 0 || RelEnd != Size ||
+ !canConvertValue(TD, AllocaTy, LI->getType()))
+ return false;
} else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
- if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
+ Type *ValueTy = SI->getValueOperand()->getType();
+ if (SI->isVolatile())
return false;
- if (SI->getValueOperand()->getType() == Ty)
+ if (RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
+        if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy))
+ return false;
+ continue;
+ }
+ // Non-integer stores need to be convertible to the alloca type so that
+ // they are promotable.
+ if (RelBegin != 0 || RelEnd != Size ||
+ !canConvertValue(TD, ValueTy, AllocaTy))
+ return false;
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
if (MI->isVolatile())
return false;
@@ -2125,6 +2200,10 @@ static bool isIntegerPromotionViable(const DataLayout &TD,
if (!MTO.IsSplittable)
return false;
}
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
} else {
return false;
}
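The widening test normalizes every use to offsets relative to the new alloca before checking bounds and whole-alloca coverage. A worked example with the hunk's variables (concrete values assumed for illustration):

    // alloca i64 starting at AllocBeginOffset == 16, so Size == 8 bytes.
    // A store of i32 at absolute offset 20:
    uint64_t RelBegin = 20 - 16;   // 4
    uint64_t RelEnd   = 24 - 16;   // 8 -> RelEnd <= Size, in bounds, but not
                                   //      a WholeAllocaOp since RelBegin != 0
    // A load of the full i64 at offset 16: RelBegin 0, RelEnd 8 -> WholeAllocaOp.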
@@ -2149,6 +2228,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
SROA &Pass;
AllocaInst &OldAI, &NewAI;
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
+ Type *NewAllocaTy;
// If we are rewriting an alloca partition which can be written as pure
// vector operations, we stash extra information here. When VecTy is
@@ -2164,10 +2244,10 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
uint64_t ElementSize;
// This is a convenience and flag variable that will be null unless the new
- // alloca has a promotion-targeted integer type due to passing
- // isIntegerPromotionViable above. If it is non-null does, the desired
+ // alloca's integer operations should be widened to this integer type due to
+ // passing isIntegerWideningViable above. If it is non-null, the desired
// integer type will be stored here for easy access during rewriting.
- IntegerType *IntPromotionTy;
+ IntegerType *IntTy;
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
@@ -2186,7 +2266,8 @@ public:
OldAI(OldAI), NewAI(NewAI),
NewAllocaBeginOffset(NewBeginOffset),
NewAllocaEndOffset(NewEndOffset),
- VecTy(), ElementTy(), ElementSize(), IntPromotionTy(),
+ NewAllocaTy(NewAI.getAllocatedType()),
+ VecTy(), ElementTy(), ElementSize(), IntTy(),
BeginOffset(), EndOffset() {
}
@@ -2202,9 +2283,10 @@ public:
assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
"Only multiple-of-8 sized vector elements are viable");
ElementSize = VecTy->getScalarSizeInBits() / 8;
- } else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(),
- NewAllocaBeginOffset, P, I, E)) {
- IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType());
+ } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(),
+ NewAllocaBeginOffset, P, I, E)) {
+ IntTy = Type::getIntNTy(NewAI.getContext(),
+ TD.getTypeSizeInBits(NewAI.getAllocatedType()));
}
bool CanSROA = true;
for (; I != E; ++I) {
@@ -2223,6 +2305,10 @@ public:
ElementTy = 0;
ElementSize = 0;
}
+ if (IntTy) {
+ assert(CanSROA);
+ IntTy = 0;
+ }
return CanSROA;
}
@@ -2239,7 +2325,8 @@ private:
Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
assert(BeginOffset >= NewAllocaBeginOffset);
- APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
+ unsigned AS = cast<PointerType>(PointerTy)->getAddressSpace();
+ APInt Offset(TD.getPointerSizeInBits(AS), BeginOffset - NewAllocaBeginOffset);
return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
}
@@ -2286,55 +2373,56 @@ private:
Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
uint64_t Offset) {
- assert(IntPromotionTy && "Alloca is not an integer we can extract from");
+ assert(IntTy && "We cannot extract an integer from the alloca");
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".load"));
+ V = convertValue(TD, IRB, V, IntTy);
assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
assert(TD.getTypeStoreSize(TargetTy) + RelOffset <=
- TD.getTypeStoreSize(IntPromotionTy) &&
+ TD.getTypeStoreSize(IntTy) &&
"Element load outside of alloca store");
uint64_t ShAmt = 8*RelOffset;
if (TD.isBigEndian())
- ShAmt = 8*(TD.getTypeStoreSize(IntPromotionTy) -
+ ShAmt = 8*(TD.getTypeStoreSize(IntTy) -
TD.getTypeStoreSize(TargetTy) - RelOffset);
if (ShAmt)
V = IRB.CreateLShr(V, ShAmt, getName(".shift"));
- if (TargetTy != IntPromotionTy) {
- assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() &&
- "Cannot extract to a larger integer!");
+ assert(TargetTy->getBitWidth() <= IntTy->getBitWidth() &&
+ "Cannot extract to a larger integer!");
+ if (TargetTy != IntTy)
V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
- }
return V;
}
StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
IntegerType *Ty = cast<IntegerType>(V->getType());
- if (Ty == IntPromotionTy)
- return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
-
- assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
+ assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot insert a larger integer!");
- V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext"));
+ if (Ty != IntTy)
+ V = IRB.CreateZExt(V, IntTy, getName(".ext"));
assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
assert(TD.getTypeStoreSize(Ty) + RelOffset <=
- TD.getTypeStoreSize(IntPromotionTy) &&
+ TD.getTypeStoreSize(IntTy) &&
"Element store outside of alloca store");
uint64_t ShAmt = 8*RelOffset;
if (TD.isBigEndian())
- ShAmt = 8*(TD.getTypeStoreSize(IntPromotionTy) - TD.getTypeStoreSize(Ty)
+ ShAmt = 8*(TD.getTypeStoreSize(IntTy) - TD.getTypeStoreSize(Ty)
- RelOffset);
if (ShAmt)
V = IRB.CreateShl(V, ShAmt, getName(".shift"));
- APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth()).shl(ShAmt);
- Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI,
- NewAI.getAlignment(),
- getName(".oldload")),
- Mask, getName(".mask"));
- return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")),
- &NewAI, NewAI.getAlignment());
+ if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
+ APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ Old = IRB.CreateAnd(Old, Mask, getName(".mask"));
+ V = IRB.CreateOr(Old, V, getName(".insert"));
+ }
+ V = convertValue(TD, IRB, V, NewAllocaTy);
+ return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
}
void deleteIfTriviallyDead(Value *V) {
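insertInteger now widens V to the alloca-sized integer, shifts it into position (big-endian targets get the mirrored shift amount), and performs a masked read-modify-write only when V does not already cover every bit. A worked example of the mask arithmetic above, little-endian, with IntTy == i32, an i8 value, and RelOffset == 1 (values assumed):

    APInt Ones = APInt::getAllOnesValue(8);   // the i8 mask, 0xFF
    APInt Mask = ~Ones.zext(32).shl(8);       // ~0x0000FF00 == 0xFFFF00FF
    // Stored value: (Old & Mask) | (zext(V) << 8), leaving other bytes intact.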
@@ -2343,15 +2431,6 @@ private:
Pass.DeadInsts.push_back(I);
}
- Value *getValueCast(IRBuilder<> &IRB, Value *V, Type *Ty) {
- if (V->getType()->isIntegerTy() && Ty->isPointerTy())
- return IRB.CreateIntToPtr(V, Ty);
- if (V->getType()->isPointerTy() && Ty->isIntegerTy())
- return IRB.CreatePtrToInt(V, Ty);
-
- return IRB.CreateBitCast(V, Ty);
- }
-
bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
Value *Result;
if (LI.getType() == VecTy->getElementType() ||
@@ -2364,7 +2443,7 @@ private:
getName(".load"));
}
if (Result->getType() != LI.getType())
- Result = getValueCast(IRB, Result, LI.getType());
+ Result = convertValue(TD, IRB, Result, LI.getType());
LI.replaceAllUsesWith(Result);
Pass.DeadInsts.push_back(&LI);
@@ -2390,9 +2469,23 @@ private:
if (VecTy)
return rewriteVectorizedLoadInst(IRB, LI, OldOp);
- if (IntPromotionTy)
+ if (IntTy && LI.getType()->isIntegerTy())
return rewriteIntegerLoad(IRB, LI);
+ if (BeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(TD, NewAllocaTy, LI.getType())) {
+ Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ LI.isVolatile(), getName(".load"));
+ Value *NewV = convertValue(TD, IRB, NewLI, LI.getType());
+ LI.replaceAllUsesWith(NewV);
+ Pass.DeadInsts.push_back(&LI);
+
+ DEBUG(dbgs() << " to: " << *NewLI << "\n");
+ return !LI.isVolatile();
+ }
+
+ assert(!IntTy && "Invalid load found with int-op widening enabled");
+
Value *NewPtr = getAdjustedAllocaPtr(IRB,
LI.getPointerOperand()->getType());
LI.setOperand(0, NewPtr);
@@ -2409,13 +2502,13 @@ private:
if (V->getType() == ElementTy ||
BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
if (V->getType() != ElementTy)
- V = getValueCast(IRB, V, ElementTy);
+ V = convertValue(TD, IRB, V, ElementTy);
LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".load"));
V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
getName(".insert"));
} else if (V->getType() != VecTy) {
- V = getValueCast(IRB, V, VecTy);
+ V = convertValue(TD, IRB, V, VecTy);
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.push_back(&SI);
@@ -2442,16 +2535,31 @@ private:
if (VecTy)
return rewriteVectorizedStoreInst(IRB, SI, OldOp);
- if (IntPromotionTy)
+ Type *ValueTy = SI.getValueOperand()->getType();
+ if (IntTy && ValueTy->isIntegerTy())
return rewriteIntegerStore(IRB, SI);
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after promoting this alloca.
- if (SI.getValueOperand()->getType()->isPointerTy())
+ if (ValueTy->isPointerTy())
if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand()
->stripInBoundsOffsets()))
Pass.PostPromotionWorklist.insert(AI);
+ if (BeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(TD, ValueTy, NewAllocaTy)) {
+ Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy);
+ StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(),
+ SI.isVolatile());
+ (void)NewSI;
+ Pass.DeadInsts.push_back(&SI);
+
+ DEBUG(dbgs() << " to: " << *NewSI << "\n");
+ return !SI.isVolatile();
+ }
+
+ assert(!IntTy && "Invalid store found with int-op widening enabled");
+
Value *NewPtr = getAdjustedAllocaPtr(IRB,
SI.getPointerOperand()->getType());
SI.setOperand(1, NewPtr);
@@ -2487,10 +2595,11 @@ private:
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memset.
- if (!VecTy && (BeginOffset != NewAllocaBeginOffset ||
- EndOffset != NewAllocaEndOffset ||
- !AllocaTy->isSingleValueType() ||
- !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
+ if (!VecTy && !IntTy &&
+ (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaEndOffset ||
+ !AllocaTy->isSingleValueType() ||
+ !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
CallInst *New
@@ -2508,32 +2617,24 @@ private:
// a sensible representation for the alloca type. This is essentially
// splatting the byte to a sufficiently wide integer, bitcasting to the
// desired scalar type, and splatting it across any desired vector type.
+ uint64_t Size = EndOffset - BeginOffset;
Value *V = II.getValue();
IntegerType *VTy = cast<IntegerType>(V->getType());
- Type *IntTy = Type::getIntNTy(VTy->getContext(),
- TD.getTypeSizeInBits(ScalarTy));
- if (TD.getTypeSizeInBits(ScalarTy) > VTy->getBitWidth())
- V = IRB.CreateMul(IRB.CreateZExt(V, IntTy, getName(".zext")),
+ Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
+ if (Size*8 > VTy->getBitWidth())
+ V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
ConstantExpr::getUDiv(
- Constant::getAllOnesValue(IntTy),
+ Constant::getAllOnesValue(SplatIntTy),
ConstantExpr::getZExt(
Constant::getAllOnesValue(V->getType()),
- IntTy)),
+ SplatIntTy)),
getName(".isplat"));
- if (V->getType() != ScalarTy) {
- if (ScalarTy->isPointerTy())
- V = IRB.CreateIntToPtr(V, ScalarTy);
- else if (ScalarTy->isPrimitiveType() || ScalarTy->isVectorTy())
- V = IRB.CreateBitCast(V, ScalarTy);
- else if (ScalarTy->isIntegerTy())
- llvm_unreachable("Computed different integer types with equal widths");
- else
- llvm_unreachable("Invalid scalar type");
- }
// If this is an element-wide memset of a vectorizable alloca, insert it.
if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)) {
+ if (V->getType() != ScalarTy)
+ V = convertValue(TD, IRB, V, ScalarTy);
StoreInst *Store = IRB.CreateAlignedStore(
IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
NewAI.getAlignment(),
@@ -2546,18 +2647,20 @@ private:
return true;
}
- // Splat to a vector if needed.
- if (VectorType *VecTy = dyn_cast<VectorType>(AllocaTy)) {
- VectorType *SplatSourceTy = VectorType::get(V->getType(), 1);
- V = IRB.CreateShuffleVector(
- IRB.CreateInsertElement(UndefValue::get(SplatSourceTy), V,
- IRB.getInt32(0), getName(".vsplat.insert")),
- UndefValue::get(SplatSourceTy),
- ConstantVector::getSplat(VecTy->getNumElements(), IRB.getInt32(0)),
- getName(".vsplat.shuffle"));
- assert(V->getType() == VecTy);
+ // If this is a memset on an alloca where we can widen stores, insert the
+ // set integer.
+ if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
+ EndOffset < NewAllocaEndOffset)) {
+ assert(!II.isVolatile());
+ StoreInst *Store = insertInteger(IRB, V, BeginOffset);
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return true;
}
+ if (V->getType() != AllocaTy)
+ V = convertValue(TD, IRB, V, AllocaTy);
+
Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
II.isVolatile());
(void)New;
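insertInteger (with its dual extractInteger, used by the load and memcpy paths) is defined earlier in SROA.cpp. A hypothetical sketch of the read-modify-write merge it performs, assuming a little-endian layout; the name and details here are assumptions, not the upstream body:

    StoreInst *insertIntegerSketch(IRBuilder<> &IRB, AllocaInst &NewAI,
                                   Value *V, uint64_t RelOffset) {
      IntegerType *WideTy = cast<IntegerType>(NewAI.getAllocatedType());
      unsigned Width = cast<IntegerType>(V->getType())->getBitWidth();
      // Load the current wide value so the untouched bytes survive.
      Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                         "oldload");
      // Widen the narrow value and shift it into position.
      V = IRB.CreateZExt(V, WideTy, "zext");
      unsigned ShAmt = unsigned(8 * RelOffset); // little-endian placement
      if (ShAmt)
        V = IRB.CreateShl(V, ShAmt, "shift");
      // Clear the destination bit range in the old value, then merge.
      APInt Mask = APInt::getBitsSet(WideTy->getBitWidth(), ShAmt,
                                     ShAmt + Width);
      Old = IRB.CreateAnd(Old, ConstantInt::get(WideTy->getContext(), ~Mask),
                          "mask");
      V = IRB.CreateOr(Old, V, "insert");
      return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
    }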
@@ -2578,8 +2681,10 @@ private:
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(II);
+ assert(OldPtr->getType()->isPointerTy() && "Must be a pointer type!");
+ unsigned AS = cast<PointerType>(OldPtr->getType())->getAddressSpace();
// Compute the relative offset within the transfer.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
: MTO.SourceBegin));
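The address space is threaded through because getPointerSizeInBits is no longer a single global answer: a DataLayout string can give each address space its own pointer width, and RelOffset must be built at the width of the pointer actually being offset. Illustrative, with an assumed layout string:

    // AS0 pointers are 64-bit, AS1 pointers are 32-bit in this layout.
    DataLayout TD("p:64:64-p1:32:32");
    unsigned W0 = TD.getPointerSizeInBits(0); // 64
    unsigned W1 = TD.getPointerSizeInBits(1); // 32
    // An APInt offset built at the wrong width would truncate or
    // over-extend address arithmetic for pointers in the other space.
    APInt Off0(W0, 16), Off1(W1, 16);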
@@ -2618,9 +2723,9 @@ private:
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memcpy.
bool EmitMemCpy
- = !VecTy && (BeginOffset != NewAllocaBeginOffset ||
- EndOffset != NewAllocaEndOffset ||
- !NewAI.getAllocatedType()->isSingleValueType());
+ = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaEndOffset ||
+ !NewAI.getAllocatedType()->isSingleValueType());
// If we're just going to emit a memcpy, the alloca hasn't changed, and the
// size hasn't been shrunk based on analysis of the viable range, this is
@@ -2642,14 +2747,23 @@ private:
if (Pass.DeadSplitInsts.insert(&II))
Pass.DeadInsts.push_back(&II);
- bool IsVectorElement = VecTy && (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset);
+ bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset;
+ bool IsVectorElement = VecTy && !IsWholeAlloca;
+ uint64_t Size = EndOffset - BeginOffset;
+ IntegerType *SubIntTy
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
: II.getRawDest()->getType();
- if (!EmitMemCpy)
- OtherPtrTy = IsVectorElement ? VecTy->getElementType()->getPointerTo()
- : NewAI.getType();
+ if (!EmitMemCpy) {
+ if (IsVectorElement)
+ OtherPtrTy = VecTy->getElementType()->getPointerTo();
+ else if (IntTy && !IsWholeAlloca)
+ OtherPtrTy = SubIntTy->getPointerTo();
+ else
+ OtherPtrTy = NewAI.getType();
+ }
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
@@ -2696,11 +2810,20 @@ private:
IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
getIndex(IRB, BeginOffset),
getName(".copyextract"));
+ } else if (IntTy && !IsWholeAlloca && !IsDest) {
+ Src = extractInteger(IRB, SubIntTy, BeginOffset);
} else {
Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
getName(".copyload"));
}
+ if (IntTy && !IsWholeAlloca && IsDest) {
+ StoreInst *Store = insertInteger(IRB, Src, BeginOffset);
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return true;
+ }
+
if (IsVectorElement && IsDest) {
// We have to insert into a loaded copy before storing.
Src = IRB.CreateInsertElement(
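Taken together, the rewritten memory-transfer path now chooses between four strategies. A compact restatement of the control flow above — illustrative, not upstream code:

    enum TransferRewrite {
      UseMemCpy,        // unpromotable slice: keep a real memcpy intrinsic
      UseVectorElement, // extractelement/insertelement on the vector alloca
      UseIntegerSlice,  // extractInteger/insertInteger on an iN sub-range
      UseWholeAlloca    // plain load/store of the entire new alloca
    };

    TransferRewrite classify(bool EmitMemCpy, bool IsVectorElement,
                             bool HasIntTy, bool IsWholeAlloca) {
      if (EmitMemCpy)
        return UseMemCpy;
      if (IsVectorElement)
        return UseVectorElement;
      if (HasIntTy && !IsWholeAlloca)
        return UseIntegerSlice;
      return UseWholeAlloca;
    }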
@@ -2993,6 +3116,36 @@ private:
};
}
+/// \brief Strip aggregate type wrapping.
+///
+/// This removes no-op aggregate types wrapping an underlying type. It will
+/// strip as many layers of types as it can without changing either the type
+/// size or the allocated size.
+static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
+ if (Ty->isSingleValueType())
+ return Ty;
+
+ uint64_t AllocSize = DL.getTypeAllocSize(Ty);
+ uint64_t TypeSize = DL.getTypeSizeInBits(Ty);
+
+ Type *InnerTy;
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
+ InnerTy = ArrTy->getElementType();
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ unsigned Index = SL->getElementContainingOffset(0);
+ InnerTy = STy->getElementType(Index);
+ } else {
+ return Ty;
+ }
+
+ if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
+ TypeSize > DL.getTypeSizeInBits(InnerTy))
+ return Ty;
+
+ return stripAggregateTypeWrapping(DL, InnerTy);
+}
+
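For example, a struct that merely wraps a one-element array of i32 strips all the way down to the scalar, while a wrapper that introduces padding stops the recursion. An illustrative fragment (assumes an in-scope DataLayout DL and using namespace llvm):

    LLVMContext Ctx;
    Type *I32 = Type::getInt32Ty(Ctx);
    Type *Arr = ArrayType::get(I32, 1);                         // [1 x i32]
    Type *Wrapped = StructType::get(Ctx, ArrayRef<Type*>(Arr)); // { [1 x i32] }
    // Neither wrapper changes the bit size or the alloc size, so
    //   stripAggregateTypeWrapping(DL, Wrapped) == I32
    // By contrast, { i32, i8 } is returned unchanged: the inner i32 is
    // strictly smaller than the struct's alloc size, and stripping it
    // would lose the trailing padding bytes.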
/// \brief Try to find a partition of the aggregate type passed in for a given
/// offset and size.
///
@@ -3009,7 +3162,7 @@ private:
static Type *getTypePartition(const DataLayout &TD, Type *Ty,
uint64_t Offset, uint64_t Size) {
if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
- return Ty;
+ return stripAggregateTypeWrapping(TD, Ty);
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
// We can't partition pointers...
@@ -3038,7 +3191,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
assert(Offset == 0);
if (Size == ElementSize)
- return ElementTy;
+ return stripAggregateTypeWrapping(TD, ElementTy);
assert(Size > ElementSize);
uint64_t NumElements = Size / ElementSize;
if (NumElements * ElementSize != Size)
@@ -3074,7 +3227,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
assert(Offset == 0);
if (Size == ElementSize)
- return ElementTy;
+ return stripAggregateTypeWrapping(TD, ElementTy);
StructType::element_iterator EI = STy->element_begin() + Index,
EE = STy->element_end();
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 73f53b7cecc..d86c4cbc9f6 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -133,295 +133,7 @@ static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) {
// String and Memory LibCall Optimizations
//===----------------------------------------------------------------------===//
-//===---------------------------------------===//
-// 'strcat' Optimizations
namespace {
-struct StrCatOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strcat" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- FT->getParamType(1) != FT->getReturnType())
- return 0;
-
- // Extract some information from the instruction
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
-
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
- --Len; // Unbias length.
-
- // Handle the simple, do-nothing case: strcat(x, "") -> x
- if (Len == 0)
- return Dst;
-
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- return EmitStrLenMemCpy(Src, Dst, Len, B);
- }
-
- Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
- // We need to find the end of the destination string. That's where the
- // memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
- if (!DstLen)
- return 0;
-
- // Now that we have the destination's length, we must index into the
- // destination's pointer to get the actual memcpy destination (end of
- // the string .. we're concatenating).
- Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
-
- // We have enough information to now generate the memcpy call to do the
- // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
- return Dst;
- }
-};
-
-//===---------------------------------------===//
-// 'strncat' Optimizations
-
-struct StrNCatOpt : public StrCatOpt {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strncat" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- FT->getParamType(1) != FT->getReturnType() ||
- !FT->getParamType(2)->isIntegerTy())
- return 0;
-
- // Extract some information from the instruction
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
- uint64_t Len;
-
- // We don't do anything if length is not constant
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
- Len = LengthArg->getZExtValue();
- else
- return 0;
-
- // See if we can get the length of the input string.
- uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0) return 0;
- --SrcLen; // Unbias length.
-
- // Handle the simple, do-nothing cases:
- // strncat(x, "", c) -> x
- // strncat(x, c, 0) -> x
- if (SrcLen == 0 || Len == 0) return Dst;
-
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- // We don't optimize this case
- if (Len < SrcLen) return 0;
-
- // strncat(x, s, c) -> strcat(x, s)
- // s is constant so the strcat can be optimized further
- return EmitStrLenMemCpy(Src, Dst, SrcLen, B);
- }
-};
-
-//===---------------------------------------===//
-// 'strchr' Optimizations
-
-struct StrChrOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strchr" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- !FT->getParamType(1)->isIntegerTy(32))
- return 0;
-
- Value *SrcStr = CI->getArgOperand(0);
-
- // If the second operand is non-constant, see if we can compute the length
- // of the input string and turn this into memchr.
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- if (CharC == 0) {
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- uint64_t Len = GetStringLength(SrcStr);
- if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
- return 0;
-
- return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- B, TD, TLI);
- }
-
- // Otherwise, the character is a constant, see if the first argument is
- // a string literal. If so, we can constant fold.
- StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str))
- return 0;
-
- // Compute the offset, make sure to handle the case when we're searching for
- // zero (a weird way to spell strlen).
- size_t I = CharC->getSExtValue() == 0 ?
- Str.size() : Str.find(CharC->getSExtValue());
- if (I == StringRef::npos) // Didn't find the char. strchr returns null.
- return Constant::getNullValue(CI->getType());
-
- // strchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
- }
-};
-
-//===---------------------------------------===//
-// 'strrchr' Optimizations
-
-struct StrRChrOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strrchr" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- !FT->getParamType(1)->isIntegerTy(32))
- return 0;
-
- Value *SrcStr = CI->getArgOperand(0);
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
-
- // Cannot fold anything if we're not looking for a constant.
- if (!CharC)
- return 0;
-
- StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str)) {
- // strrchr(s, 0) -> strchr(s, 0)
- if (TD && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, TD, TLI);
- return 0;
- }
-
- // Compute the offset.
- size_t I = CharC->getSExtValue() == 0 ?
- Str.size() : Str.rfind(CharC->getSExtValue());
- if (I == StringRef::npos) // Didn't find the char. Return null.
- return Constant::getNullValue(CI->getType());
-
- // strrchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
- }
-};
-
-//===---------------------------------------===//
-// 'strcmp' Optimizations
-
-struct StrCmpOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strcmp" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- !FT->getReturnType()->isIntegerTy(32) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
-
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
- if (Str1P == Str2P) // strcmp(x,x) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
- // strcmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2)
- return ConstantInt::get(CI->getType(), Str1.compare(Str2));
-
- if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
- return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
- CI->getType()));
-
- if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
- // strcmp(P, "x") -> memcmp(P, "x", 2)
- uint64_t Len1 = GetStringLength(Str1P);
- uint64_t Len2 = GetStringLength(Str2P);
- if (Len1 && Len2) {
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(TD->getIntPtrType(*Context),
- std::min(Len1, Len2)), B, TD, TLI);
- }
-
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'strncmp' Optimizations
-
-struct StrNCmpOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strncmp" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 ||
- !FT->getReturnType()->isIntegerTy(32) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- !FT->getParamType(2)->isIntegerTy())
- return 0;
-
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
- if (Str1P == Str2P) // strncmp(x,x,n) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- // Get the length argument if it is constant.
- uint64_t Length;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
- Length = LengthArg->getZExtValue();
- else
- return 0;
-
- if (Length == 0) // strncmp(x,y,0) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
-
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
- // strncmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2) {
- StringRef SubStr1 = Str1.substr(0, Length);
- StringRef SubStr2 = Str2.substr(0, Length);
- return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
- }
-
- if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
- return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
- CI->getType()));
-
- if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
- return 0;
- }
-};
-
-
//===---------------------------------------===//
// 'strcpy' Optimizations
@@ -636,9 +348,8 @@ struct StrToOpt : public LibCallOptimization {
if (isa<ConstantPointerNull>(EndPtr)) {
// With a null EndPtr, this function won't capture the main argument.
// It would be readonly too, except that it still may write to errno.
- Attributes::Builder B;
- B.addAttribute(Attributes::NoCapture);
- CI->addAttribute(1, Attributes::get(B));
+ CI->addAttribute(1, Attributes::get(Callee->getContext(),
+ Attributes::NoCapture));
}
return 0;
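This hunk is one instance of the attribute API migration repeated throughout the patch: attribute construction now requires the LLVMContext, and the old magic index ~0u is spelled with a named constant. Side by side, for illustration:

    // Before: context-free builder plus an all-ones magic index.
    //   Attributes::Builder B;
    //   B.addAttribute(Attributes::NoCapture);
    //   CI->addAttribute(1, Attributes::get(B));
    //   AWI = AttributeWithIndex::get(~0u, Attributes::NoUnwind);

    // After: the context is threaded through, and function-level
    // attributes use AttrListPtr::FunctionIndex instead of ~0u.
    CI->addAttribute(1, Attributes::get(Callee->getContext(),
                                        Attributes::NoCapture));
    AWI = AttributeWithIndex::get(M->getContext(),
                                  AttrListPtr::FunctionIndex,
                                  Attributes::NoUnwind);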
@@ -1564,8 +1275,6 @@ namespace {
StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
- StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
- StrCmpOpt StrCmp; StrNCmpOpt StrNCmp;
StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
StpCpyOpt StpCpy; StpCpyOpt StpCpyChk;
StrNCpyOpt StrNCpy;
@@ -1639,12 +1348,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
/// we know.
void SimplifyLibCalls::InitOptimizations() {
// String and Memory LibCall Optimizations
- Optimizations["strcat"] = &StrCat;
- Optimizations["strncat"] = &StrNCat;
- Optimizations["strchr"] = &StrChr;
- Optimizations["strrchr"] = &StrRChr;
- Optimizations["strcmp"] = &StrCmp;
- Optimizations["strncmp"] = &StrNCmp;
Optimizations["strcpy"] = &StrCpy;
Optimizations["strncpy"] = &StrNCpy;
Optimizations["stpcpy"] = &StpCpy;
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 26240d4dfe4..fa2faa2dad8 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -41,9 +41,10 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[1] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI),
@@ -67,9 +68,10 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[1] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI),
@@ -95,7 +97,8 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
AttributeWithIndex AWI =
- AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
@@ -117,10 +120,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[2] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI),
@@ -147,8 +151,9 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI),
I8Ptr, I8Ptr, I8Ptr, NULL);
@@ -169,8 +174,9 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI),
I8Ptr, I8Ptr, I8Ptr,
@@ -193,7 +199,8 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
- AWI = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
AttrListPtr::get(AWI),
@@ -221,7 +228,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI),
B.getInt8PtrTy(),
@@ -246,10 +254,11 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[2] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI),
@@ -325,8 +334,9 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI),
B.getInt32Ty(),
@@ -347,8 +357,9 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
Constant *F;
if (File->getType()->isPointerTy())
F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI),
@@ -378,9 +389,10 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture);
- AWI[2] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
StringRef FPutsName = TLI->getName(LibFunc::fputs);
Constant *F;
if (File->getType()->isPointerTy())
@@ -409,9 +421,10 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(4, Attributes::NoCapture);
- AWI[2] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture);
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index c3f72b13afc..620209bccbc 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -28,6 +28,7 @@ add_llvm_library(LLVMTransformUtils
SimplifyCFG.cpp
SimplifyIndVar.cpp
SimplifyInstructions.cpp
+ SimplifyLibCalls.cpp
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index e2932501f31..7ba9f6d9d25 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -98,10 +98,14 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
Anew->addAttr( OldFunc->getAttributes()
.getParamAttributes(I->getArgNo() + 1));
NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttr(0, OldFunc->getAttributes()
+ .addAttr(NewFunc->getContext(),
+ AttrListPtr::ReturnIndex,
+ OldFunc->getAttributes()
.getRetAttributes()));
NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttr(~0, OldFunc->getAttributes()
+ .addAttr(NewFunc->getContext(),
+ AttrListPtr::FunctionIndex,
+ OldFunc->getAttributes()
.getFnAttributes()));
}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a954d82c05b..9729687a83e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -806,7 +806,8 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
const DataLayout *TD) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 64;
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(V, KnownZero, KnownOne, TD);
unsigned TrailZ = KnownZero.countTrailingOnes();
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 930555424de..f35cbbdde5e 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -45,10 +45,10 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/TargetTransformInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
#include <csetjmp>
#include <set>
using namespace llvm;
@@ -70,15 +70,14 @@ namespace {
Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
bool useExpensiveEHSupport;
- // We peek in TLI to grab the target's jmp_buf size and alignment
- const TargetLowering *TLI;
+ // We peek in STTI to grab the target's jmp_buf size and alignment
+ const ScalarTargetTransformInfo *STTI;
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerInvoke(const TargetLowering *tli = NULL,
- bool useExpensiveEHSupport = ExpensiveEHSupport)
+ explicit LowerInvoke(bool useExpensiveEHSupport = ExpensiveEHSupport)
: FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
- TLI(tli) {
+ STTI(0) {
initializeLowerInvokePass(*PassRegistry::getPassRegistry());
}
bool doInitialization(Module &M);
@@ -108,21 +107,24 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
char &llvm::LowerInvokePassID = LowerInvoke::ID;
// Public Interface To the LowerInvoke pass.
-FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
- return new LowerInvoke(TLI, ExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass() {
+ return new LowerInvoke(ExpensiveEHSupport);
}
-FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
- bool useExpensiveEHSupport) {
- return new LowerInvoke(TLI, useExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass(bool useExpensiveEHSupport) {
+ return new LowerInvoke(useExpensiveEHSupport);
}
// doInitialization - Make sure that there is a prototype for abort in the
// current module.
bool LowerInvoke::doInitialization(Module &M) {
+ TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ if (TTI)
+ STTI = TTI->getScalarTargetTransformInfo();
+
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
if (useExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
- unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
+ unsigned JBSize = STTI ? STTI->getJumpBufSize() : 0;
JBSize = JBSize ? JBSize : 200;
Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
@@ -430,7 +432,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an
// alloca because the value needs to be live across invokes.
- unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
+ unsigned Align = STTI ? STTI->getJumpBufAlignment() : 0;
AllocaInst *JmpBuf =
new AllocaInst(JBLinkTy, 0, Align,
"jblink", F.begin()->begin());
@@ -575,6 +577,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
}
bool LowerInvoke::runOnFunction(Function &F) {
+ TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ if (TTI)
+ STTI = TTI->getScalarTargetTransformInfo();
+
if (useExpensiveEHSupport)
return insertExpensiveEHSupport(F);
else
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index af8d1128523..a008da67e92 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -76,6 +76,8 @@ namespace {
// Comparing pointers is ok as we only rely on the order for uniquing.
return Value < RHS.Value;
}
+
+ bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
};
class SimplifyCFGOpt {
@@ -564,11 +566,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
/// in the list that match the specified block.
static void EliminateBlockCases(BasicBlock *BB,
std::vector<ValueEqualityComparisonCase> &Cases) {
- for (unsigned i = 0, e = Cases.size(); i != e; ++i)
- if (Cases[i].Dest == BB) {
- Cases.erase(Cases.begin()+i);
- --i; --e;
- }
+ Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
}
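The manual erase loop becomes the standard erase-remove idiom, enabled by the operator== added above that compares a case directly against a BasicBlock*. The same idiom on a plain vector, for illustration:

    #include <algorithm>
    #include <vector>

    void dropValue(std::vector<int> &V, int X) {
      // std::remove compacts the kept elements to the front and returns
      // the new logical end; erase trims the tail in one shot. This is
      // O(n), versus the quadratic worst case of erasing inside an index
      // loop with manual --i; --e; fixups.
      V.erase(std::remove(V.begin(), V.end(), X), V.end());
    }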
/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
@@ -695,7 +693,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
SI->removeCase(i);
}
}
- if (HasWeight)
+ if (HasWeight && Weights.size() >= 2)
SI->setMetadata(LLVMContext::MD_prof,
MDBuilder(SI->getParent()->getContext()).
createBranchWeights(Weights));
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
new file mode 100644
index 00000000000..bd28ec35273
--- /dev/null
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -0,0 +1,579 @@
+//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the library call simplifier. It defines a family of
+// per-call optimizations that fold well-known library calls (the fortified
+// *_chk routines and common string/memory functions) into simpler, equivalent
+// IR, plus a LibCallSimplifier facade that dispatches calls to them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/DataLayout.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+
+using namespace llvm;
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class LibCallOptimization {
+protected:
+ Function *Caller;
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ LLVMContext* Context;
+public:
+ LibCallOptimization() { }
+ virtual ~LibCallOptimization() {}
+
+ /// callOptimizer - This pure virtual method is implemented by subclasses to
+ /// do various optimizations. If this returns null then no transformation was
+ /// performed. If it returns CI, then it transformed the call and CI is to be
+ /// deleted. If it returns something else, replace CI with the new value and
+ /// delete CI.
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+ =0;
+
+ Value *optimizeCall(CallInst *CI, const DataLayout *TD,
+ const TargetLibraryInfo *TLI, IRBuilder<> &B) {
+ Caller = CI->getParent()->getParent();
+ this->TD = TD;
+ this->TLI = TLI;
+ if (CI->getCalledFunction())
+ Context = &CI->getCalledFunction()->getContext();
+
+ // We never change the calling convention.
+ if (CI->getCallingConv() != llvm::CallingConv::C)
+ return NULL;
+
+ return callOptimizer(CI->getCalledFunction(), CI, B);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Fortified Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct FortifiedLibCallOptimization : public LibCallOptimization {
+protected:
+ virtual bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp,
+ bool isString) const = 0;
+};
+
+struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization {
+ CallInst *CI;
+
+ bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+ if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+ return true;
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
+ if (SizeCI->isAllOnesValue())
+ return true;
+ if (isString) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
+ // If the length is 0 we don't know how long it is and so we can't
+ // remove the check.
+ if (Len == 0) return false;
+ return SizeCI->getZExtValue() >= Len;
+ }
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(
+ CI->getArgOperand(SizeArgOp)))
+ return SizeCI->getZExtValue() >= Arg->getZExtValue();
+ }
+ return false;
+ }
+};
+
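isFoldable decides whether the object-size guard of a fortified call is provably satisfied, in which case the _chk call can be lowered to its plain counterpart. A host-side restatement for __memcpy_chk(dst, src, len, objsize), where SizeCIOp = 3 is objsize and SizeArgOp = 2 is len; purely illustrative:

    #include <cstdint>

    bool chkGuardIsRedundant(uint64_t ObjSize, bool ObjSizeIsAllOnes,
                             uint64_t Len) {
      if (ObjSizeIsAllOnes) // objsize == -1: the guard was already a no-op
        return true;
      return ObjSize >= Len; // the copy provably fits the destination
    }
    // chkGuardIsRedundant(32, false, 16) -> true : fold to plain memcpy
    // chkGuardIsRedundant(16, false, 32) -> false: keep the checked call
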
+struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
+ false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != TD->getIntPtrType(Context))
+ return 0;
+
+ // If (a) we don't have any length information, or (b) we know this will
+ // fit, then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+ // st[rp]cpy_chk call, which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+ // string lengths for varying inputs.
+ if (isFoldable(2, 1, true)) {
+ Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
+ TLI, Name.substr(2, 6));
+ return Ret;
+ }
+ return 0;
+ }
+};
+
+struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TD, TLI,
+ Name.substr(2, 7));
+ return Ret;
+ }
+ return 0;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// String and Memory Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct StrCatOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcat" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+ --Len; // Unbias length.
+
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ return emitStrLenMemCpy(Src, Dst, Len, B);
+ }
+
+ Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
+ IRBuilder<> &B) {
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+ if (!DstLen)
+ return 0;
+
+ // Now that we have the destination's length, we must index into the
+ // destination's pointer to get the actual memcpy destination (end of
+ // the string .. we're concatenating).
+ Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+
+ // We have enough information to now generate the memcpy call to do the
+ // concatenation for us. Make a memcpy that copies the string together
+ // with its nul terminator, using align = 1.
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ return Dst;
+ }
+};
+
+struct StrNCatOpt : public StrCatOpt {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncat" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ uint64_t Len;
+
+ // We don't do anything if length is not constant
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen; // Unbias length.
+
+ // Handle the simple, do-nothing cases:
+ // strncat(x, "", c) -> x
+ // strncat(x, s, 0) -> x
+ if (SrcLen == 0 || Len == 0) return Dst;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // We don't optimize this case
+ if (Len < SrcLen) return 0;
+
+ // strncat(x, s, c) -> strcat(x, s)
+ // s is constant so the strcat can be optimized further
+ return emitStrLenMemCpy(Src, Dst, SrcLen, B);
+ }
+};
+
+struct StrChrOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strchr" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (CharC == 0) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ uint64_t Len = GetStringLength(SrcStr);
+ if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
+ return 0;
+
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ B, TD, TLI);
+ }
+
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str))
+ return 0;
+
+ // Compute the offset; make sure to handle the case where we're searching
+ // for zero (a weird way to spell strlen).
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.find(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+ // strchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ }
+};
+
+struct StrRChrOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strrchr" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+ // Cannot fold anything if we're not looking for a constant.
+ if (!CharC)
+ return 0;
+
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (TD && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TD, TLI);
+ return 0;
+ }
+
+ // Compute the offset.
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.rfind(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. Return null.
+ return Constant::getNullValue(CI->getType());
+
+ // strrchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+ }
+};
+
+struct StrCmpOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcmp" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(), Str1.compare(Str2));
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strcmp(P, "x") -> memcmp(P, "x", 2)
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len1 && Len2) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ return EmitMemCmp(Str1P, Str2P,
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ std::min(Len1, Len2)), B, TD, TLI);
+ }
+
+ return 0;
+ }
+};
+
+struct StrNCmpOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncmp" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Length = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ // strncmp(x, y, n) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2) {
+ StringRef SubStr1 = Str1.substr(0, Length);
+ StringRef SubStr2 = Str2.substr(0, Length);
+ return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
+ }
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ return 0;
+ }
+};
+
+} // End anonymous namespace.
+
+namespace llvm {
+
+class LibCallSimplifierImpl {
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ StringMap<LibCallOptimization*> Optimizations;
+
+ // Fortified library call optimizations.
+ MemCpyChkOpt MemCpyChk;
+ MemMoveChkOpt MemMoveChk;
+ MemSetChkOpt MemSetChk;
+ StrCpyChkOpt StrCpyChk;
+ StrNCpyChkOpt StrNCpyChk;
+
+ // String and memory library call optimizations.
+ StrCatOpt StrCat;
+ StrNCatOpt StrNCat;
+ StrChrOpt StrChr;
+ StrRChrOpt StrRChr;
+ StrCmpOpt StrCmp;
+ StrNCmpOpt StrNCmp;
+
+ void initOptimizations();
+public:
+ LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ this->TD = TD;
+ this->TLI = TLI;
+ }
+
+ Value *optimizeCall(CallInst *CI);
+};
+
+void LibCallSimplifierImpl::initOptimizations() {
+ // Fortified library call optimizations.
+ Optimizations["__memcpy_chk"] = &MemCpyChk;
+ Optimizations["__memmove_chk"] = &MemMoveChk;
+ Optimizations["__memset_chk"] = &MemSetChk;
+ Optimizations["__strcpy_chk"] = &StrCpyChk;
+ Optimizations["__stpcpy_chk"] = &StrCpyChk;
+ Optimizations["__strncpy_chk"] = &StrNCpyChk;
+ Optimizations["__stpncpy_chk"] = &StrNCpyChk;
+ Optimizations["strcmp"] = &StrCmp;
+ Optimizations["strncmp"] = &StrNCmp;
+
+ // String and memory library call optimizations.
+ Optimizations["strcat"] = &StrCat;
+ Optimizations["strncat"] = &StrNCat;
+ Optimizations["strchr"] = &StrChr;
+ Optimizations["strrchr"] = &StrRChr;
+}
+
+Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
+ if (Optimizations.empty())
+ initOptimizations();
+
+ Function *Callee = CI->getCalledFunction();
+ LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+ if (LCO) {
+ IRBuilder<> Builder(CI);
+ return LCO->optimizeCall(CI, TD, TLI, Builder);
+ }
+ return 0;
+}
+
+LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ Impl = new LibCallSimplifierImpl(TD, TLI);
+}
+
+LibCallSimplifier::~LibCallSimplifier() {
+ delete Impl;
+}
+
+Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+ return Impl->optimizeCall(CI);
+}
+
+}
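A minimal sketch of how a client pass might drive the new facade, assuming a direct call CI; optimizeCall looks the callee up by name, so indirect calls must be filtered out first:

    LibCallSimplifier Simplifier(TD, TLI);
    if (CI->getCalledFunction()) { // direct calls only: lookup is by name
      if (Value *V = Simplifier.optimizeCall(CI)) {
        if (V != CI) // per the callOptimizer contract above
          CI->replaceAllUsesWith(V);
        CI->eraseFromParent();
      }
    }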
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index 0625ef3f09c..4f55fb203bd 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -7,11 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the AttributesList class and Attribute utilities.
+// This file implements the Attributes, AttributeImpl, AttrBuilder,
+// AttributeListImpl, and AttrListPtr classes.
//
//===----------------------------------------------------------------------===//
#include "llvm/Attributes.h"
+#include "AttributesImpl.h"
#include "LLVMContextImpl.h"
#include "llvm/Type.h"
#include "llvm/ADT/StringExtras.h"
@@ -27,27 +29,23 @@ using namespace llvm;
// Attributes Implementation
//===----------------------------------------------------------------------===//
-Attributes::Attributes(uint64_t Val) : Attrs(Val) {}
-
-Attributes::Attributes(AttributesImpl *A) : Attrs(A->Bits) {}
-
-Attributes::Attributes(const Attributes &A) : Attrs(A.Attrs) {}
-
-// FIXME: This is temporary until we have implemented the uniquified version of
-// AttributesImpl.
-Attributes Attributes::get(Attributes::Builder &B) {
- return Attributes(B.Bits);
+Attributes Attributes::get(LLVMContext &Context, ArrayRef<AttrVal> Vals) {
+ AttrBuilder B;
+ for (ArrayRef<AttrVal>::iterator I = Vals.begin(), E = Vals.end();
+ I != E; ++I)
+ B.addAttribute(*I);
+ return Attributes::get(Context, B);
}
-Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) {
+Attributes Attributes::get(LLVMContext &Context, AttrBuilder &B) {
// If there are no attributes, return an empty Attributes class.
- if (B.Bits == 0)
+ if (!B.hasAttributes())
return Attributes();
// Otherwise, build a key to look up the existing attributes.
LLVMContextImpl *pImpl = Context.pImpl;
FoldingSetNodeID ID;
- ID.AddInteger(B.Bits);
+ ID.AddInteger(B.Raw());
void *InsertPoint;
AttributesImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
@@ -55,7 +53,7 @@ Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) {
if (!PA) {
// If we didn't find any existing attributes of the same shape then create a
// new one and insert it.
- PA = new AttributesImpl(B.Bits);
+ PA = new AttributesImpl(B.Raw());
pImpl->AttrsSet.InsertNode(PA, InsertPoint);
}
@@ -64,18 +62,22 @@ Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) {
}
bool Attributes::hasAttribute(AttrVal Val) const {
- return Attrs.hasAttribute(Val);
+ return Attrs && Attrs->hasAttribute(Val);
+}
+
+bool Attributes::hasAttributes() const {
+ return Attrs && Attrs->hasAttributes();
}
bool Attributes::hasAttributes(const Attributes &A) const {
- return Attrs.hasAttributes(A);
+ return Attrs && Attrs->hasAttributes(A);
}
/// This returns the alignment field of an attribute as a byte alignment value.
unsigned Attributes::getAlignment() const {
if (!hasAttribute(Attributes::Alignment))
return 0;
- return 1U << ((Attrs.getAlignment() >> 16) - 1);
+ return 1U << ((Attrs->getAlignment() >> 16) - 1);
}
/// This returns the stack alignment field of an attribute as a byte alignment
@@ -83,46 +85,21 @@ unsigned Attributes::getAlignment() const {
unsigned Attributes::getStackAlignment() const {
if (!hasAttribute(Attributes::StackAlignment))
return 0;
- return 1U << ((Attrs.getStackAlignment() >> 26) - 1);
-}
-
-bool Attributes::isEmptyOrSingleton() const {
- return Attrs.isEmptyOrSingleton();
-}
-
-Attributes Attributes::operator | (const Attributes &A) const {
- return Attributes(Raw() | A.Raw());
-}
-Attributes Attributes::operator & (const Attributes &A) const {
- return Attributes(Raw() & A.Raw());
-}
-Attributes Attributes::operator ^ (const Attributes &A) const {
- return Attributes(Raw() ^ A.Raw());
-}
-Attributes &Attributes::operator |= (const Attributes &A) {
- Attrs.Bits |= A.Raw();
- return *this;
-}
-Attributes &Attributes::operator &= (const Attributes &A) {
- Attrs.Bits &= A.Raw();
- return *this;
-}
-Attributes Attributes::operator ~ () const {
- return Attributes(~Raw());
+ return 1U << ((Attrs->getStackAlignment() >> 26) - 1);
}
uint64_t Attributes::Raw() const {
- return Attrs.Bits;
+ return Attrs ? Attrs->Raw() : 0;
}
Attributes Attributes::typeIncompatible(Type *Ty) {
- Attributes::Builder Incompatible;
-
+ AttrBuilder Incompatible;
+
if (!Ty->isIntegerTy())
// Attributes that only apply to integers.
Incompatible.addAttribute(Attributes::SExt)
.addAttribute(Attributes::ZExt);
-
+
if (!Ty->isPointerTy())
// Attributes that only apply to pointers.
Incompatible.addAttribute(Attributes::ByVal)
@@ -130,8 +107,46 @@ Attributes Attributes::typeIncompatible(Type *Ty) {
.addAttribute(Attributes::NoAlias)
.addAttribute(Attributes::NoCapture)
.addAttribute(Attributes::StructRet);
-
- return Attributes(Incompatible.Bits); // FIXME: Use Attributes::get().
+
+ return Attributes::get(Ty->getContext(), Incompatible);
+}
+
+/// encodeLLVMAttributesForBitcode - This returns an integer containing an
+/// encoding of all the LLVM attributes found in the given attribute bitset.
+/// Any change to this encoding is a breaking change to bitcode compatibility.
+uint64_t Attributes::encodeLLVMAttributesForBitcode(Attributes Attrs) {
+ // FIXME: It doesn't make sense to store the alignment information as an
+ // expanded-out value; we should store it as a log2 value. However, we can't
+ // just change that here without breaking bitcode compatibility. If this ever
+ // becomes a problem in practice, we should introduce new tag numbers in the
+ // bitcode file and have those tags use a more efficiently encoded alignment
+ // field.
+
+ // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit
+ // log2 encoded value. Shift the bits above the alignment up by 11 bits.
+ uint64_t EncodedAttrs = Attrs.Raw() & 0xffff;
+ if (Attrs.hasAttribute(Attributes::Alignment))
+ EncodedAttrs |= Attrs.getAlignment() << 16;
+ EncodedAttrs |= (Attrs.Raw() & (0xfffULL << 21)) << 11;
+ return EncodedAttrs;
+}
+
+/// decodeLLVMAttributesForBitcode - This returns an attribute bitset containing
+/// the LLVM attributes that have been decoded from the given integer. This
+/// function must stay in sync with 'encodeLLVMAttributesForBitcode'.
+Attributes Attributes::decodeLLVMAttributesForBitcode(LLVMContext &C,
+ uint64_t EncodedAttrs) {
+ // The alignment is stored as a 16-bit raw value from bits 16..31. We shift
+ // the bits above 31 down by 11 bits.
+ unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16;
+ assert((!Alignment || isPowerOf2_32(Alignment)) &&
+ "Alignment must be a power of two.");
+
+ AttrBuilder B(EncodedAttrs & 0xffff);
+ if (Alignment)
+ B.addAlignmentAttr(Alignment);
+ B.addRawValue((EncodedAttrs & (0xfffULL << 32)) >> 11);
+ return Attributes::get(C, B);
}
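
The encode/decode pair above is easiest to sanity-check with a concrete value. A worked sketch (bit positions follow the masks and shifts used in this file; the variable names are illustrative only):

  // An attribute set with one low attribute (bit 2) and alignment 16, which
  // the in-memory form stores as Log2_32(16) + 1 == 5 in bits 16..20:
  uint64_t Raw = (1ULL << 2) | (5ULL << 16);
  // encodeLLVMAttributesForBitcode keeps bits 0..15 as-is, re-expands the
  // alignment to a raw 16-bit byte count in bits 16..31, and shifts bits
  // 21..32 up by 11 (none are set here):
  uint64_t Encoded = (1ULL << 2) | (16ULL << 16);
  // decodeLLVMAttributesForBitcode undoes each step, so the round trip holds:
  //   decodeLLVMAttributesForBitcode(C, Encoded).Raw() == Raw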
std::string Attributes::getAsString() const {
@@ -203,61 +218,72 @@ std::string Attributes::getAsString() const {
}
//===----------------------------------------------------------------------===//
-// Attributes::Builder Implementation
+// AttrBuilder Implementation
//===----------------------------------------------------------------------===//
-Attributes::Builder &Attributes::Builder::
-addAttribute(Attributes::AttrVal Val) {
+AttrBuilder &AttrBuilder::addAttribute(Attributes::AttrVal Val) {
Bits |= AttributesImpl::getAttrMask(Val);
return *this;
}
-void Attributes::Builder::addAlignmentAttr(unsigned Align) {
- if (Align == 0) return;
+AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
+ Bits |= Val;
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) {
+ if (Align == 0) return *this;
assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
assert(Align <= 0x40000000 && "Alignment too large.");
Bits |= (Log2_32(Align) + 1) << 16;
+ return *this;
}
-void Attributes::Builder::addStackAlignmentAttr(unsigned Align) {
+AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) {
// Default alignment; allow the target to define how to align it.
- if (Align == 0) return;
+ if (Align == 0) return *this;
assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
assert(Align <= 0x100 && "Alignment too large.");
Bits |= (Log2_32(Align) + 1) << 26;
+ return *this;
}
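
Both helpers above store an alignment of 2^N bytes as N + 1 so that zero can mean "no alignment recorded". A minimal sketch of the resulting bit patterns, assuming the field positions in the shifts above:

  AttrBuilder B;
  B.addAlignmentAttr(16);      // Log2_32(16) + 1 == 5 lands in bits 16..20
  B.addStackAlignmentAttr(8);  // Log2_32(8) + 1 == 4 lands in bits 26..31
  // B.getAlignment() recomputes 1U << (5 - 1) == 16 bytes from the field.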
-Attributes::Builder &Attributes::Builder::
-removeAttribute(Attributes::AttrVal Val) {
+AttrBuilder &AttrBuilder::removeAttribute(Attributes::AttrVal Val) {
Bits &= ~AttributesImpl::getAttrMask(Val);
return *this;
}
-void Attributes::Builder::removeAttributes(const Attributes &A) {
+AttrBuilder &AttrBuilder::addAttributes(const Attributes &A) {
+ Bits |= A.Raw();
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::removeAttributes(const Attributes &A) {
Bits &= ~A.Raw();
+ return *this;
}
-bool Attributes::Builder::hasAttribute(Attributes::AttrVal A) const {
+bool AttrBuilder::hasAttribute(Attributes::AttrVal A) const {
return Bits & AttributesImpl::getAttrMask(A);
}
-bool Attributes::Builder::hasAttributes() const {
+bool AttrBuilder::hasAttributes() const {
return Bits != 0;
}
-bool Attributes::Builder::hasAttributes(const Attributes &A) const {
+bool AttrBuilder::hasAttributes(const Attributes &A) const {
return Bits & A.Raw();
}
-bool Attributes::Builder::hasAlignmentAttr() const {
+bool AttrBuilder::hasAlignmentAttr() const {
return Bits & AttributesImpl::getAttrMask(Attributes::Alignment);
}
-uint64_t Attributes::Builder::getAlignment() const {
+uint64_t AttrBuilder::getAlignment() const {
if (!hasAlignmentAttr())
return 0;
return 1U <<
(((Bits & AttributesImpl::getAttrMask(Attributes::Alignment)) >> 16) - 1);
}
-uint64_t Attributes::Builder::getStackAlignment() const {
+uint64_t AttrBuilder::getStackAlignment() const {
if (!(Bits & AttributesImpl::getAttrMask(Attributes::StackAlignment)))
return 0;
return 1U <<
@@ -322,10 +348,6 @@ uint64_t AttributesImpl::getStackAlignment() const {
return Bits & getAttrMask(Attributes::StackAlignment);
}
-bool AttributesImpl::isEmptyOrSingleton() const {
- return (Bits & (Bits - 1)) == 0;
-}
-
//===----------------------------------------------------------------------===//
// AttributeListImpl Definition
//===----------------------------------------------------------------------===//
@@ -341,19 +363,19 @@ static ManagedStatic<sys::SmartMutex<true> > ALMutex;
class AttributeListImpl : public FoldingSetNode {
sys::cas_flag RefCount;
-
+
// AttributesList is uniqued, these should not be publicly available.
void operator=(const AttributeListImpl &) LLVM_DELETED_FUNCTION;
AttributeListImpl(const AttributeListImpl &) LLVM_DELETED_FUNCTION;
~AttributeListImpl(); // Private implementation
public:
SmallVector<AttributeWithIndex, 4> Attrs;
-
+
AttributeListImpl(ArrayRef<AttributeWithIndex> attrs)
: Attrs(attrs.begin(), attrs.end()) {
RefCount = 0;
}
-
+
void AddRef() {
sys::SmartScopedLock<true> Lock(*ALMutex);
++RefCount;
@@ -366,7 +388,7 @@ public:
if (new_val == 0)
delete this;
}
-
+
void Profile(FoldingSetNodeID &ID) const {
Profile(ID, Attrs);
}
@@ -377,50 +399,49 @@ public:
}
}
};
-}
+
+} // end llvm namespace
AttributeListImpl::~AttributeListImpl() {
// NOTE: Lock must be acquired by caller.
AttributesLists->RemoveNode(this);
}
-
AttrListPtr AttrListPtr::get(ArrayRef<AttributeWithIndex> Attrs) {
// If there are no attributes then return a null AttributesList pointer.
if (Attrs.empty())
return AttrListPtr();
-
+
#ifndef NDEBUG
for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
- assert(Attrs[i].Attrs.hasAttributes() &&
+ assert(Attrs[i].Attrs.hasAttributes() &&
"Pointless attribute!");
assert((!i || Attrs[i-1].Index < Attrs[i].Index) &&
"Misordered AttributesList!");
}
#endif
-
+
// Otherwise, build a key to look up the existing attributes.
FoldingSetNodeID ID;
AttributeListImpl::Profile(ID, Attrs);
void *InsertPos;
-
+
sys::SmartScopedLock<true> Lock(*ALMutex);
-
+
AttributeListImpl *PAL =
AttributesLists->FindNodeOrInsertPos(ID, InsertPos);
-
+
// If we didn't find any existing attributes of the same shape then
// create a new one and insert it.
if (!PAL) {
PAL = new AttributeListImpl(Attrs);
AttributesLists->InsertNode(PAL, InsertPos);
}
-
+
// Return the AttributesList that we found or created.
return AttrListPtr(PAL);
}
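
A usage sketch of the uniquing path above, assuming an LLVMContext Ctx is in scope (the parameter index 1 is illustrative):

  AttrBuilder B;
  B.addAttribute(Attributes::NoAlias);
  Attributes A = Attributes::get(Ctx, B);              // uniqued AttributesImpl
  AttrListPtr PAL =
      AttrListPtr::get(AttributeWithIndex::get(1, A)); // uniqued list node
  // A second call with an equal array returns the same AttributeListImpl.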
-
//===----------------------------------------------------------------------===//
// AttrListPtr Method Implementations
//===----------------------------------------------------------------------===//
@@ -430,7 +451,7 @@ AttrListPtr::AttrListPtr(AttributeListImpl *LI) : AttrList(LI) {
}
AttrListPtr::AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {
- if (AttrList) AttrList->AddRef();
+ if (AttrList) AttrList->AddRef();
}
const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) {
@@ -446,7 +467,7 @@ AttrListPtr::~AttrListPtr() {
if (AttrList) AttrList->DropRef();
}
-/// getNumSlots - Return the number of slots used in this attribute list.
+/// getNumSlots - Return the number of slots used in this attribute list.
/// This is the number of arguments that have an attribute set on them
/// (including the function itself).
unsigned AttrListPtr::getNumSlots() const {
@@ -460,13 +481,12 @@ const AttributeWithIndex &AttrListPtr::getSlot(unsigned Slot) const {
return AttrList->Attrs[Slot];
}
-
-/// getAttributes - The attributes for the specified index are
-/// returned. Attributes for the result are denoted with Idx = 0.
-/// Function notes are denoted with idx = ~0.
+/// getAttributes - The attributes for the specified index are returned.
+/// Attributes for the result are denoted with Idx = 0. Function notes are
+/// denoted with Idx = ~0U.
Attributes AttrListPtr::getAttributes(unsigned Idx) const {
if (AttrList == 0) return Attributes();
-
+
const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
for (unsigned i = 0, e = Attrs.size(); i != e && Attrs[i].Index <= Idx; ++i)
if (Attrs[i].Index == Idx)
@@ -497,7 +517,8 @@ Attributes &AttrListPtr::getAttributesAtIndex(unsigned i) const {
return AttrList->Attrs[i].Attrs;
}
-AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
+AttrListPtr AttrListPtr::addAttr(LLVMContext &C, unsigned Idx,
+ Attributes Attrs) const {
Attributes OldAttrs = getAttributes(Idx);
#ifndef NDEBUG
// FIXME: it is not obvious how this should work for alignment.
@@ -507,11 +528,12 @@ AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
"Attempt to change alignment!");
#endif
-
- Attributes NewAttrs = OldAttrs | Attrs;
- if (NewAttrs == OldAttrs)
+
+ AttrBuilder NewAttrs =
+ AttrBuilder(OldAttrs).addAttributes(Attrs);
+ if (NewAttrs == AttrBuilder(OldAttrs))
return *this;
-
+
SmallVector<AttributeWithIndex, 8> NewAttrList;
if (AttrList == 0)
NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
@@ -524,21 +546,24 @@ AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
// If there are attributes already at this index, merge them in.
if (i != e && OldAttrList[i].Index == Idx) {
- Attrs |= OldAttrList[i].Attrs;
+ Attrs =
+ Attributes::get(C, AttrBuilder(Attrs).
+ addAttributes(OldAttrList[i].Attrs));
++i;
}
-
+
NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
-
+
// Copy attributes for arguments after this one.
- NewAttrList.insert(NewAttrList.end(),
+ NewAttrList.insert(NewAttrList.end(),
OldAttrList.begin()+i, OldAttrList.end());
}
-
+
return get(NewAttrList);
}
-AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
+AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx,
+ Attributes Attrs) const {
#ifndef NDEBUG
// FIXME: it is not obvious how this should work for alignment.
// For now, say we can't pass in alignment, which no current use does.
@@ -546,31 +571,33 @@ AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
"Attempt to exclude alignment!");
#endif
if (AttrList == 0) return AttrListPtr();
-
+
Attributes OldAttrs = getAttributes(Idx);
- Attributes NewAttrs = OldAttrs & ~Attrs;
- if (NewAttrs == OldAttrs)
+ AttrBuilder NewAttrs =
+ AttrBuilder(OldAttrs).removeAttributes(Attrs);
+ if (NewAttrs == AttrBuilder(OldAttrs))
return *this;
SmallVector<AttributeWithIndex, 8> NewAttrList;
const SmallVector<AttributeWithIndex, 4> &OldAttrList = AttrList->Attrs;
unsigned i = 0, e = OldAttrList.size();
-
+
// Copy attributes for arguments before this one.
for (; i != e && OldAttrList[i].Index < Idx; ++i)
NewAttrList.push_back(OldAttrList[i]);
-
+
// If there are attributes already at this index, merge them in.
assert(OldAttrList[i].Index == Idx && "Attribute isn't set?");
- Attrs = OldAttrList[i].Attrs & ~Attrs;
+ Attrs = Attributes::get(C, AttrBuilder(OldAttrList[i].Attrs).
+ removeAttributes(Attrs));
++i;
- if (Attrs) // If any attributes left for this parameter, add them.
+ if (Attrs.hasAttributes()) // If any attributes left for this param, add them.
NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
-
+
// Copy attributes for arguments after this one.
- NewAttrList.insert(NewAttrList.end(),
+ NewAttrList.insert(NewAttrList.end(),
OldAttrList.begin()+i, OldAttrList.end());
-
+
return get(NewAttrList);
}
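
With the LLVMContext parameter threaded through, a caller of the two methods above now looks like the following sketch (F is an assumed Function*; the getAttributes/setAttributes round trip matches the Function.cpp hunks below):

  LLVMContext &Ctx = F->getContext();
  AttrBuilder B;
  B.addAttribute(Attributes::NoUnwind);
  Attributes NoUnwind = Attributes::get(Ctx, B);
  AttrListPtr PAL = F->getAttributes();
  PAL = PAL.addAttr(Ctx, AttrListPtr::FunctionIndex, NoUnwind);    // merge in
  PAL = PAL.removeAttr(Ctx, AttrListPtr::FunctionIndex, NoUnwind); // strip out
  F->setAttributes(PAL);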
@@ -578,8 +605,8 @@ void AttrListPtr::dump() const {
dbgs() << "PAL[ ";
for (unsigned i = 0; i < getNumSlots(); ++i) {
const AttributeWithIndex &PAWI = getSlot(i);
- dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} ";
+ dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs.getAsString() << "} ";
}
-
+
dbgs() << "]\n";
}
diff --git a/lib/VMCore/AttributesImpl.h b/lib/VMCore/AttributesImpl.h
new file mode 100644
index 00000000000..b4a0f615f36
--- /dev/null
+++ b/lib/VMCore/AttributesImpl.h
@@ -0,0 +1,51 @@
+//===-- AttributesImpl.h - Attributes Internals -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by LLVMContextImpl
+// for creating and managing attributes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ATTRIBUTESIMPL_H
+#define LLVM_ATTRIBUTESIMPL_H
+
+#include "llvm/ADT/FoldingSet.h"
+
+namespace llvm {
+
+class Attributes;
+
+class AttributesImpl : public FoldingSetNode {
+ uint64_t Bits; // FIXME: We will be expanding this.
+public:
+ AttributesImpl(uint64_t bits) : Bits(bits) {}
+
+ bool hasAttribute(uint64_t A) const;
+
+ bool hasAttributes() const;
+ bool hasAttributes(const Attributes &A) const;
+
+ uint64_t getAlignment() const;
+ uint64_t getStackAlignment() const;
+
+ uint64_t Raw() const { return Bits; } // FIXME: Remove.
+
+ static uint64_t getAttrMask(uint64_t Val);
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, Bits);
+ }
+ static void Profile(FoldingSetNodeID &ID, uint64_t Bits) {
+ ID.AddInteger(Bits);
+ }
+};
+
+} // end llvm namespace
+
+#endif
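
The Profile overloads above form the uniquing key; LLVMContextImpl's AttrsSet consumes them through the usual FoldingSet lookup pattern, roughly as follows (this mirrors Attributes::get in the Attributes.cpp hunk above):

  FoldingSetNodeID ID;
  AttributesImpl::Profile(ID, Bits);
  void *InsertPoint;
  AttributesImpl *PA = AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
  if (!PA) {                       // first time this bit pattern was seen
    PA = new AttributesImpl(Bits);
    AttrsSet.InsertNode(PA, InsertPoint);
  }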
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 094ca755132..5fff460e8bc 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -148,7 +148,8 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
if (NewFn)
F = NewFn;
if (unsigned id = F->getIntrinsicID())
- F->setAttributes(Intrinsic::getAttributes((Intrinsic::ID)id));
+ F->setAttributes(Intrinsic::getAttributes(F->getContext(),
+ (Intrinsic::ID)id));
return Upgraded;
}
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 6c309679740..ba807fcacca 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_library(LLVMCore
PrintModulePass.cpp
Type.cpp
TypeFinder.cpp
+ TargetTransformInfo.cpp
Use.cpp
User.cpp
Value.cpp
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 90ecdaecf41..847bc134ddb 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1381,14 +1381,20 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.addAttr(~0U, Attributes(PA));
+ AttrBuilder B(PA);
+ const AttrListPtr PALnew =
+ PAL.addAttr(Func->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(Func->getContext(), B));
Func->setAttributes(PALnew);
}
void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.removeAttr(~0U, Attributes(PA));
+ AttrBuilder B(PA);
+ const AttrListPtr PALnew =
+ PAL.removeAttr(Func->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(Func->getContext(), B));
Func->setAttributes(PALnew);
}
@@ -1458,11 +1464,15 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
}
void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- unwrap<Argument>(Arg)->addAttr(Attributes(PA));
+ Argument *A = unwrap<Argument>(Arg);
+ AttrBuilder B(PA);
+ A->addAttr(Attributes::get(A->getContext(), B));
}
void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- unwrap<Argument>(Arg)->removeAttr(Attributes(PA));
+ Argument *A = unwrap<Argument>(Arg);
+ AttrBuilder B(PA);
+ A->removeAttr(Attributes::get(A->getContext(), B));
}
LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
@@ -1474,8 +1484,10 @@ LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
- unwrap<Argument>(Arg)->addAttr(
- Attributes::constructAlignmentFromInt(align));
+ AttrBuilder B;
+ B.addAlignmentAttr(align);
+ unwrap<Argument>(Arg)->addAttr(Attributes::
+ get(unwrap<Argument>(Arg)->getContext(), B));
}
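
Seen from the C API, the wrappers above keep their old signatures and only the implementation changes. A usage sketch (the module handle Mod and the function name are hypothetical):

  LLVMValueRef Fn = LLVMGetNamedFunction(Mod, "callee");
  LLVMAddFunctionAttr(Fn, LLVMNoUnwindAttribute);  // now routed via AttrBuilder
  LLVMSetParamAlignment(LLVMGetParam(Fn, 0), 16);  // becomes addAlignmentAttr(16)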
/*--.. Operations on basic blocks ..........................................--*/
@@ -1664,23 +1676,28 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
LLVMAttribute PA) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ AttrBuilder B(PA);
Call.setAttributes(
- Call.getAttributes().addAttr(index, Attributes(PA)));
+ Call.getAttributes().addAttr(Call->getContext(), index,
+ Attributes::get(Call->getContext(), B)));
}
void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
LLVMAttribute PA) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ AttrBuilder B(PA);
Call.setAttributes(
- Call.getAttributes().removeAttr(index, Attributes(PA)));
+ Call.getAttributes().removeAttr(Call->getContext(), index,
+ Attributes::get(Call->getContext(), B)));
}
void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned align) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
- Call.setAttributes(
- Call.getAttributes().addAttr(index,
- Attributes::constructAlignmentFromInt(align)));
+ AttrBuilder B;
+ B.addAlignmentAttr(align);
+ Call.setAttributes(Call.getAttributes().addAttr(Call->getContext(), index,
+ Attributes::get(Call->getContext(), B)));
}
/*--.. Operations on call instructions (only) ..............................--*/
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 5c2a03ce091..9c4f2d93995 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -185,7 +185,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage,
// Ensure intrinsics have the right parameter attributes.
if (unsigned IID = getIntrinsicID())
- setAttributes(Intrinsic::getAttributes(Intrinsic::ID(IID)));
+ setAttributes(Intrinsic::getAttributes(getContext(), Intrinsic::ID(IID)));
}
@@ -249,13 +249,13 @@ void Function::dropAllReferences() {
void Function::addAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.addAttr(i, attr);
+ PAL = PAL.addAttr(getContext(), i, attr);
setAttributes(PAL);
}
void Function::removeAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.removeAttr(i, attr);
+ PAL = PAL.removeAttr(getContext(), i, attr);
setAttributes(PAL);
}
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 74c0c6e1d97..13c4a5d2574 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -332,21 +332,22 @@ CallInst::CallInst(const CallInst &CI)
void CallInst::addAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.addAttr(i, attr);
+ PAL = PAL.addAttr(getContext(), i, attr);
setAttributes(PAL);
}
void CallInst::removeAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.removeAttr(i, attr);
+ PAL = PAL.removeAttr(getContext(), i, attr);
setAttributes(PAL);
}
bool CallInst::hasFnAttr(Attributes::AttrVal A) const {
- if (AttributeList.getParamAttributes(~0U).hasAttribute(A))
+ if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex)
+ .hasAttribute(A))
return true;
if (const Function *F = getCalledFunction())
- return F->getParamAttributes(~0U).hasAttribute(A);
+ return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A);
return false;
}
@@ -571,10 +572,11 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
}
bool InvokeInst::hasFnAttr(Attributes::AttrVal A) const {
- if (AttributeList.getParamAttributes(~0U).hasAttribute(A))
+ if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex).
+ hasAttribute(A))
return true;
if (const Function *F = getCalledFunction())
- return F->getParamAttributes(~0U).hasAttribute(A);
+ return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A);
return false;
}
@@ -588,13 +590,13 @@ bool InvokeInst::paramHasAttr(unsigned i, Attributes::AttrVal A) const {
void InvokeInst::addAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.addAttr(i, attr);
+ PAL = PAL.addAttr(getContext(), i, attr);
setAttributes(PAL);
}
void InvokeInst::removeAttribute(unsigned i, Attributes attr) {
AttrListPtr PAL = getAttributes();
- PAL = PAL.removeAttr(i, attr);
+ PAL = PAL.removeAttr(getContext(), i, attr);
setAttributes(PAL);
}
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index a86363b632a..74247bdde13 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -97,9 +97,11 @@ LLVMContextImpl::~LLVMContextImpl() {
// Destroy attributes.
for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(),
- E = AttrsSet.end(); I != E; ++I)
- delete &*I;
-
+ E = AttrsSet.end(); I != E;) {
+ FoldingSetIterator<AttributesImpl> Elem = I++;
+ delete &*Elem;
+ }
+
// Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
// and the NonUniquedMDNodes sets, so copy the values out first.
SmallVector<MDNode*, 8> MDNodes;
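
The destructor hunk above is needed because advancing a FoldingSetIterator reads the current node's link fields, so a node must not be deleted until the iterator has stepped past it. The idiom in isolation:

  for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(),
       E = AttrsSet.end(); I != E;) {
    AttributesImpl *Elem = &*I++;  // advance first, then free the node
    delete Elem;
  }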
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 524f7e54bb4..ee31814c055 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -16,9 +16,9 @@
#define LLVM_LLVMCONTEXT_IMPL_H
#include "llvm/LLVMContext.h"
+#include "AttributesImpl.h"
#include "ConstantsContext.h"
#include "LeaksContext.h"
-#include "llvm/AttributesImpl.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Metadata.h"
diff --git a/lib/VMCore/TargetTransformInfo.cpp b/lib/VMCore/TargetTransformInfo.cpp
new file mode 100644
index 00000000000..3af0222a211
--- /dev/null
+++ b/lib/VMCore/TargetTransformInfo.cpp
@@ -0,0 +1,27 @@
+//===- llvm/VMCore/TargetTransformInfo.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TargetTransformInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+/// Default ctor.
+///
+/// @note This has to exist, because this is a pass, but it should never be
+/// used.
+TargetTransformInfo::TargetTransformInfo() : ImmutablePass(ID) {
+ report_fatal_error("Bad TargetTransformInfo ctor used. "
+ "Tool did not specify a TargetTransformInfo to use?");
+}
+
+INITIALIZE_PASS(TargetTransformInfo, "TargetTransformInfo",
+ "Target Transform Info", false, true)
+char TargetTransformInfo::ID = 0;
+
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 53744b48691..fd629b485aa 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -567,9 +567,10 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty,
Attrs.hasAttribute(Attributes::AlwaysInline)), "Attributes "
"'noinline and alwaysinline' are incompatible!", V);
- Attributes TypeI = Attrs & Attributes::typeIncompatible(Ty);
- Assert1(!TypeI, "Wrong type for attribute " +
- TypeI.getAsString(), V);
+ Assert1(!AttrBuilder(Attrs).
+ hasAttributes(Attributes::typeIncompatible(Ty)),
+ "Wrong types for attribute: " +
+ Attributes::typeIncompatible(Ty).getAsString(), V);
if (PointerType *PTy = dyn_cast<PointerType>(Ty))
Assert1(!Attrs.hasAttribute(Attributes::ByVal) ||
@@ -614,10 +615,10 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT,
}
Attributes FAttrs = Attrs.getFnAttributes();
- Attributes::Builder NotFn(FAttrs);
+ AttrBuilder NotFn(FAttrs);
NotFn.removeFunctionOnlyAttrs();
Assert1(!NotFn.hasAttributes(), "Attributes '" +
- Attributes::get(NotFn).getAsString() +
+ Attributes::get(V->getContext(), NotFn).getAsString() +
"' do not apply to the function!", V);
// Check for mutually incompatible attributes.